diff --git a/autodl/DELTA/README.md b/autodl/DELTA/README.md
deleted file mode 100644
index 6f0bd2601d1a830c77164cfeca7234d73c820d8b..0000000000000000000000000000000000000000
--- a/autodl/DELTA/README.md
+++ /dev/null
@@ -1,42 +0,0 @@
-# DELTA: DEep Learning Transfer using Feature Map with Attention for Convolutional Networks
-
-## Introduction
-
-This page implements the [DELTA](https://arxiv.org/abs/1901.09229) algorithm in [PaddlePaddle](https://www.paddlepaddle.org.cn).
-
-> Li, Xingjian, et al. "DELTA: Deep learning transfer using feature map with attention for convolutional networks." ICLR 2019.
-
-## Preparation of Data and Pre-trained Model
-
-- Download transfer learning target datasets, such as [Caltech-256](https://www.kaggle.com/jessicali9530/caltech256), [CUB_200_2011](http://www.vision.caltech.edu/visipedia/CUB-200-2011.html) or others. Arrange each dataset in this layout:
-```
-    root/train/dog/xxy.jpg
-    root/train/dog/xxz.jpg
-    ...
-    root/train/cat/nsdf3.jpg
-    root/train/cat/asd932_.jpg
-    ...
-
-    root/test/dog/xxx.jpg
-    ...
-    root/test/cat/123.jpg
-    ...
-```
-
-- Download [the pretrained models](https://github.com/PaddlePaddle/models/tree/release/1.7/PaddleCV/image_classification#resnet-series). The results below use ResNet-101.
-
-## Running Scripts
-
-Set `global_data_path` in `datasets/data_path.py` to the root directory where the datasets are stored.
-
-```bash
-python -u main.py --dataset Caltech30 --delta_reg 0.1 --wd_rate 1e-4 --batch_size 64 --outdir outdir --num_epoch 100 --use_cuda 0
-python -u main.py --dataset CUB_200_2011 --delta_reg 0.1 --wd_rate 1e-4 --batch_size 64 --outdir outdir --num_epoch 100 --use_cuda 0
-```
-
-These scripts give the results below:
-
-Dataset | L2 | DELTA
----|---|---
-Caltech-256|79.86|84.71
-CUB_200|77.41|80.05
diff --git a/autodl/DELTA/args.py b/autodl/DELTA/args.py
deleted file mode 100644
index f4daf7c63dd6513dcc820d0136a4d9bc19b86386..0000000000000000000000000000000000000000
--- a/autodl/DELTA/args.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import argparse
-
-parser = argparse.ArgumentParser()
-
-parser.add_argument(
-    '--prefix', default=None, type=str, help='prefix for model id')
-parser.add_argument('--dataset', default='PetImages', type=str, help='dataset')
-parser.add_argument(
-    '--seed',
-    default=None,
-    type=int,
-    help='random seed (default: None, i.e., the randomness is not fixed).')
-parser.add_argument('--batch_size', default=20, type=int, help='batch_size.')
-parser.add_argument('--delta_reg', default=0.1, type=float, help='delta_reg.')
-parser.add_argument('--wd_rate', default=1e-4, type=float, help='wd_rate.')
-parser.add_argument(
-    '--use_cuda', default=0, type=int, help='CUDA device id; -1 means CPU.')
-parser.add_argument('--num_epoch', default=100, type=int, help='num_epoch.')
-parser.add_argument('--outdir', default='outdir', type=str, help='outdir')
-parser.add_argument(
-    '--pretrained_model',
-    default='./pretrained_models/ResNet101_pretrained',
-    type=str,
-    help='pretrained model pathname')
-
-args = parser.parse_args()
diff --git a/autodl/DELTA/datasets/data_path.py b/autodl/DELTA/datasets/data_path.py
deleted file mode 100644
index d7ea72e38ca7466cf2d84ad1996af4a5f98e7f4b..0000000000000000000000000000000000000000
--- a/autodl/DELTA/datasets/data_path.py
+++ /dev/null
@@ -1 +0,0 @@
-global_data_path = '[root_path]/datasets'
diff --git a/autodl/DELTA/datasets/readers.py b/autodl/DELTA/datasets/readers.py
deleted file mode 100644
index e378a4bb0345abddcf39fc841a44916b9fae5c1c..0000000000000000000000000000000000000000
--- a/autodl/DELTA/datasets/readers.py
+++ /dev/null
@@ -1,165 +0,0 @@
-import cv2
-import numpy as np
-import six
-import os
-import glob
-
-
-def resize_short(img, target_size, interpolation=None):
-    """Resize the short side of an image to target_size.
-
-    Args:
-        img: image data
-        target_size: target size of the short side
-        interpolation: interpolation mode
-
-    Returns:
-        resized image data
-    """
-    percent = float(target_size) / min(img.shape[0], img.shape[1])
-    resized_width = int(round(img.shape[1] * percent))
-    resized_height = int(round(img.shape[0] * percent))
-    if interpolation:
-        resized = cv2.resize(
-            img, (resized_width, resized_height), interpolation=interpolation)
-    else:
-        resized = cv2.resize(img, (resized_width, resized_height))
-    return resized
-
-
-def crop_image(img, target_size, center):
-    """Crop a target_size x target_size patch from an image.
-
-    Args:
-        img: image data
-        target_size: crop size
-        center: if True, crop the center patch; otherwise crop a random patch
-
-    Returns:
-        img: cropped image data
-    """
-    height, width = img.shape[:2]
-    size = target_size
-    if center:
-        w_start = (width - size) // 2
-        h_start = (height - size) // 2
-    else:
-        w_start = np.random.randint(0, width - size + 1)
-        h_start = np.random.randint(0, height - size + 1)
-    w_end = w_start + size
-    h_end = h_start + size
-    img = img[h_start:h_end, w_start:w_end, :]
-    return img
-
-
-def preprocess_image(img, random_mirror=True):
-    """
-    Scale by 1/255, normalize with ImageNet mean/std, optionally mirror.
-    :param img: np.array: shape: [ns, h, w, 3], color order: rgb.
-    :return: np.array: shape: [ns, 3, h, w]
-    """
-    mean = [0.485, 0.456, 0.406]
-    std = [0.229, 0.224, 0.225]
-
-    # transpose to [ns, 3, h, w]
-    img = img.astype('float32').transpose((0, 3, 1, 2)) / 255
-
-    img_mean = np.array(mean).reshape((3, 1, 1))
-    img_std = np.array(std).reshape((3, 1, 1))
-    img -= img_mean
-    img /= img_std
-
-    if random_mirror:
-        mirror = int(np.random.uniform(0, 2))
-        if mirror == 1:
-            img = img[:, :, :, ::-1]  # flip the last (width) axis of NCHW
-
-    return img
-
-
-def _find_classes(dir):
-    # Faster and available in Python 3.5 and above
-    classes = [d.name for d in os.scandir(dir) if d.is_dir()]
-    classes.sort()
-    class_to_idx = {classes[i]: i for i in range(len(classes))}
-    return classes, class_to_idx
-
-
-class ReaderConfig():
-    """
-    A generic data loader where the images are arranged in this way:
-
-        root/train/dog/xxy.jpg
-        root/train/dog/xxz.jpg
-        ...
-        root/train/cat/nsdf3.jpg
-        root/train/cat/asd932_.jpg
-        ...
-
-        root/test/dog/xxx.jpg
-        ...
-        root/test/cat/123.jpg
-        ...
- - """ - - def __init__(self, dataset_dir, is_test): - image_paths, labels, self.num_classes = self.reader_creator( - dataset_dir, is_test) - random_per = np.random.permutation(range(len(image_paths))) - self.image_paths = image_paths[random_per] - self.labels = labels[random_per] - self.is_test = is_test - - def get_reader(self): - def reader(): - IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', - '.tif', '.tiff', '.webp') - target_size = 256 - crop_size = 224 - - for i, img_path in enumerate(self.image_paths): - if not img_path.lower().endswith(IMG_EXTENSIONS): - continue - - img = cv2.imread(img_path) - if img is None: - print(img_path) - continue - img = resize_short(img, target_size, interpolation=None) - img = crop_image(img, crop_size, center=self.is_test) - img = img[:, :, ::-1] - img = np.expand_dims(img, axis=0) - - img = preprocess_image(img, not self.is_test) - - yield img, self.labels[i] - - return reader - - def reader_creator(self, dataset_dir, is_test=False): - IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', - '.tif', '.tiff', '.webp') - - # read - if is_test: - datasubset_dir = os.path.join(dataset_dir, 'test') - else: - datasubset_dir = os.path.join(dataset_dir, 'train') - - class_names, class_to_idx = _find_classes(datasubset_dir) - # num_classes = len(class_names) - image_paths = [] - labels = [] - for class_name in class_names: - classes_dir = os.path.join(datasubset_dir, class_name) - for img_path in glob.glob(os.path.join(classes_dir, '*')): - if not img_path.lower().endswith(IMG_EXTENSIONS): - continue - - image_paths.append(img_path) - labels.append(class_to_idx[class_name]) - - image_paths = np.array(image_paths) - labels = np.array(labels) - return image_paths, labels, len(class_names) diff --git a/autodl/DELTA/main.py b/autodl/DELTA/main.py deleted file mode 100644 index b40e857259bdea89e449f53706a1cea1ea07c1eb..0000000000000000000000000000000000000000 --- a/autodl/DELTA/main.py +++ /dev/null @@ -1,327 +0,0 @@ -import os -import time -import sys -import math -import numpy as np -import functools -import re -import logging -import glob - -import paddle -import paddle.fluid as fluid -from models.resnet import ResNet101 -from datasets.readers import ReaderConfig - -# import cv2 -# import skimage -# import matplotlib.pyplot as plt -# from paddle.fluid.core import PaddleTensor -# from paddle.fluid.core import AnalysisConfig -# from paddle.fluid.core import create_paddle_predictor - -from args import args -from datasets.data_path import global_data_path - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) -if args.seed is not None: - np.random.seed(args.seed) - -print(os.environ.get('LD_LIBRARY_PATH', None)) -print(os.environ.get('PATH', None)) - - -class AverageMeter(object): - """Computes and stores the average and current value""" - - def __init__(self): - self.reset() - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - self.val = val - self.sum += val * n - self.count += n - self.avg = self.sum / self.count - - -def load_vars_by_dict(executor, name_var_dict, main_program=None): - from paddle.fluid.framework import Program, Variable - from paddle.fluid import core - - load_prog = Program() - load_block = load_prog.global_block() - - if main_program is None: - main_program = fluid.default_main_program() - - if not isinstance(main_program, Program): - raise TypeError("program should be as Program type or None") - - for each_var_name in 
name_var_dict.keys(): - assert isinstance(name_var_dict[each_var_name], Variable) - if name_var_dict[each_var_name].type == core.VarDesc.VarType.RAW: - continue - - load_block.append_op( - type='load', - inputs={}, - outputs={'Out': [name_var_dict[each_var_name]]}, - attrs={'file_path': each_var_name}) - - executor.run(load_prog) - - -def get_model_id(): - prefix = '' - if args.prefix is not None: - prefix = args.prefix + '-' # for some notes. - - model_id = prefix + args.dataset + \ - '-epo_' + str(args.num_epoch) + \ - '-b_' + str(args.batch_size) + \ - '-reg_' + str(args.delta_reg) + \ - '-wd_' + str(args.wd_rate) - return model_id - - -def train(): - dataset = args.dataset - image_shape = [3, 224, 224] - pretrained_model = args.pretrained_model - - class_map_path = f'{global_data_path}/{dataset}/readable_label.txt' - - if os.path.exists(class_map_path): - logger.info( - "The map of readable label and numerical label has been found!") - with open(class_map_path) as f: - label_dict = {} - strinfo = re.compile(r"\d+ ") - for item in f.readlines(): - key = int(item.split(" ")[0]) - value = [ - strinfo.sub("", l).replace("\n", "") - for l in item.split(", ") - ] - label_dict[key] = value[0] - - assert os.path.isdir( - pretrained_model), "please load right pretrained model path for infer" - - # data reader - batch_size = args.batch_size - reader_config = ReaderConfig(f'{global_data_path}/{dataset}', is_test=False) - reader = reader_config.get_reader() - train_reader = paddle.batch( - paddle.reader.shuffle(reader, buf_size=batch_size), - batch_size, - drop_last=True) - - # model ops - image = fluid.data( - name='image', shape=[None] + image_shape, dtype='float32') - label = fluid.data(name='label', shape=[None, 1], dtype='int64') - model = ResNet101(is_test=False) - features, logits = model.net( - input=image, class_dim=reader_config.num_classes) - out = fluid.layers.softmax(logits) - - # loss, metric - cost = fluid.layers.mean(fluid.layers.cross_entropy(out, label)) - accuracy = fluid.layers.accuracy(input=out, label=label) - - # delta regularization - # teacher model pre-trained on Imagenet, 1000 classes. - global_name = 't_' - t_model = ResNet101(is_test=True, global_name=global_name) - t_features, _ = t_model.net(input=image, class_dim=1000) - for f in t_features.keys(): - t_features[f].stop_gradient = True - - # delta loss. hard code for the layer name, which is just before global pooling. - delta_loss = fluid.layers.square(t_features['t_res5c.add.output.5.tmp_0'] - - features['res5c.add.output.5.tmp_0']) - delta_loss = fluid.layers.reduce_mean(delta_loss) - - params = fluid.default_main_program().global_block().all_parameters() - parameters = [] - for param in params: - if param.trainable: - if global_name in param.name: - print('\tfixing', param.name) - else: - print('\ttraining', param.name) - parameters.append(param.name) - - # optimizer, with piecewise_decay learning rate. 
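The two fluid ops computing `delta_loss` above are the heart of this implementation: DELTA's behavioral regularizer is the mean squared distance between the frozen teacher's and the trainable student's activations at the layer just before global pooling (here addressed via the hard-coded tensor names `t_res5c.add.output.5.tmp_0` and `res5c.add.output.5.tmp_0`). A minimal NumPy sketch of the objective being minimized — illustrative names and shapes, not the repo's API:

```python
import numpy as np

def delta_objective(ce_loss, student_fmap, teacher_fmap, delta_reg=0.1):
    """Task loss plus the mean squared feature-map distance (teacher frozen)."""
    behavior_reg = np.mean((student_fmap - teacher_fmap) ** 2)
    return ce_loss + delta_reg * behavior_reg

# Toy example: batch 2, 4 channels, 7x7 feature maps.
s = np.random.rand(2, 4, 7, 7).astype('float32')
t = np.random.rand(2, 4, 7, 7).astype('float32')
print(delta_objective(0.9, s, t))
```

Note this is the unweighted variant; the DELTA paper additionally weights each channel with an attention value derived from the teacher, which this script omits.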
- total_steps = len(reader_config.image_paths) * args.num_epoch // batch_size - boundaries = [int(total_steps * 2 / 3)] - print('\ttotal learning steps:', total_steps) - print('\tlr decays at:', boundaries) - values = [0.01, 0.001] - optimizer = fluid.optimizer.Momentum( - learning_rate=fluid.layers.piecewise_decay( - boundaries=boundaries, values=values), - momentum=0.9, - parameter_list=parameters, - regularization=fluid.regularizer.L2Decay(args.wd_rate)) - cur_lr = optimizer._global_learning_rate() - - optimizer.minimize( - cost + args.delta_reg * delta_loss, parameter_list=parameters) - - # data reader - feed_order = ['image', 'label'] - - # executor (session) - place = fluid.CUDAPlace( - args.use_cuda) if args.use_cuda >= 0 else fluid.CPUPlace() - exe = fluid.Executor(place) - - # running - main_program = fluid.default_main_program() - start_program = fluid.default_startup_program() - - feed_var_list_loop = [ - main_program.global_block().var(var_name) for var_name in feed_order - ] - feeder = fluid.DataFeeder(feed_list=feed_var_list_loop, place=place) - exe.run(start_program) - - loading_parameters = {} - t_loading_parameters = {} - for p in main_program.all_parameters(): - if 'fc' not in p.name: - if global_name in p.name: - new_name = os.path.join(pretrained_model, - p.name.split(global_name)[-1]) - t_loading_parameters[new_name] = p - print(new_name, p.name) - else: - name = os.path.join(pretrained_model, p.name) - loading_parameters[name] = p - print(name, p.name) - else: - print(f'not loading {p.name}') - - load_vars_by_dict(exe, loading_parameters, main_program=main_program) - load_vars_by_dict(exe, t_loading_parameters, main_program=main_program) - - step = 0 - - # test_data = reader_creator_all_in_memory('./datasets/PetImages', is_test=True) - for e_id in range(args.num_epoch): - avg_delta_loss = AverageMeter() - avg_loss = AverageMeter() - avg_accuracy = AverageMeter() - batch_time = AverageMeter() - end = time.time() - - for step_id, data_train in enumerate(train_reader()): - wrapped_results = exe.run( - main_program, - feed=feeder.feed(data_train), - fetch_list=[cost, accuracy, delta_loss, cur_lr]) - # print(avg_loss_value[2]) - batch_time.update(time.time() - end) - end = time.time() - - avg_loss.update(wrapped_results[0][0], len(data_train)) - avg_accuracy.update(wrapped_results[1][0], len(data_train)) - avg_delta_loss.update(wrapped_results[2][0], len(data_train)) - if step % 100 == 0: - print( - f"\tEpoch {e_id}, Global_Step {step}, Batch_Time {batch_time.avg: .2f}," - f" LR {wrapped_results[3][0]}, " - f"Loss {avg_loss.avg: .4f}, Acc {avg_accuracy.avg: .4f}, Delta_Loss {avg_delta_loss.avg: .4f}" - ) - step += 1 - - if args.outdir is not None: - try: - os.makedirs(args.outdir, exist_ok=True) - fluid.io.save_params( - executor=exe, dirname=args.outdir + '/' + get_model_id()) - except: - print('\t Not saving trained parameters.') - - if e_id == args.num_epoch - 1: - print("kpis\ttrain_cost\t%f" % avg_loss.avg) - print("kpis\ttrain_acc\t%f" % avg_accuracy.avg) - - -def test(): - image_shape = [3, 224, 224] - pretrained_model = args.outdir + '/' + get_model_id() - - # data reader - batch_size = args.batch_size - reader_config = ReaderConfig( - f'{global_data_path}/{args.dataset}', is_test=True) - reader = reader_config.get_reader() - test_reader = paddle.batch(reader, batch_size) - - # model ops - image = fluid.data( - name='image', shape=[None] + image_shape, dtype='float32') - label = fluid.data(name='label', shape=[None, 1], dtype='int64') - model = 
ResNet101(is_test=True) - _, logits = model.net(input=image, class_dim=reader_config.num_classes) - out = fluid.layers.softmax(logits) - - # loss, metric - cost = fluid.layers.mean(fluid.layers.cross_entropy(out, label)) - accuracy = fluid.layers.accuracy(input=out, label=label) - - # data reader - feed_order = ['image', 'label'] - - # executor (session) - place = fluid.CUDAPlace( - args.use_cuda) if args.use_cuda >= 0 else fluid.CPUPlace() - exe = fluid.Executor(place) - - # running - main_program = fluid.default_main_program() - start_program = fluid.default_startup_program() - - feed_var_list_loop = [ - main_program.global_block().var(var_name) for var_name in feed_order - ] - feeder = fluid.DataFeeder(feed_list=feed_var_list_loop, place=place) - exe.run(start_program) - - fluid.io.load_params(exe, pretrained_model) - - step = 0 - avg_loss = AverageMeter() - avg_accuracy = AverageMeter() - - for step_id, data_train in enumerate(test_reader()): - avg_loss_value = exe.run( - main_program, - feed=feeder.feed(data_train), - fetch_list=[cost, accuracy]) - avg_loss.update(avg_loss_value[0], len(data_train)) - avg_accuracy.update(avg_loss_value[1], len(data_train)) - if step_id % 10 == 0: - print("\nBatch %d, Loss %f, Acc %f" % (step_id, avg_loss.avg, - avg_accuracy.avg)) - step += 1 - - print("test counts:", avg_loss.count) - print("test_cost\t%f" % avg_loss.avg) - print("test_acc\t%f" % avg_accuracy.avg) - - -if __name__ == '__main__': - print(args) - train() - test() diff --git a/autodl/DELTA/models/resnet.py b/autodl/DELTA/models/resnet.py deleted file mode 100644 index 737a69b85fac9db93380d72092a756ba6a1686fb..0000000000000000000000000000000000000000 --- a/autodl/DELTA/models/resnet.py +++ /dev/null @@ -1,282 +0,0 @@ -#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -# from https://github.com/PaddlePaddle/models/blob/release/1.7/PaddleCV/image_classification/models/resnet.py. 
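A note on the loading logic in `main.py` above: the backbone is instantiated twice — a trainable student and a frozen teacher whose parameter names carry the `t_` prefix — and both copies are initialized from the same pretrained checkpoint files, skipping the FC head. A standalone sketch of that mapping (`build_loading_maps` is a hypothetical helper, not a function in the repo):

```python
import os

def build_loading_maps(param_names, pretrained_dir, teacher_prefix='t_'):
    """Split parameters into student/teacher maps of checkpoint file -> name.

    Teacher parameters carry the 't_' prefix inside the program but load
    from the same pretrained files as the student, so the prefix is
    stripped when forming the file path; they go in a separate map because
    the same path appears in both. FC parameters are skipped: the head is
    re-initialized for the target task's class count.
    """
    student, teacher = {}, {}
    for name in param_names:
        if 'fc' in name:
            continue  # randomly initialized head
        if teacher_prefix in name:
            path = os.path.join(pretrained_dir, name.split(teacher_prefix)[-1])
            teacher[path] = name
        else:
            student[os.path.join(pretrained_dir, name)] = name
    return student, teacher

# Both copies of a conv weight resolve to the same checkpoint file:
print(build_loading_maps(['conv1_weights', 't_conv1_weights', 'fc_0.w_0'],
                         './pretrained_models/ResNet101_pretrained'))
```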
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math - -import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr - -__all__ = [ - "ResNet", "ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet152" -] - - -class ResNet(): - def __init__(self, layers=50, is_test=True, global_name=''): - self.layers = layers - self.is_test = is_test - self.features = {} - self.global_name = global_name - - def net(self, input, class_dim=1000, data_format="NCHW"): - layers = self.layers - supported_layers = [18, 34, 50, 101, 152] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - num_filters = [64, 128, 256, 512] - - conv = self.conv_bn_layer( - input=input, - num_filters=64, - filter_size=7, - stride=2, - act='relu', - name="conv1", - data_format=data_format) - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max', - name=self.global_name + 'poo1', - data_format=data_format) - - self.features[conv.name] = conv - - if layers >= 50: - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - name=conv_name, - data_format=data_format) - self.features[conv.name] = conv - - pool = fluid.layers.pool2d( - input=conv, - pool_type='avg', - global_pooling=True, - name=self.global_name + 'global_pooling', - data_format=data_format) - - self.features[pool.name] = pool - - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=class_dim, - bias_attr=fluid.param_attr.ParamAttr( - name=self.global_name + 'fc_0.b_0'), - param_attr=fluid.param_attr.ParamAttr( - name=self.global_name + 'fc_0.w_0', - initializer=fluid.initializer.Uniform(-stdv, stdv))) - else: - for block in range(len(depth)): - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.basic_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - is_first=block == i == 0, - name=conv_name, - data_format=data_format) - self.features[conv.name] = conv - - pool = fluid.layers.pool2d( - input=conv, - pool_type='avg', - global_pooling=True, - name=self.global_name + 'global_pooling', - data_format=data_format) - - self.features[pool.name] = pool - - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=class_dim, - bias_attr=fluid.param_attr.ParamAttr( - name=self.global_name + 'fc_0.b_0'), - param_attr=fluid.param_attr.ParamAttr( - name=self.global_name + 'fc_0.w_0', - initializer=fluid.initializer.Uniform(-stdv, stdv))) - return self.features, out - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - data_format='NCHW'): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - 
groups=groups, - act=None, - param_attr=ParamAttr(name=self.global_name + name + "_weights"), - bias_attr=False, - name=name + '.conv2d.output.1', - data_format=data_format) - - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - name=self.global_name + bn_name + '.output.1', - param_attr=ParamAttr(self.global_name + bn_name + '_scale'), - bias_attr=ParamAttr(self.global_name + bn_name + '_offset'), - moving_mean_name=self.global_name + bn_name + '_mean', - moving_variance_name=self.global_name + bn_name + '_variance', - data_layout=data_format, - use_global_stats=self.is_test) - - def shortcut(self, input, ch_out, stride, is_first, name, data_format): - if data_format == 'NCHW': - ch_in = input.shape[1] - else: - ch_in = input.shape[-1] - if ch_in != ch_out or stride != 1 or is_first == True: - return self.conv_bn_layer( - input, ch_out, 1, stride, name=name, data_format=data_format) - else: - return input - - def bottleneck_block(self, input, num_filters, stride, name, data_format): - conv0 = self.conv_bn_layer( - input=input, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a", - data_format=data_format) - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2b", - data_format=data_format) - conv2 = self.conv_bn_layer( - input=conv1, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c", - data_format=data_format) - - short = self.shortcut( - input, - num_filters * 4, - stride, - is_first=False, - name=name + "_branch1", - data_format=data_format) - - return fluid.layers.elementwise_add( - x=short, - y=conv2, - act='relu', - name=self.global_name + name + ".add.output.5") - - def basic_block(self, input, num_filters, stride, is_first, name, - data_format): - conv0 = self.conv_bn_layer( - input=input, - num_filters=num_filters, - filter_size=3, - act='relu', - stride=stride, - name=name + "_branch2a", - data_format=data_format) - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b", - data_format=data_format) - short = self.shortcut( - input, - num_filters, - stride, - is_first, - name=name + "_branch1", - data_format=data_format) - return fluid.layers.elementwise_add( - x=short, - y=conv1, - act='relu', - name=self.global_name + name + ".add.output.5") - - -def ResNet18(is_test=True, global_name=''): - model = ResNet(layers=18, is_test=is_test, global_name=global_name) - return model - - -def ResNet34(is_test=True, global_name=''): - model = ResNet(layers=34, is_test=is_test, global_name=global_name) - return model - - -def ResNet50(is_test=True, global_name=''): - model = ResNet(layers=50, is_test=is_test, global_name=global_name) - return model - - -def ResNet101(is_test=True, global_name=''): - model = ResNet(layers=101, is_test=is_test, global_name=global_name) - return model - - -def ResNet152(is_test=True, global_name=''): - model = ResNet(layers=152, is_test=is_test, global_name=global_name) - return model diff --git a/autodl/DELTA/models/resnet_vc.py b/autodl/DELTA/models/resnet_vc.py deleted file mode 100644 index 437780f09b82bb4c380ec810ff37160520377603..0000000000000000000000000000000000000000 --- a/autodl/DELTA/models/resnet_vc.py +++ /dev/null @@ -1,214 +0,0 @@ -#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. 
-# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -# from https://github.com/PaddlePaddle/models/blob/release/1.7/PaddleCV/image_classification/models/resnet_vc.py. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math - -import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr - -__all__ = ["ResNet", "ResNet50_vc", "ResNet101_vc", "ResNet152_vc"] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} - - -class ResNet(): - def __init__(self, layers=50, is_test=False, global_name=''): - self.params = train_parameters - self.layers = layers - self.is_test = is_test - self.features = {} - self.global_name = global_name - - def net(self, input, class_dim=1000): - layers = self.layers - supported_layers = [50, 101, 152] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - - if layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - num_filters = [64, 128, 256, 512] - - conv = self.conv_bn_layer( - input=input, - num_filters=32, - filter_size=3, - stride=2, - act='relu', - name='conv1_1') - conv = self.conv_bn_layer( - input=conv, - num_filters=32, - filter_size=3, - stride=1, - act='relu', - name='conv1_2') - conv = self.conv_bn_layer( - input=conv, - num_filters=64, - filter_size=3, - stride=1, - act='relu', - name='conv1_3') - - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max', - name=self.global_name + 'poo1') - - self.features[conv.name] = conv - - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - name=conv_name) - self.features[conv.name] = conv - - pool = fluid.layers.pool2d( - input=conv, - pool_type='avg', - global_pooling=True, - name=self.global_name + 'global_pooling') - - self.features[pool.name] = pool - - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=class_dim, - bias_attr=fluid.param_attr.ParamAttr( - name=self.global_name + 'fc_0.b_0'), - param_attr=fluid.param_attr.ParamAttr( - name=self.global_name + 'fc_0.w_0', - initializer=fluid.initializer.Uniform(-stdv, stdv))) - return self.features, out - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - conv = fluid.layers.conv2d( - input=input, - 
num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=self.global_name + name + "_weights"), - bias_attr=False, - name=self.global_name + name + '.conv2d.output.1') - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - name=self.global_name + bn_name + '.output.1', - param_attr=ParamAttr(self.global_name + bn_name + '_scale'), - bias_attr=ParamAttr(self.global_name + bn_name + '_offset'), - moving_mean_name=self.global_name + bn_name + '_mean', - moving_variance_name=self.global_name + bn_name + '_variance', - use_global_stats=self.is_test) - - def shortcut(self, input, ch_out, stride, name): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck_block(self, input, num_filters, stride, name): - conv0 = self.conv_bn_layer( - input=input, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - conv2 = self.conv_bn_layer( - input=conv1, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c") - - short = self.shortcut( - input, num_filters * 4, stride, name=name + "_branch1") - - return fluid.layers.elementwise_add( - x=short, - y=conv2, - act='relu', - name=self.global_name + name + ".add.output.5") - - -def ResNet50_vc(is_test=True, global_name=''): - model = ResNet(layers=50, is_test=is_test, global_name=global_name) - return model - - -def ResNet101_vc(is_test=True, global_name=''): - model = ResNet(layers=101, is_test=is_test, global_name=global_name) - return model - - -def ResNet152_vc(is_test=True, global_name=''): - model = ResNet(layers=152, is_test=is_test, global_name=global_name) - return model diff --git a/hub_module/modules/audio/tts/deepvoice3_ljspeech/module.py b/hub_module/modules/audio/tts/deepvoice3_ljspeech/module.py deleted file mode 100644 index ec76b972f50d84ab02cbb61bddae77b9a5e9776b..0000000000000000000000000000000000000000 --- a/hub_module/modules/audio/tts/deepvoice3_ljspeech/module.py +++ /dev/null @@ -1,366 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
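The TTS module below (like its siblings later in this diff) checks its optional dependencies with `importlib.util.find_spec` before importing them, so a missing package yields one actionable error message instead of a bare `ImportError` at the first failing import. The pattern in isolation, as a runnable sketch:

```python
import importlib.util

required = ("ruamel", "parakeet", "soundfile", "librosa")
missing = [d for d in required if importlib.util.find_spec(d) is None]
if missing:
    raise ImportError("missing optional dependencies: " + ", ".join(missing))
```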
-import os -import argparse -import ast -import importlib.util - -import nltk -import numpy as np -import paddle.fluid as fluid -import paddle.fluid.dygraph as dg -import paddlehub as hub -from paddlehub.common.logger import logger -from paddlehub.module.module import moduleinfo, serving -from paddlehub.common.dir import THIRD_PARTY_HOME -from paddlehub.common.utils import mkdir -from paddlehub.common.downloader import default_downloader -from paddlehub.module.module import runnable -from paddlehub.module.nlp_module import DataFormatError - -lack_dependency = [] -for dependency in ["ruamel", "parakeet", "soundfile", "librosa"]: - if not importlib.util.find_spec(dependency): - lack_dependency.append(dependency) - -# Accelerate NLTK package download via paddlehub. 'import parakeet' will use the package. -_PUNKT_URL = "https://paddlehub.bj.bcebos.com/paddlehub-thirdparty/punkt.tar.gz" -_CMUDICT_URL = "https://paddlehub.bj.bcebos.com/paddlehub-thirdparty/cmudict.tar.gz" -nltk_path = os.path.join(THIRD_PARTY_HOME, "nltk_data") -tokenizers_path = os.path.join(nltk_path, "tokenizers") -corpora_path = os.path.join(nltk_path, "corpora") -punkt_path = os.path.join(tokenizers_path, "punkt") -cmudict_path = os.path.join(corpora_path, "cmudict") - -if not os.path.exists(punkt_path): - default_downloader.download_file_and_uncompress( - url=_PUNKT_URL, save_path=tokenizers_path, print_progress=True) -if not os.path.exists(cmudict_path): - default_downloader.download_file_and_uncompress( - url=_CMUDICT_URL, save_path=corpora_path, print_progress=True) -nltk.data.path.append(nltk_path) - -if not lack_dependency: - import soundfile as sf - import librosa - import ruamel.yaml - from parakeet.utils import io - from parakeet.g2p import en - from parakeet.models.deepvoice3 import Encoder, Decoder, PostNet, SpectraNet - from parakeet.models.waveflow import WaveFlowModule - from parakeet.models.deepvoice3.weight_norm_hook import remove_weight_norm -else: - raise ImportError( - "The module requires additional dependencies: %s. You can install parakeet via 'git clone https://github.com/PaddlePaddle/Parakeet && cd Parakeet && pip install -e .' 
and others via pip install" - % ", ".join(lack_dependency)) - - -class AttrDict(dict): - def __init__(self, *args, **kwargs): - super(AttrDict, self).__init__(*args, **kwargs) - self.__dict__ = self - - -class WaveflowVocoder(object): - def __init__(self, config_path, checkpoint_path): - with open(config_path, 'rt') as f: - config = ruamel.yaml.safe_load(f) - ns = argparse.Namespace() - for k, v in config.items(): - setattr(ns, k, v) - ns.use_fp16 = False - - self.model = WaveFlowModule(ns) - io.load_parameters(self.model, checkpoint_path=checkpoint_path) - - def __call__(self, mel): - with dg.no_grad(): - self.model.eval() - audio = self.model.synthesize(mel) - self.model.train() - return audio - - -class GriffinLimVocoder(object): - def __init__(self, - sharpening_factor=1.4, - sample_rate=22050, - n_fft=1024, - win_length=1024, - hop_length=256): - self.sample_rate = sample_rate - self.n_fft = n_fft - self.sharpening_factor = sharpening_factor - self.win_length = win_length - self.hop_length = hop_length - - def __call__(self, mel): - spec = librosa.feature.inverse.mel_to_stft( - np.exp(mel), - sr=self.sample_rate, - n_fft=self.n_fft, - fmin=0, - fmax=8000.0, - power=1.0) - audio = librosa.core.griffinlim( - spec**self.sharpening_factor, - win_length=self.win_length, - hop_length=self.hop_length) - return audio - - -@moduleinfo( - name="deepvoice3_ljspeech", - version="1.0.0", - summary= - "Deep Voice 3, a fully-convolutional attention-based neural text-to-speech (TTS) system.", - author="paddlepaddle", - author_email="", - type="nlp/tts", -) -class DeepVoice3(hub.NLPPredictionModule): - def _initialize(self): - """ - initialize with the necessary elements - """ - self.tts_checkpoint_path = os.path.join(self.directory, "assets", "tts", - "step-1780000") - self.waveflow_checkpoint_path = os.path.join(self.directory, "assets", - "vocoder", "step-2000000") - self.waveflow_config_path = os.path.join( - self.directory, "assets", "vocoder", "waveflow_ljspeech.yaml") - tts_checkpoint_path = os.path.join(self.directory, "assets", "tts", - "ljspeech.yaml") - with open(tts_checkpoint_path) as f: - self.tts_config = ruamel.yaml.safe_load(f) - - with fluid.dygraph.guard(fluid.CPUPlace()): - char_embedding = dg.Embedding((en.n_vocab, - self.tts_config["char_dim"])) - multi_speaker = self.tts_config["n_speakers"] > 1 - speaker_embedding = dg.Embedding((self.tts_config["n_speakers"], self.tts_config["speaker_dim"])) \ - if multi_speaker else None - encoder = Encoder( - self.tts_config["encoder_layers"], - self.tts_config["char_dim"], - self.tts_config["encoder_dim"], - self.tts_config["kernel_size"], - has_bias=multi_speaker, - bias_dim=self.tts_config["speaker_dim"], - keep_prob=1.0 - self.tts_config["dropout"]) - decoder = Decoder( - self.tts_config["n_mels"], - self.tts_config["reduction_factor"], - list(self.tts_config["prenet_sizes"]) + - [self.tts_config["char_dim"]], - self.tts_config["decoder_layers"], - self.tts_config["kernel_size"], - self.tts_config["attention_dim"], - position_encoding_weight=self.tts_config["position_weight"], - omega=self.tts_config["position_rate"], - has_bias=multi_speaker, - bias_dim=self.tts_config["speaker_dim"], - keep_prob=1.0 - self.tts_config["dropout"]) - postnet = PostNet( - self.tts_config["postnet_layers"], - self.tts_config["char_dim"], - self.tts_config["postnet_dim"], - self.tts_config["kernel_size"], - self.tts_config["n_mels"], - self.tts_config["reduction_factor"], - has_bias=multi_speaker, - bias_dim=self.tts_config["speaker_dim"], - keep_prob=1.0 - 
self.tts_config["dropout"]) - self.tts_model = SpectraNet(char_embedding, speaker_embedding, - encoder, decoder, postnet) - io.load_parameters( - model=self.tts_model, checkpoint_path=self.tts_checkpoint_path) - for name, layer in self.tts_model.named_sublayers(): - try: - remove_weight_norm(layer) - except ValueError: - # this layer has not weight norm hook - pass - - self.waveflow = WaveflowVocoder( - config_path=self.waveflow_config_path, - checkpoint_path=self.waveflow_checkpoint_path) - self.griffin = GriffinLimVocoder( - sharpening_factor=self.tts_config["sharpening_factor"], - sample_rate=self.tts_config["sample_rate"], - n_fft=self.tts_config["n_fft"], - win_length=self.tts_config["win_length"], - hop_length=self.tts_config["hop_length"]) - - def synthesize(self, texts, use_gpu=False, vocoder="griffin-lim"): - """ - Get the synthetic wavs from the texts. - - Args: - texts(list): the input texts to be predicted. - use_gpu(bool): whether use gpu to predict or not - vocoder(str): the vocoder name, "griffin-lim" or "waveflow" - - Returns: - wavs(str): the audio wav with sample rate . You can use soundfile.write to save it. - sample_rate(int): the audio sample rate. - """ - if use_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ: - use_gpu = False - logger.warning( - "use_gpu has been set False as you didn't set the environment variable CUDA_VISIBLE_DEVICES while using use_gpu=True" - ) - - place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() - - if texts and isinstance(texts, list): - predicted_data = texts - else: - raise ValueError( - "The input data is inconsistent with expectations.") - - wavs = [] - with fluid.dygraph.guard(place): - self.tts_model.eval() - self.waveflow.model.eval() - monotonic_layers = [4] - for text in predicted_data: - # init input - logger.info("Processing sentence: %s" % text) - text = en.text_to_sequence(text, p=1.0) - text = np.expand_dims(np.array(text, dtype="int64"), 0) - lengths = np.array([text.size], dtype=np.int64) - text_seqs = dg.to_variable(text) - text_lengths = dg.to_variable(lengths) - - decoder_layers = self.tts_config["decoder_layers"] - force_monotonic_attention = [False] * decoder_layers - for i in monotonic_layers: - force_monotonic_attention[i] = True - - outputs = self.tts_model( - text_seqs, - text_lengths, - speakers=None, - force_monotonic_attention=force_monotonic_attention, - window=(self.tts_config["backward_step"], - self.tts_config["forward_step"])) - decoded, refined, attentions = outputs - if vocoder == 'griffin-lim': - # synthesis use griffin-lim - wav = self.griffin(refined.numpy()[0].T) - elif vocoder == 'waveflow': - # synthesis use waveflow - wav = self.waveflow( - fluid.layers.transpose(refined, [0, 2, 1])).numpy()[0] - else: - raise ValueError( - 'vocoder error, we only support griffinlim and waveflow, but recevied %s.' - % vocoder) - wavs.append(wav) - return wavs, self.tts_config["sample_rate"] - - @serving - def serving_method(self, texts, use_gpu=False, vocoder="griffin-lim"): - """ - Run as a service. 
-        """
-        wavs, sample_rate = self.synthesize(texts, use_gpu, vocoder)
-        wavs = [wav.tolist() for wav in wavs]
-        result = {"wavs": wavs, "sample_rate": sample_rate}
-        return result
-
-    def add_module_config_arg(self):
-        """
-        Add the command config options
-        """
-        self.arg_config_group.add_argument(
-            '--use_gpu',
-            type=ast.literal_eval,
-            default=False,
-            help="whether use GPU for prediction")
-
-        self.arg_config_group.add_argument(
-            '--vocoder',
-            type=str,
-            default="griffin-lim",
-            choices=['griffin-lim', 'waveflow'],
-            help="the vocoder name")
-
-    def add_module_output_arg(self):
-        """
-        Add the command output options
-        """
-        self.arg_config_group.add_argument(
-            '--output_path',
-            type=str,
-            default=os.path.abspath(
-                os.path.join(os.path.curdir, f"{self.name}_prediction")),
-            help="path to save experiment results")
-
-    @runnable
-    def run_cmd(self, argvs):
-        """
-        Run as a command
-        """
-        self.parser = argparse.ArgumentParser(
-            description='Run the %s module.' % self.name,
-            prog='hub run %s' % self.name,
-            usage='%(prog)s',
-            add_help=True)
-
-        self.arg_input_group = self.parser.add_argument_group(
-            title="Input options", description="Input data. Required")
-        self.arg_output_group = self.parser.add_argument_group(
-            title="Output options", description="Output path. Optional.")
-        self.arg_config_group = self.parser.add_argument_group(
-            title="Config options",
-            description=
-            "Run configuration for controlling module behavior, optional.")
-
-        self.add_module_config_arg()
-        self.add_module_input_arg()
-        self.add_module_output_arg()
-
-        args = self.parser.parse_args(argvs)
-
-        try:
-            input_data = self.check_input_data(args)
-        except (DataFormatError, RuntimeError):
-            self.parser.print_help()
-            return None
-
-        mkdir(args.output_path)
-        wavs, sample_rate = self.synthesize(
-            texts=input_data, use_gpu=args.use_gpu, vocoder=args.vocoder)
-
-        for index, wav in enumerate(wavs):
-            sf.write(
-                os.path.join(args.output_path, f"{index}.wav"), wav,
-                sample_rate)
-
-        ret = f"The synthesized wav files have been saved in {args.output_path}"
-        return ret
-
-
-if __name__ == "__main__":
-    module = DeepVoice3()
-    test_text = [
-        "Simple as this proposition is, it is necessary to be stated",
-        "Parakeet stands for Paddle PARAllel text-to-speech toolkit.",
-    ]
-    wavs, sample_rate = module.synthesize(texts=test_text, vocoder="waveflow")
-    for index, wav in enumerate(wavs):
-        sf.write(f"{index}.wav", wav, sample_rate)
diff --git a/hub_module/modules/audio/tts/fastspeech_ljspeech/module.py b/hub_module/modules/audio/tts/fastspeech_ljspeech/module.py
deleted file mode 100644
index e308d53a84f6c6307578ba58ec8ca0910c3a1aa8..0000000000000000000000000000000000000000
--- a/hub_module/modules/audio/tts/fastspeech_ljspeech/module.py
+++ /dev/null
@@ -1,311 +0,0 @@
-# coding:utf-8
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
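Before moving into the FastSpeech module's code: all three TTS modules in this diff expose the same `synthesize(texts, use_gpu, vocoder)` entry point. A hedged usage sketch for the DeepVoice3 module above, mirroring its `__main__` block (assumes PaddleHub 1.x with the module and its assets installed locally):

```python
import paddlehub as hub
import soundfile as sf

# Load the module by the name declared in its @moduleinfo decorator.
module = hub.Module(name="deepvoice3_ljspeech")
wavs, sample_rate = module.synthesize(
    texts=["Parakeet stands for Paddle PARAllel text-to-speech toolkit."],
    use_gpu=False,
    vocoder="griffin-lim")  # or "waveflow" for the neural vocoder
for i, wav in enumerate(wavs):
    sf.write(f"{i}.wav", wav, sample_rate)
```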
-import os -import ast -import argparse -import importlib.util - -import nltk -import paddle.fluid as fluid -import paddle.fluid.dygraph as dg -import paddlehub as hub -from paddlehub.module.module import runnable -from paddlehub.common.utils import mkdir -from paddlehub.module.nlp_module import DataFormatError -from paddlehub.common.logger import logger -from paddlehub.module.module import moduleinfo, serving -from paddlehub.common.dir import THIRD_PARTY_HOME -from paddlehub.common.downloader import default_downloader - -lack_dependency = [] -for dependency in ["ruamel", "parakeet", "soundfile", "librosa"]: - if not importlib.util.find_spec(dependency): - lack_dependency.append(dependency) - -# Accelerate NLTK package download via paddlehub. 'import parakeet' will use the package. -_PUNKT_URL = "https://paddlehub.bj.bcebos.com/paddlehub-thirdparty/punkt.tar.gz" -_CMUDICT_URL = "https://paddlehub.bj.bcebos.com/paddlehub-thirdparty/cmudict.tar.gz" -nltk_path = os.path.join(THIRD_PARTY_HOME, "nltk_data") -tokenizers_path = os.path.join(nltk_path, "tokenizers") -corpora_path = os.path.join(nltk_path, "corpora") -punkt_path = os.path.join(tokenizers_path, "punkt") -cmudict_path = os.path.join(corpora_path, "cmudict") - -if not os.path.exists(punkt_path): - default_downloader.download_file_and_uncompress( - url=_PUNKT_URL, save_path=tokenizers_path, print_progress=True) -if not os.path.exists(cmudict_path): - default_downloader.download_file_and_uncompress( - url=_CMUDICT_URL, save_path=corpora_path, print_progress=True) -nltk.data.path.append(nltk_path) - -if not lack_dependency: - import soundfile as sf - import librosa - from ruamel import yaml - from parakeet.models.fastspeech.fastspeech import FastSpeech as FastSpeechModel - from parakeet.g2p.en import text_to_sequence - from parakeet.models.transformer_tts.utils import * - from parakeet.utils import io - from parakeet.modules.weight_norm import WeightNormWrapper - from parakeet.models.waveflow import WaveFlowModule -else: - raise ImportError( - "The module requires additional dependencies: %s. You can install parakeet via 'git clone https://github.com/PaddlePaddle/Parakeet && cd Parakeet && pip install -e .' and others via pip install" - % ", ".join(lack_dependency)) - - -class AttrDict(dict): - def __init__(self, *args, **kwargs): - super(AttrDict, self).__init__(*args, **kwargs) - self.__dict__ = self - - -@moduleinfo( - name="fastspeech_ljspeech", - version="1.0.0", - summary= - "FastSpeech proposes a novel feed-forward network based on Transformer to generate mel-spectrogram in parallel for TTS. 
See https://arxiv.org/abs/1905.09263 for details.", - author="baidu-nlp", - author_email="", - type="nlp/tts", -) -class FastSpeech(hub.NLPPredictionModule): - def _initialize(self): - """ - initialize with the necessary elements - """ - self.tts_checkpoint_path = os.path.join(self.directory, "assets", "tts", - "step-162000") - self.waveflow_checkpoint_path = os.path.join(self.directory, "assets", - "vocoder", "step-2000000") - self.waveflow_config_path = os.path.join( - self.directory, "assets", "vocoder", "waveflow_ljspeech.yaml") - - tts_config_path = os.path.join(self.directory, "assets", "tts", - "ljspeech.yaml") - with open(tts_config_path) as f: - self.tts_config = yaml.load(f, Loader=yaml.Loader) - with fluid.dygraph.guard(fluid.CPUPlace()): - self.tts_model = FastSpeechModel( - self.tts_config['network'], - num_mels=self.tts_config['audio']['num_mels']) - io.load_parameters( - model=self.tts_model, checkpoint_path=self.tts_checkpoint_path) - - # Build vocoder. - args = AttrDict() - args.config = self.waveflow_config_path - args.use_fp16 = False - self.waveflow_config = io.add_yaml_config_to_args(args) - self.waveflow = WaveFlowModule(self.waveflow_config) - io.load_parameters( - model=self.waveflow, - checkpoint_path=self.waveflow_checkpoint_path) - - def synthesize(self, texts, use_gpu=False, speed=1.0, - vocoder="griffin-lim"): - """ - Get the synthetic wavs from the texts. - - Args: - texts(list): the input texts to be predicted. - use_gpu(bool): whether use gpu to predict or not. Default False. - speed(float): Controlling the voice speed. Default 1.0. - vocoder(str): the vocoder name, "griffin-lim" or "waveflow". - - Returns: - wavs(str): the audio wav with sample rate . You can use soundfile.write to save it. - sample_rate(int): the audio sample rate. - """ - if use_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ: - use_gpu = False - logger.warning( - "use_gpu has been set False as you didn't set the environment variable CUDA_VISIBLE_DEVICES while using use_gpu=True" - ) - if use_gpu: - place = fluid.CUDAPlace(0) - else: - place = fluid.CPUPlace() - - if texts and isinstance(texts, list): - predicted_data = texts - else: - raise ValueError( - "The input data is inconsistent with expectations.") - - wavs = [] - with fluid.dygraph.guard(place): - self.tts_model.eval() - self.waveflow.eval() - for text in predicted_data: - # init input - logger.info("Processing sentence: %s" % text) - text = np.asarray(text_to_sequence(text)) - text = np.expand_dims(text, axis=0) - pos_text = np.arange(1, text.shape[1] + 1) - pos_text = np.expand_dims(pos_text, axis=0) - - text = dg.to_variable(text).astype(np.int64) - pos_text = dg.to_variable(pos_text).astype(np.int64) - - _, mel_output_postnet = self.tts_model( - text, pos_text, alpha=1 / speed) - - if vocoder == 'griffin-lim': - # synthesis use griffin-lim - wav = self.synthesis_with_griffinlim( - mel_output_postnet, self.tts_config['audio']) - elif vocoder == 'waveflow': - wav = self.synthesis_with_waveflow( - mel_output_postnet, self.waveflow_config.sigma) - else: - raise ValueError( - 'vocoder error, we only support griffinlim and waveflow, but recevied %s.' 
- % vocoder) - wavs.append(wav) - return wavs, self.tts_config['audio']['sr'] - - def synthesis_with_griffinlim(self, mel_output, cfg): - # synthesis with griffin-lim - mel_output = fluid.layers.transpose( - fluid.layers.squeeze(mel_output, [0]), [1, 0]) - mel_output = np.exp(mel_output.numpy()) - basis = librosa.filters.mel( - cfg['sr'], - cfg['n_fft'], - cfg['num_mels'], - fmin=cfg['fmin'], - fmax=cfg['fmax']) - inv_basis = np.linalg.pinv(basis) - spec = np.maximum(1e-10, np.dot(inv_basis, mel_output)) - - wav = librosa.core.griffinlim( - spec**cfg['power'], - hop_length=cfg['hop_length'], - win_length=cfg['win_length']) - - return wav - - def synthesis_with_waveflow(self, mel_output, sigma): - mel_spectrogram = fluid.layers.transpose( - fluid.layers.squeeze(mel_output, [0]), [1, 0]) - mel_spectrogram = fluid.layers.unsqueeze(mel_spectrogram, [0]) - - for layer in self.waveflow.sublayers(): - if isinstance(layer, WeightNormWrapper): - layer.remove_weight_norm() - - # Run model inference. - wav = self.waveflow.synthesize(mel_spectrogram, sigma=sigma) - return wav.numpy()[0] - - @serving - def serving_method(self, - texts, - use_gpu=False, - speed=1.0, - vocoder="griffin-lim"): - """ - Run as a service. - """ - wavs, sample_rate = self.synthesize(texts, use_gpu, speed, vocoder) - wavs = [wav.tolist() for wav in wavs] - result = {"wavs": wavs, "sample_rate": sample_rate} - return result - - def add_module_config_arg(self): - """ - Add the command config options - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU for prediction") - - self.arg_config_group.add_argument( - '--vocoder', - type=str, - default="griffin-lim", - choices=['griffin-lim', 'waveflow'], - help="the vocoder name") - - def add_module_output_arg(self): - """ - Add the command config options - """ - self.arg_config_group.add_argument( - '--output_path', - type=str, - default=os.path.abspath( - os.path.join(os.path.curdir, f"{self.name}_prediction")), - help="path to save experiment results") - - @runnable - def run_cmd(self, argvs): - """ - Run as a command - """ - self.parser = argparse.ArgumentParser( - description='Run the %s module.' % self.name, - prog='hub run %s' % self.name, - usage='%(prog)s', - add_help=True) - - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_input_group = self.parser.add_argument_group( - title="Ouput options", description="Ouput path. 
Optional.") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, optional.") - - self.add_module_config_arg() - self.add_module_input_arg() - self.add_module_output_arg() - - args = self.parser.parse_args(argvs) - - try: - input_data = self.check_input_data(args) - except DataFormatError and RuntimeError: - self.parser.print_help() - return None - - mkdir(args.output_path) - wavs, sample_rate = self.synthesize( - texts=input_data, use_gpu=args.use_gpu, vocoder=args.vocoder) - - for index, wav in enumerate(wavs): - sf.write( - os.path.join(args.output_path, f"{index}.wav"), wav, - sample_rate) - - ret = f"The synthesized wav files have been saved in {args.output_path}" - return ret - - -if __name__ == "__main__": - - module = FastSpeech() - test_text = [ - "Simple as this proposition is, it is necessary to be stated", - ] - wavs, sample_rate = module.synthesize( - texts=test_text, speed=1, vocoder="waveflow") - for index, wav in enumerate(wavs): - sf.write(f"{index}.wav", wav, sample_rate) diff --git a/hub_module/modules/audio/tts/transformer_tts_ljspeech/module.py b/hub_module/modules/audio/tts/transformer_tts_ljspeech/module.py deleted file mode 100644 index 2f58c083d6ba9b20cb434ff58110fbd0fb6e23b3..0000000000000000000000000000000000000000 --- a/hub_module/modules/audio/tts/transformer_tts_ljspeech/module.py +++ /dev/null @@ -1,329 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import ast -import argparse -import importlib.util - -import nltk -import paddle.fluid as fluid -import paddle.fluid.dygraph as dg -import paddlehub as hub -from paddlehub.module.module import runnable -from paddlehub.common.utils import mkdir -from paddlehub.module.nlp_module import DataFormatError -from paddlehub.common.logger import logger -from paddlehub.module.module import moduleinfo, serving -from paddlehub.common.dir import THIRD_PARTY_HOME -from paddlehub.common.downloader import default_downloader - -lack_dependency = [] -for dependency in ["ruamel", "parakeet", "scipy", "soundfile", "librosa"]: - if not importlib.util.find_spec(dependency): - lack_dependency.append(dependency) - -# Accelerate NLTK package download via paddlehub. 'import parakeet' will use the package. 
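The `synthesis_with_griffinlim` helper that FastSpeech above uses (and Transformer TTS below repeats) inverts the mel filterbank with a pseudo-inverse, then recovers phase with Griffin-Lim. A standalone sketch on synthetic input, targeting the older librosa API these modules use; the shapes and the power exponent are assumptions, not the modules' config values:

```python
import numpy as np
import librosa

sr, n_fft, num_mels = 22050, 1024, 80
# Synthetic log-mel spectrogram [num_mels, frames] standing in for model output.
log_mel = np.random.uniform(-4.0, 0.0, size=(num_mels, 200)).astype('float32')

mel = np.exp(log_mel)                             # undo the log scale
basis = librosa.filters.mel(sr, n_fft, num_mels)  # [num_mels, 1 + n_fft // 2]
spec = np.maximum(1e-10, np.dot(np.linalg.pinv(basis), mel))
wav = librosa.core.griffinlim(spec**1.2, hop_length=256, win_length=1024)
```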
-_PUNKT_URL = "https://paddlehub.bj.bcebos.com/paddlehub-thirdparty/punkt.tar.gz" -_CMUDICT_URL = "https://paddlehub.bj.bcebos.com/paddlehub-thirdparty/cmudict.tar.gz" -nltk_path = os.path.join(THIRD_PARTY_HOME, "nltk_data") -tokenizers_path = os.path.join(nltk_path, "tokenizers") -corpora_path = os.path.join(nltk_path, "corpora") -punkt_path = os.path.join(tokenizers_path, "punkt") -cmudict_path = os.path.join(corpora_path, "cmudict") - -if not os.path.exists(punkt_path): - default_downloader.download_file_and_uncompress( - url=_PUNKT_URL, save_path=tokenizers_path, print_progress=True) -if not os.path.exists(cmudict_path): - default_downloader.download_file_and_uncompress( - url=_CMUDICT_URL, save_path=corpora_path, print_progress=True) -nltk.data.path.append(nltk_path) - -if not lack_dependency: - import soundfile as sf - import librosa - from ruamel import yaml - from scipy.io.wavfile import write - from parakeet.g2p.en import text_to_sequence - from parakeet.models.transformer_tts.utils import * - from parakeet.models.transformer_tts import TransformerTTS as TransformerTTSModel - from parakeet.models.waveflow import WaveFlowModule - from parakeet.utils import io - from parakeet.modules.weight_norm import WeightNormWrapper -else: - raise ImportError( - "The module requires additional dependencies: %s. You can install parakeet via 'git clone https://github.com/PaddlePaddle/Parakeet && cd Parakeet && pip install -e .' and others via pip install" - % ", ".join(lack_dependency)) - - -class AttrDict(dict): - def __init__(self, *args, **kwargs): - super(AttrDict, self).__init__(*args, **kwargs) - self.__dict__ = self - - -@moduleinfo( - name="transformer_tts_ljspeech", - version="1.0.0", - summary= - "Transformer TTS introduces and adapts the multi-head attention mechanism to replace the RNN structures and also the original attention mechanism in Tacotron2. See https://arxiv.org/abs/1809.08895 for details", - author="baidu-nlp", - author_email="", - type="nlp/tts", -) -class TransformerTTS(hub.NLPPredictionModule): - def _initialize(self): - """ - initialize with the necessary elements - """ - self.tts_checkpoint_path = os.path.join(self.directory, "assets", "tts", - "step-120000") - self.waveflow_checkpoint_path = os.path.join(self.directory, "assets", - "vocoder", "step-2000000") - self.waveflow_config_path = os.path.join( - self.directory, "assets", "vocoder", "waveflow_ljspeech.yaml") - - tts_config_path = os.path.join(self.directory, "assets", "tts", - "ljspeech.yaml") - with open(tts_config_path) as f: - self.tts_config = yaml.load(f, Loader=yaml.Loader) - - # The max length of audio when synthsis. - self.max_len = 1000 - # The threshold of stop token which indicates the time step should stop generate spectrum or not. - self.stop_threshold = 0.5 - - with fluid.dygraph.guard(fluid.CPUPlace()): - # Build TTS. - with fluid.unique_name.guard(): - network_cfg = self.tts_config['network'] - self.tts_model = TransformerTTSModel( - network_cfg['embedding_size'], network_cfg['hidden_size'], - network_cfg['encoder_num_head'], - network_cfg['encoder_n_layers'], - self.tts_config['audio']['num_mels'], - network_cfg['outputs_per_step'], - network_cfg['decoder_num_head'], - network_cfg['decoder_n_layers']) - io.load_parameters( - model=self.tts_model, - checkpoint_path=self.tts_checkpoint_path) - - # Build vocoder. 
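The vocoder construction below leans on the `AttrDict` helper defined near the top of each module: a dict that exposes its keys as attributes, so it can be passed wherever a namespace-like config object is expected (here, to `io.add_yaml_config_to_args`). A two-line demonstration:

```python
class AttrDict(dict):
    """dict with attribute access, standing in for a config namespace."""
    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self

cfg = AttrDict(config="waveflow_ljspeech.yaml", use_fp16=False)
print(cfg.config, cfg["use_fp16"])  # attribute and key access hit the same storage
```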
- args = AttrDict() - args.config = self.waveflow_config_path - args.use_fp16 = False - self.waveflow_config = io.add_yaml_config_to_args(args) - self.waveflow = WaveFlowModule(self.waveflow_config) - io.load_parameters( - model=self.waveflow, - checkpoint_path=self.waveflow_checkpoint_path) - - def synthesize(self, texts, use_gpu=False, vocoder="griffin-lim"): - """ - Get the synthetic wavs from the texts. - - Args: - texts(list): the input texts to be predicted. - use_gpu(bool): whether use gpu to predict or not - vocoder(str): the vocoder name, "griffin-lim" or "waveflow" - - Returns: - wavs(str): the audio wav with sample rate . You can use soundfile.write to save it. - sample_rate(int): the audio sample rate. - """ - if use_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ: - use_gpu = False - logger.warning( - "use_gpu has been set False as you didn't set the environment variable CUDA_VISIBLE_DEVICES while using use_gpu=True" - ) - if use_gpu: - place = fluid.CUDAPlace(0) - else: - place = fluid.CPUPlace() - - if texts and isinstance(texts, list): - predicted_data = texts - else: - raise ValueError( - "The input data is inconsistent with expectations.") - - wavs = [] - with fluid.dygraph.guard(place): - self.tts_model.eval() - self.waveflow.eval() - for text in predicted_data: - # init input - logger.info("Processing sentence: %s" % text) - text = np.asarray(text_to_sequence(text)) - text = fluid.layers.unsqueeze( - dg.to_variable(text).astype(np.int64), [0]) - mel_input = dg.to_variable(np.zeros([1, 1, - 80])).astype(np.float32) - pos_text = np.arange(1, text.shape[1] + 1) - pos_text = fluid.layers.unsqueeze( - dg.to_variable(pos_text).astype(np.int64), [0]) - - for i in range(self.max_len): - pos_mel = np.arange(1, mel_input.shape[1] + 1) - pos_mel = fluid.layers.unsqueeze( - dg.to_variable(pos_mel).astype(np.int64), [0]) - mel_pred, postnet_pred, attn_probs, stop_preds, attn_enc, attn_dec = self.tts_model( - text, mel_input, pos_text, pos_mel) - if stop_preds.numpy()[0, -1] > self.stop_threshold: - break - mel_input = fluid.layers.concat( - [mel_input, postnet_pred[:, -1:, :]], axis=1) - if vocoder == 'griffin-lim': - # synthesis use griffin-lim - wav = self.synthesis_with_griffinlim( - postnet_pred, self.tts_config['audio']) - elif vocoder == 'waveflow': - # synthesis use waveflow - wav = self.synthesis_with_waveflow( - postnet_pred, self.waveflow_config.sigma) - else: - raise ValueError( - 'vocoder error, we only support griffinlim and waveflow, but recevied %s.' - % vocoder) - wavs.append(wav) - return wavs, self.tts_config['audio']['sr'] - - def synthesis_with_griffinlim(self, mel_output, cfg): - # synthesis with griffin-lim - mel_output = fluid.layers.transpose( - fluid.layers.squeeze(mel_output, [0]), [1, 0]) - mel_output = np.exp(mel_output.numpy()) - basis = librosa.filters.mel( - cfg['sr'], - cfg['n_fft'], - cfg['num_mels'], - fmin=cfg['fmin'], - fmax=cfg['fmax']) - inv_basis = np.linalg.pinv(basis) - spec = np.maximum(1e-10, np.dot(inv_basis, mel_output)) - - wav = librosa.core.griffinlim( - spec**cfg['power'], - hop_length=cfg['hop_length'], - win_length=cfg['win_length']) - - return wav - - def synthesis_with_waveflow(self, mel_output, sigma): - mel_spectrogram = fluid.layers.transpose( - fluid.layers.squeeze(mel_output, [0]), [1, 0]) - mel_spectrogram = fluid.layers.unsqueeze(mel_spectrogram, [0]) - - for layer in self.waveflow.sublayers(): - if isinstance(layer, WeightNormWrapper): - layer.remove_weight_norm() - - # Run model inference. 
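-        # `sigma` is WaveFlow's sampling temperature (the std of the latent
-        # prior), passed in from the module's vocoder YAML config; the
-        # weight-norm wrappers were fused into plain weights in the loop
-        # above, which only needs to happen once before repeated inference.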
- wav = self.waveflow.synthesize(mel_spectrogram, sigma=sigma) - return wav.numpy()[0] - - @serving - def serving_method(self, texts, use_gpu=False, vocoder="griffin-lim"): - """ - Run as a service. - """ - wavs, sample_rate = self.synthesize(texts, use_gpu, vocoder) - wavs = [wav.tolist() for wav in wavs] - result = {"wavs": wavs, "sample_rate": sample_rate} - return result - - def add_module_config_arg(self): - """ - Add the command config options - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU for prediction") - - self.arg_config_group.add_argument( - '--vocoder', - type=str, - default="griffin-lim", - choices=['griffin-lim', 'waveflow'], - help="the vocoder name") - - def add_module_output_arg(self): - """ - Add the command config options - """ - self.arg_config_group.add_argument( - '--output_path', - type=str, - default=os.path.abspath( - os.path.join(os.path.curdir, f"{self.name}_prediction")), - help="path to save experiment results") - - @runnable - def run_cmd(self, argvs): - """ - Run as a command - """ - self.parser = argparse.ArgumentParser( - description='Run the %s module.' % self.name, - prog='hub run %s' % self.name, - usage='%(prog)s', - add_help=True) - - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_input_group = self.parser.add_argument_group( - title="Ouput options", description="Ouput path. Optional.") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, optional.") - - self.add_module_config_arg() - self.add_module_input_arg() - self.add_module_output_arg() - - args = self.parser.parse_args(argvs) - - try: - input_data = self.check_input_data(args) - except DataFormatError and RuntimeError: - self.parser.print_help() - return None - - mkdir(args.output_path) - wavs, sample_rate = self.synthesize( - texts=input_data, use_gpu=args.use_gpu, vocoder=args.vocoder) - - for index, wav in enumerate(wavs): - sf.write( - os.path.join(args.output_path, f"{index}.wav"), wav, - sample_rate) - - ret = f"The synthesized wav files have been saved in {args.output_path}" - return ret - - -if __name__ == "__main__": - - module = TransformerTTS() - test_text = [ - "Life was like a box of chocolates, you never know what you're gonna get.", - ] - wavs, sample_rate = module.synthesize(texts=test_text, vocoder="waveflow") - for index, wav in enumerate(wavs): - sf.write(f"{index}.wav", wav, sample_rate) diff --git a/hub_module/modules/demo/senta_test/module.py b/hub_module/modules/demo/senta_test/module.py deleted file mode 100644 index 8dd344f9fa91370cbcee4b305a1ceb2afeddfecc..0000000000000000000000000000000000000000 --- a/hub_module/modules/demo/senta_test/module.py +++ /dev/null @@ -1,49 +0,0 @@ -import argparse -import os - -import paddlehub as hub -from paddlehub.module.module import runnable, moduleinfo - -from senta_test.processor import load_vocab - - -@moduleinfo( - name="senta_test", - version="1.0.0", - summary="This is a PaddleHub Module. 
Just for test.", - author="anonymous", - author_email="", - type="nlp/sentiment_analysis", -) -class SentaTest(hub.Module): - def _initialize(self): - # add arg parser - self.parser = argparse.ArgumentParser( - description="Run the senta_test module.", - prog='hub run senta_test', - usage='%(prog)s', - add_help=True) - self.parser.add_argument( - '--input_text', type=str, default=None, help="text to predict") - - # load word dict - vocab_path = os.path.join(self.directory, "vocab.list") - self.vocab = load_vocab(vocab_path) - - def sentiment_classify(self, texts): - results = [] - for text in texts: - sentiment = "positive" - for word in self.vocab: - if word in text: - sentiment = "negative" - break - results.append({"text": text, "sentiment": sentiment}) - - return results - - @runnable - def run_cmd(self, argvs): - args = self.parser.parse_args(argvs) - texts = [args.input_text] - return self.sentiment_classify(texts) diff --git a/hub_module/modules/image/classification/darknet53_imagenet/darknet.py b/hub_module/modules/image/classification/darknet53_imagenet/darknet.py deleted file mode 100644 index 1e6e6f8f6b097314c6e3363150ddcfbb9fb59e0b..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/darknet53_imagenet/darknet.py +++ /dev/null @@ -1,170 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import math - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['DarkNet'] - - -class DarkNet(object): - """DarkNet, see https://pjreddie.com/darknet/yolo/ - Args: - depth (int): network depth, currently only darknet 53 is supported - norm_type (str): normalization type, 'bn' and 'sync_bn' are supported - norm_decay (float): weight decay for normalization layer weights - get_prediction (bool): whether to get prediction - class_dim (int): number of class while classification - """ - - def __init__(self, - depth=53, - norm_type='sync_bn', - norm_decay=0., - weight_prefix_name='', - get_prediction=False, - class_dim=1000): - assert depth in [53], "unsupported depth value" - self.depth = depth - self.norm_type = norm_type - self.norm_decay = norm_decay - self.depth_cfg = {53: ([1, 2, 8, 8, 4], self.basicblock)} - self.prefix_name = weight_prefix_name - self.class_dim = class_dim - self.get_prediction = get_prediction - - def _conv_norm(self, - input, - ch_out, - filter_size, - stride, - padding, - act='leaky', - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + ".conv.weights"), - bias_attr=False) - - bn_name = name + ".bn" - bn_param_attr = ParamAttr( - regularizer=L2Decay(float(self.norm_decay)), - name=bn_name + '.scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(float(self.norm_decay)), - name=bn_name + '.offset') - - out = fluid.layers.batch_norm( - input=conv, - act=None, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + '.var') - - # leaky relu here has `alpha` as 0.1, can not be set by - # `act` param in fluid.layers.batch_norm above. 
- if act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - - return out - - def _downsample(self, - input, - ch_out, - filter_size=3, - stride=2, - padding=1, - name=None): - return self._conv_norm( - input, - ch_out=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - name=name) - - def basicblock(self, input, ch_out, name=None): - conv1 = self._conv_norm( - input, - ch_out=ch_out, - filter_size=1, - stride=1, - padding=0, - name=name + ".0") - conv2 = self._conv_norm( - conv1, - ch_out=ch_out * 2, - filter_size=3, - stride=1, - padding=1, - name=name + ".1") - out = fluid.layers.elementwise_add(x=input, y=conv2, act=None) - return out - - def layer_warp(self, block_func, input, ch_out, count, name=None): - out = block_func(input, ch_out=ch_out, name='{}.0'.format(name)) - for j in six.moves.xrange(1, count): - out = block_func(out, ch_out=ch_out, name='{}.{}'.format(name, j)) - return out - - def __call__(self, input): - """Get the backbone of DarkNet, that is output for the 5 stages. - - :param input: Variable of input image - :type input: Variable - :Returns: The last variables of each stage. - """ - stages, block_func = self.depth_cfg[self.depth] - stages = stages[0:5] - conv = self._conv_norm( - input=input, - ch_out=32, - filter_size=3, - stride=1, - padding=1, - name=self.prefix_name + "yolo_input") - downsample_ = self._downsample( - input=conv, - ch_out=conv.shape[1] * 2, - name=self.prefix_name + "yolo_input.downsample") - blocks = [] - for i, stage in enumerate(stages): - block = self.layer_warp( - block_func=block_func, - input=downsample_, - ch_out=32 * 2**i, - count=stage, - name=self.prefix_name + "stage.{}".format(i)) - blocks.append(block) - if i < len(stages) - 1: # do not downsaple in the last stage - downsample_ = self._downsample( - input=block, - ch_out=block.shape[1] * 2, - name=self.prefix_name + "stage.{}.downsample".format(i)) - if self.get_prediction: - pool = fluid.layers.pool2d( - input=block, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=self.class_dim, - param_attr=ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv), - name='fc_weights'), - bias_attr=ParamAttr(name='fc_offset')) - out = fluid.layers.softmax(out) - return out - else: - return blocks diff --git a/hub_module/modules/image/classification/darknet53_imagenet/data_feed.py b/hub_module/modules/image/classification/darknet53_imagenet/data_feed.py deleted file mode 100644 index ffeb258707b7c93eee3bab748e884a754f295f8f..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/darknet53_imagenet/data_feed.py +++ /dev/null @@ -1,74 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import os -from collections import OrderedDict - -import cv2 -import numpy as np -from PIL import Image, ImageEnhance -from paddle import fluid - -DATA_DIM = 224 -img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) -img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) - - -def resize_short(img, target_size): - percent = float(target_size) / min(img.size[0], img.size[1]) - resized_width = int(round(img.size[0] * percent)) - resized_height = int(round(img.size[1] * percent)) - img = img.resize((resized_width, resized_height), Image.LANCZOS) - return img - - -def crop_image(img, target_size, center): - width, height = img.size - size = target_size - if center == True: 
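-        # center mode: take the middle target_size x target_size window;
-        # otherwise (below) sample a random window, as used for training-time
-        # augmentation.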
- w_start = (width - size) / 2 - h_start = (height - size) / 2 - else: - w_start = np.random.randint(0, width - size + 1) - h_start = np.random.randint(0, height - size + 1) - w_end = w_start + size - h_end = h_start + size - img = img.crop((w_start, h_start, w_end, h_end)) - return img - - -def process_image(img): - img = resize_short(img, target_size=256) - img = crop_image(img, target_size=DATA_DIM, center=True) - if img.mode != 'RGB': - img = img.convert('RGB') - #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) - img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 - img -= img_mean - img /= img_std - return img - - -def test_reader(paths=None, images=None): - """data generator - :param paths: path to images. - :type paths: list, each element is a str - :param images: data of images, [N, H, W, C] - :type images: numpy.ndarray - """ - img_list = [] - if paths: - for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file path.".format(img_path) - img = Image.open(img_path) - #img = cv2.imread(img_path) - img_list.append(img) - if images is not None: - for img in images: - img_list.append(Image.fromarray(np.uint8(img))) - for im in img_list: - im = process_image(im) - yield im diff --git a/hub_module/modules/image/classification/darknet53_imagenet/module.py b/hub_module/modules/image/classification/darknet53_imagenet/module.py deleted file mode 100644 index 3dc5568875b5b9904ab21fa1d0659c36cd589b0c..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/darknet53_imagenet/module.py +++ /dev/null @@ -1,253 +0,0 @@ -import os -import ast -import argparse - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.module.module import moduleinfo, runnable -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.common.paddle_helper import add_vars_prefix -from paddlehub.io.parser import txt_parser - -from darknet53_imagenet.darknet import DarkNet -from darknet53_imagenet.processor import load_label_info -from darknet53_imagenet.data_feed import test_reader - - -@moduleinfo( - name="darknet53_imagenet", - version="1.1.0", - type="cv/classification", - summary= - "DarkNet53 is a image classfication model trained with ImageNet-2012 dataset.", - author="paddlepaddle", - author_email="paddle-dev@baidu.com") -class DarkNet53(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "darknet53_model") - self.label_names = load_label_info( - os.path.join(self.directory, "label_file.txt")) - self.infer_prog = None - self.pred_out = None - self._set_config() - - def get_expected_image_width(self): - return 224 - - def get_expected_image_height(self): - return 224 - - def get_pretrained_images_mean(self): - im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3) - return im_mean - - def get_pretrained_images_std(self): - im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) - return im_std - - def _set_config(self): - """ - predictor config setting - """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - 
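-            # Reserve an initial 500 MB GPU memory pool on device 0; this
-            # branch is only reached when CUDA_VISIBLE_DEVICES is set
-            # (checked in the try block above).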
gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, - input_image=None, - trainable=True, - pretrained=True, - param_prefix='', - get_prediction=False): - """Distill the Head Features, so as to perform transfer learning. - - :param input_image: image tensor. - :type input_image: - :param trainable: whether to set parameters trainable. - :type trainable: bool - :param pretrained: whether to load default pretrained model. - :type pretrained: bool - :param param_prefix: the prefix of parameters in yolo_head and backbone - :type param_prefix: str - :param get_prediction: whether to get prediction, - if True, outputs is {'bbox_out': bbox_out}, - if False, outputs is {'head_features': head_features}. - :type get_prediction: bool - """ - context_prog = input_image.block.program if input_image else fluid.Program( - ) - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - image = input_image if input_image else fluid.data( - name='image', - shape=[-1, 3, 224, 224], - dtype='float32', - lod_level=0) - backbone = DarkNet(get_prediction=get_prediction) - out = backbone(image) - inputs = {'image': image} - if get_prediction: - outputs = {'pred_out': out} - else: - outputs = {'body_feats': out} - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - if pretrained: - - def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - if not param_prefix: - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - main_program=context_prog, - predicate=_if_exist) - else: - exe.run(startup_program) - return inputs, outputs, context_prog - - def classification(self, - paths=None, - images=None, - use_gpu=False, - batch_size=1, - top_k=2): - """API of Classification. - :param paths: the path of images. - :type paths: list, each element is correspond to the path of an image. - :param images: data of images, [N, H, W, C] - :type images: numpy.ndarray - :param use_gpu: whether to use gpu or not. - :type use_gpu: bool - :param batch_size: batch size. 
-        :type batch_size: int
-        :param top_k: return the top_k highest-probability class labels
-        :type top_k: int
-        """
-        if self.infer_prog is None:
-            inputs, outputs, self.infer_prog = self.context(
-                trainable=False, pretrained=True, get_prediction=True)
-            self.infer_prog = self.infer_prog.clone(for_test=True)
-            self.pred_out = outputs['pred_out']
-        place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
-        exe = fluid.Executor(place)
-        all_images = []
-        paths = paths if paths else []
-        for yield_data in test_reader(paths, images):
-            all_images.append(yield_data)
-
-        images_num = len(all_images)
-        loop_num = int(np.ceil(images_num / batch_size))
-
-        res_list = []
-        top_k = max(min(top_k, 1000), 1)
-        for iter_id in range(loop_num):
-            batch_data = []
-            handle_id = iter_id * batch_size
-            for image_id in range(batch_size):
-                try:
-                    batch_data.append(all_images[handle_id + image_id])
-                except IndexError:
-                    # The last batch may hold fewer than batch_size images.
-                    pass
-            batch_data = np.array(batch_data).astype('float32')
-            data_tensor = PaddleTensor(batch_data.copy())
-            if use_gpu:
-                result = self.gpu_predictor.run([data_tensor])
-            else:
-                result = self.cpu_predictor.run([data_tensor])
-            for i, res in enumerate(result[0].as_ndarray()):
-                res_dict = {}
-                pred_label = np.argsort(res)[::-1][:top_k]
-                for k in pred_label:
-                    class_name = self.label_names[int(k)].split(',')[0]
-                    max_prob = res[k]
-                    res_dict[class_name] = max_prob
-                res_list.append(res_dict)
-        return res_list
-
-    def add_module_config_arg(self):
-        """
-        Add the command config options.
-        """
-        self.arg_config_group.add_argument(
-            '--use_gpu',
-            type=ast.literal_eval,
-            default=False,
-            help="whether to use GPU or not")
-
-        self.arg_config_group.add_argument(
-            '--batch_size',
-            type=int,
-            default=1,
-            help="batch size for prediction")
-
-    def add_module_input_arg(self):
-        """
-        Add the command input options.
-        """
-        self.arg_input_group.add_argument(
-            '--input_path', type=str, default=None, help="input data")
-
-        self.arg_input_group.add_argument(
-            '--input_file',
-            type=str,
-            default=None,
-            help="file containing input data")
-
-    def check_input_data(self, args):
-        input_data = []
-        if args.input_path:
-            input_data = [args.input_path]
-        elif args.input_file:
-            if not os.path.exists(args.input_file):
-                raise RuntimeError("File %s does not exist." % args.input_file)
-            else:
-                input_data = txt_parser.parse(args.input_file, use_strip=True)
-        return input_data
-
-    @runnable
-    def run_cmd(self, argvs):
-        self.parser = argparse.ArgumentParser(
-            description="Run the {}".format(self.name),
-            prog="hub run {}".format(self.name),
-            usage='%(prog)s',
-            add_help=True)
-        self.arg_input_group = self.parser.add_argument_group(
-            title="Input options", description="Input data. Required")
-        self.arg_config_group = self.parser.add_argument_group(
-            title="Config options",
-            description=
-            "Run configuration for controlling module behavior, not required.")
-        self.add_module_config_arg()
-        self.add_module_input_arg()
-        args = self.parser.parse_args(argvs)
-        input_data = self.check_input_data(args)
-        if len(input_data) == 0:
-            self.parser.print_help()
-            exit(1)
-        else:
-            for image_path in input_data:
-                if not os.path.exists(image_path):
-                    raise RuntimeError(
-                        "File %s does not exist."
% image_path) - return self.classification( - paths=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size) diff --git a/hub_module/modules/image/classification/efficientnetb0_imagenet/module.py b/hub_module/modules/image/classification/efficientnetb0_imagenet/module.py deleted file mode 100644 index 0d2ea84fea0e8d854ad26ca56e2583499521822d..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/efficientnetb0_imagenet/module.py +++ /dev/null @@ -1,758 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import math -import collections -import re -import copy - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - -GlobalParams = collections.namedtuple('GlobalParams', [ - 'batch_norm_momentum', - 'batch_norm_epsilon', - 'dropout_rate', - 'num_classes', - 'width_coefficient', - 'depth_coefficient', - 'depth_divisor', - 'min_depth', - 'drop_connect_rate', -]) - -BlockArgs = collections.namedtuple( - 'BlockArgs', - ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) - -GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) -BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) - - -def efficientnet_params(model_name: str): - """ Map EfficientNet model name to parameter coefficients. """ - params_dict = { - # Coefficients: width,depth,resolution,dropout - 'efficientnet-b0': (1.0, 1.0, 224, 0.2) - } - return params_dict[model_name] - - -def efficientnet(width_coefficient: float = None, - depth_coefficient: float = None, - dropout_rate: float = 0.2, - drop_connect_rate: float = 0.2): - """ Get block arguments according to parameter and coefficients. 
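-
-    The block strings below use the notation
-    r<repeat>_k<kernel>_s<stride><stride>_e<expand>_i<in>_o<out>_se<ratio>,
-    which BlockDecoder.decode parses into BlockArgs namedtuples.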
""" - blocks_args = [ - 'r1_k3_s11_e1_i32_o16_se0.25', - 'r2_k3_s22_e6_i16_o24_se0.25', - 'r2_k5_s22_e6_i24_o40_se0.25', - 'r3_k3_s22_e6_i40_o80_se0.25', - 'r3_k5_s11_e6_i80_o112_se0.25', - 'r4_k5_s22_e6_i112_o192_se0.25', - 'r1_k3_s11_e6_i192_o320_se0.25', - ] - blocks_args = BlockDecoder.decode(blocks_args) - - global_params = GlobalParams(batch_norm_momentum=0.99, - batch_norm_epsilon=1e-3, - dropout_rate=dropout_rate, - drop_connect_rate=drop_connect_rate, - num_classes=1000, - width_coefficient=width_coefficient, - depth_coefficient=depth_coefficient, - depth_divisor=8, - min_depth=None) - - return blocks_args, global_params - - -def get_model_params(model_name: str, override_params: dict): - """ Get the block args and global params for a given model """ - if model_name.startswith('efficientnet'): - w, d, _, p = efficientnet_params(model_name) - blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) - else: - raise NotImplementedError('model name is not pre-defined: %s' % model_name) - if override_params: - global_params = global_params._replace(**override_params) - return blocks_args, global_params - - -def round_filters(filters: int, global_params: dict): - """ Calculate and round number of filters based on depth multiplier. """ - multiplier = global_params.width_coefficient - if not multiplier: - return filters - divisor = global_params.depth_divisor - min_depth = global_params.min_depth - filters *= multiplier - min_depth = min_depth or divisor - new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) - if new_filters < 0.9 * filters: # prevent rounding by more than 10% - new_filters += divisor - return int(new_filters) - - -def round_repeats(repeats: int, global_params: dict): - """ Round number of filters based on depth multiplier. """ - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) - - -class BlockDecoder(object): - """ - Block Decoder, straight from the official TensorFlow repository. - """ - @staticmethod - def _decode_block_string(block_string: str): - """ Gets a block through a string notation of arguments. """ - assert isinstance(block_string, str) - - ops = block_string.split('_') - options = {} - for op in ops: - splits = re.split(r'(\d.*)', op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value - - # Check stride - cond_1 = ('s' in options and len(options['s']) == 1) - cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) - assert (cond_1 or cond_2) - - return BlockArgs(kernel_size=int(options['k']), - num_repeat=int(options['r']), - input_filters=int(options['i']), - output_filters=int(options['o']), - expand_ratio=int(options['e']), - id_skip=('noskip' not in block_string), - se_ratio=float(options['se']) if 'se' in options else None, - stride=[int(options['s'][0])]) - - @staticmethod - def _encode_block_string(block): - """Encodes a block to a string.""" - args = [ - 'r%d' % block.num_repeat, - 'k%d' % block.kernel_size, - 's%d%d' % (block.strides[0], block.strides[1]), - 'e%s' % block.expand_ratio, - 'i%d' % block.input_filters, - 'o%d' % block.output_filters - ] - if 0 < block.se_ratio <= 1: - args.append('se%s' % block.se_ratio) - if block.id_skip is False: - args.append('noskip') - return '_'.join(args) - - @staticmethod - def decode(string_list: list): - """ - Decode a list of string notations to specify blocks in the network. 
- - string_list: list of strings, each string is a notation of block - return - list of BlockArgs namedtuples of block args - """ - assert isinstance(string_list, list) - blocks_args = [] - for block_string in string_list: - blocks_args.append(BlockDecoder._decode_block_string(block_string)) - return blocks_args - - @staticmethod - def encode(blocks_args: list): - """ - Encodes a list of BlockArgs to a list of strings. - - :param blocks_args: a list of BlockArgs namedtuples of block args - :return: a list of strings, each string is a notation of block - """ - block_strings = [] - for block in blocks_args: - block_strings.append(BlockDecoder._encode_block_string(block)) - return block_strings - - -def initial_type(name: str, use_bias: bool = False): - param_attr = ParamAttr(name=name + "_weights") - if use_bias: - bias_attr = ParamAttr(name=name + "_offset") - else: - bias_attr = False - return param_attr, bias_attr - - -def init_batch_norm_layer(name: str = "batch_norm"): - param_attr = ParamAttr(name=name + "_scale") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def init_fc_layer(name: str = "fc"): - param_attr = ParamAttr(name=name + "_weights") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): - """Calculate padding size.""" - if img_size % stride == 0: - out_size = max(filter_size - stride, 0) - else: - out_size = max(filter_size - (img_size % stride), 0) - return out_size // 2, out_size - out_size // 2 - - -inp_shape = {"b0": [224, 112, 112, 56, 28, 14, 14, 7]} - - -def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): - """Drop input connection""" - if is_test: - return inputs - keep_prob = 1.0 - prob - inputs_shape = paddle.shape(inputs) - random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) - binary_tensor = paddle.floor(random_tensor) - output = inputs / keep_prob * binary_tensor - return output - - -class Conv2ds(nn.Layer): - """Basic conv layer""" - def __init__(self, - input_channels: int, - output_channels: int, - filter_size: int, - stride: int = 1, - padding: int = 0, - groups: int = None, - name: str = "conv2d", - act: str = None, - use_bias: bool = False, - padding_type: str = None, - model_name: str = None, - cur_stage: str = None): - super(Conv2ds, self).__init__() - assert act in [None, "swish", "sigmoid"] - self.act = act - - param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) - - def get_padding(filter_size, stride=1, dilation=1): - padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 - return padding - - inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] - self.need_crop = False - if padding_type == "SAME": - top_padding, bottom_padding = cal_padding(inps, stride, filter_size) - left_padding, right_padding = cal_padding(inps, stride, filter_size) - height_padding = bottom_padding - width_padding = right_padding - if top_padding != bottom_padding or left_padding != right_padding: - height_padding = top_padding + stride - width_padding = left_padding + stride - self.need_crop = True - padding = [height_padding, width_padding] - elif padding_type == "VALID": - height_padding = 0 - width_padding = 0 - padding = [height_padding, width_padding] - elif padding_type == "DYNAMIC": - padding = get_padding(filter_size, stride) - else: - padding = padding_type - - groups = 1 if groups is None else groups - self._conv = 
Conv2d(input_channels, - output_channels, - filter_size, - groups=groups, - stride=stride, - padding=padding, - weight_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - x = self._conv(inputs) - if self.act == "swish": - x = F.swish(x) - elif self.act == "sigmoid": - x = F.sigmoid(x) - - if self.need_crop: - x = x[:, :, 1:, 1:] - return x - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__(self, - input_channels: int, - filter_size: int, - output_channels: int, - stride: int = 1, - num_groups: int = 1, - padding_type: str = "SAME", - conv_act: str = None, - bn_act: str = "swish", - use_bn: bool = True, - use_bias: bool = False, - name: str = None, - conv_name: str = None, - bn_name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2ds(input_channels=input_channels, - output_channels=output_channels, - filter_size=filter_size, - stride=stride, - groups=num_groups, - act=conv_act, - padding_type=padding_type, - name=conv_name, - use_bias=use_bias, - model_name=model_name, - cur_stage=cur_stage) - self.use_bn = use_bn - if use_bn is True: - bn_name = name + bn_name - param_attr, bias_attr = init_batch_norm_layer(bn_name) - - self._bn = BatchNorm(num_channels=output_channels, - act=bn_act, - momentum=0.99, - epsilon=0.001, - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - param_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - if self.use_bn: - x = self._conv(inputs) - x = self._bn(x) - return x - else: - return self._conv(inputs) - - -class ExpandConvNorm(nn.Layer): - """Expand conv norm layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ExpandConvNorm, self).__init__() - - self.oup = block_args.input_filters * block_args.expand_ratio - self.expand_ratio = block_args.expand_ratio - - if self.expand_ratio != 1: - self._conv = ConvBNLayer(input_channels, - 1, - self.oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_expand_conv", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - if self.expand_ratio != 1: - return self._conv(inputs) - else: - return inputs - - -class DepthwiseConvNorm(nn.Layer): - """Depthwise conv norm layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(DepthwiseConvNorm, self).__init__() - - self.k = block_args.kernel_size - self.s = block_args.stride - if isinstance(self.s, list) or isinstance(self.s, tuple): - self.s = self.s[0] - oup = block_args.input_filters * block_args.expand_ratio - - self._conv = ConvBNLayer(input_channels, - self.k, - oup, - self.s, - num_groups=input_channels, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_depthwise_conv", - bn_name="_bn1", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ProjectConvNorm(nn.Layer): - """Projection conv bn layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ProjectConvNorm, self).__init__() - - final_oup = block_args.output_filters - - 
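-        # 1x1 linear projection back to the block's output width; bn_act=None
-        # keeps it activation-free, matching the linear bottleneck of
-        # MobileNetV2-style inverted residual blocks.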
self._conv = ConvBNLayer(input_channels, - 1, - final_oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_project_conv", - bn_name="_bn2", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class SEBlock(nn.Layer): - """Basic Squeeze-and-Excitation block for Efficientnet.""" - def __init__(self, - input_channels: int, - num_squeezed_channels: int, - oup: int, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(SEBlock, self).__init__() - - self._pool = AdaptiveAvgPool2d(1) - self._conv1 = Conv2ds(input_channels, - num_squeezed_channels, - 1, - use_bias=True, - padding_type=padding_type, - act="swish", - name=name + "_se_reduce") - - self._conv2 = Conv2ds(num_squeezed_channels, - oup, - 1, - act="sigmoid", - use_bias=True, - padding_type=padding_type, - name=name + "_se_expand") - - def forward(self, inputs: paddle.Tensor): - x = self._pool(inputs) - x = self._conv1(x) - x = self._conv2(x) - return paddle.multiply(inputs, x) - - -class MbConvBlock(nn.Layer): - """Mobile inverted bottleneck convolution for Efficientnet.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - use_se: bool, - name: str = None, - drop_connect_rate: float = None, - is_test: bool = False, - model_name: str = None, - cur_stage: str = None): - super(MbConvBlock, self).__init__() - - oup = block_args.input_filters * block_args.expand_ratio - self.block_args = block_args - self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) - self.id_skip = block_args.id_skip - self.expand_ratio = block_args.expand_ratio - self.drop_connect_rate = drop_connect_rate - self.is_test = is_test - - if self.expand_ratio != 1: - self._ecn = ExpandConvNorm(input_channels, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._dcn = DepthwiseConvNorm(input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - if self.has_se: - num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) - self._se = SEBlock(input_channels * block_args.expand_ratio, - num_squeezed_channels, - oup, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._pcn = ProjectConvNorm(input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - x = inputs - if self.expand_ratio != 1: - x = self._ecn(x) - x = F.swish(x) - x = self._dcn(x) - x = F.swish(x) - if self.has_se: - x = self._se(x) - x = self._pcn(x) - if self.id_skip and \ - self.block_args.stride == 1 and \ - self.block_args.input_filters == self.block_args.output_filters: - if self.drop_connect_rate: - x = _drop_connect(x, self.drop_connect_rate, self.is_test) - x = paddle.elementwise_add(x, inputs) - return x - - -class ConvStemNorm(nn.Layer): - """Basic conv stem norm block for extracting features.""" - def __init__(self, - input_channels: int, - padding_type: str, - _global_params: dict, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvStemNorm, self).__init__() - - output_channels = round_filters(32, _global_params) - self._conv = ConvBNLayer(input_channels, - filter_size=3, - 
output_channels=output_channels, - stride=2, - bn_act=None, - padding_type=padding_type, - name="", - conv_name="_conv_stem", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ExtractFeatures(nn.Layer): - """Extract features.""" - def __init__(self, - input_channels: int, - _block_args: dict, - _global_params: dict, - padding_type: str, - use_se: bool, - is_test: bool, - model_name: str = None): - super(ExtractFeatures, self).__init__() - - self._global_params = _global_params - - self._conv_stem = ConvStemNorm(input_channels, - padding_type=padding_type, - _global_params=_global_params, - model_name=model_name, - cur_stage=0) - - self.block_args_copy = copy.deepcopy(_block_args) - idx = 0 - block_size = 0 - for block_arg in self.block_args_copy: - block_arg = block_arg._replace(input_filters=round_filters(block_arg.input_filters, _global_params), - output_filters=round_filters(block_arg.output_filters, _global_params), - num_repeat=round_repeats(block_arg.num_repeat, _global_params)) - block_size += 1 - for _ in range(block_arg.num_repeat - 1): - block_size += 1 - - self.conv_seq = [] - cur_stage = 1 - for block_args in _block_args: - block_args = block_args._replace(input_filters=round_filters(block_args.input_filters, _global_params), - output_filters=round_filters(block_args.output_filters, _global_params), - num_repeat=round_repeats(block_args.num_repeat, _global_params)) - - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - - _mc_block = self.add_sublayer( - "_blocks." + str(idx) + ".", - MbConvBlock(block_args.input_filters, - block_args=block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - if block_args.num_repeat > 1: - block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) - for _ in range(block_args.num_repeat - 1): - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - _mc_block = self.add_sublayer( - "block." + str(idx) + ".", - MbConvBlock(block_args.input_filters, - block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." 
+ str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - cur_stage += 1 - - def forward(self, inputs: paddle.Tensor): - x = self._conv_stem(inputs) - x = F.swish(x) - for _mc_block in self.conv_seq: - x = _mc_block(x) - return x - - -@moduleinfo(name="efficientnetb0_imagenet", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="efficientnetb0_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class EfficientNet_B0(nn.Layer): - def __init__(self, - is_test: bool = False, - padding_type: str = "SAME", - override_params: dict = None, - use_se: bool = True, - class_dim: int = 1000, - load_checkpoint: str = None): - super(EfficientNet_B0, self).__init__() - - model_name = 'efficientnet-b0' - self.name = "b0" - self._block_args, self._global_params = get_model_params(model_name, override_params) - self.padding_type = padding_type - self.use_se = use_se - self.is_test = is_test - - self._ef = ExtractFeatures(3, - self._block_args, - self._global_params, - self.padding_type, - self.use_se, - self.is_test, - model_name=self.name) - - output_channels = round_filters(1280, self._global_params) - oup = 320 - - self._conv = ConvBNLayer(oup, - 1, - output_channels, - bn_act="swish", - padding_type=self.padding_type, - name="", - conv_name="_conv_head", - bn_name="_bn1", - model_name=self.name, - cur_stage=7) - self._pool = AdaptiveAvgPool2d(1) - - if self._global_params.dropout_rate: - self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") - - param_attr, bias_attr = init_fc_layer("_fc") - self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'efficientnet_b0_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b0_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - x = self._ef(inputs) - x = self._conv(x) - x = self._pool(x) - if self._global_params.dropout_rate: - x = self._drop(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._fc(x) - return x diff --git a/hub_module/modules/image/classification/efficientnetb0_small_imagenet/module.py b/hub_module/modules/image/classification/efficientnetb0_small_imagenet/module.py deleted file mode 100644 index c4aee0f8e9073e377cf8c773cc0a8686b1bb4eca..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/efficientnetb0_small_imagenet/module.py +++ /dev/null @@ -1,758 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import math -import collections -import re -import copy - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - -GlobalParams = collections.namedtuple('GlobalParams', [ - 'batch_norm_momentum', - 'batch_norm_epsilon', - 'dropout_rate', - 'num_classes', - 'width_coefficient', - 'depth_coefficient', - 'depth_divisor', - 'min_depth', - 'drop_connect_rate', -]) - -BlockArgs = collections.namedtuple( - 'BlockArgs', - ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) - -GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) -BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) - - -def efficientnet_params(model_name: str): - """ Map EfficientNet model name to parameter coefficients. """ - params_dict = { - # Coefficients: width,depth,resolution,dropout - 'efficientnet-b0': (1.0, 1.0, 224, 0.2) - } - return params_dict[model_name] - - -def efficientnet(width_coefficient: float = None, - depth_coefficient: float = None, - dropout_rate: float = 0.2, - drop_connect_rate: float = 0.2): - """ Get block arguments according to parameter and coefficients. """ - blocks_args = [ - 'r1_k3_s11_e1_i32_o16_se0.25', - 'r2_k3_s22_e6_i16_o24_se0.25', - 'r2_k5_s22_e6_i24_o40_se0.25', - 'r3_k3_s22_e6_i40_o80_se0.25', - 'r3_k5_s11_e6_i80_o112_se0.25', - 'r4_k5_s22_e6_i112_o192_se0.25', - 'r1_k3_s11_e6_i192_o320_se0.25', - ] - blocks_args = BlockDecoder.decode(blocks_args) - - global_params = GlobalParams(batch_norm_momentum=0.99, - batch_norm_epsilon=1e-3, - dropout_rate=dropout_rate, - drop_connect_rate=drop_connect_rate, - num_classes=1000, - width_coefficient=width_coefficient, - depth_coefficient=depth_coefficient, - depth_divisor=8, - min_depth=None) - - return blocks_args, global_params - - -def get_model_params(model_name: str, override_params: dict): - """ Get the block args and global params for a given model """ - if model_name.startswith('efficientnet'): - w, d, _, p = efficientnet_params(model_name) - blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) - else: - raise NotImplementedError('model name is not pre-defined: %s' % model_name) - if override_params: - global_params = global_params._replace(**override_params) - return blocks_args, global_params - - -def round_filters(filters: int, global_params: dict): - """ Calculate and round number of filters based on depth multiplier. """ - multiplier = global_params.width_coefficient - if not multiplier: - return filters - divisor = global_params.depth_divisor - min_depth = global_params.min_depth - filters *= multiplier - min_depth = min_depth or divisor - new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) - if new_filters < 0.9 * filters: # prevent rounding by more than 10% - new_filters += divisor - return int(new_filters) - - -def round_repeats(repeats: int, global_params: dict): - """ Round number of filters based on depth multiplier. 
""" - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) - - -class BlockDecoder(object): - """ - Block Decoder, straight from the official TensorFlow repository. - """ - @staticmethod - def _decode_block_string(block_string: str): - """ Gets a block through a string notation of arguments. """ - assert isinstance(block_string, str) - - ops = block_string.split('_') - options = {} - for op in ops: - splits = re.split(r'(\d.*)', op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value - - # Check stride - cond_1 = ('s' in options and len(options['s']) == 1) - cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) - assert (cond_1 or cond_2) - - return BlockArgs(kernel_size=int(options['k']), - num_repeat=int(options['r']), - input_filters=int(options['i']), - output_filters=int(options['o']), - expand_ratio=int(options['e']), - id_skip=('noskip' not in block_string), - se_ratio=float(options['se']) if 'se' in options else None, - stride=[int(options['s'][0])]) - - @staticmethod - def _encode_block_string(block): - """Encodes a block to a string.""" - args = [ - 'r%d' % block.num_repeat, - 'k%d' % block.kernel_size, - 's%d%d' % (block.strides[0], block.strides[1]), - 'e%s' % block.expand_ratio, - 'i%d' % block.input_filters, - 'o%d' % block.output_filters - ] - if 0 < block.se_ratio <= 1: - args.append('se%s' % block.se_ratio) - if block.id_skip is False: - args.append('noskip') - return '_'.join(args) - - @staticmethod - def decode(string_list: list): - """ - Decode a list of string notations to specify blocks in the network. - - string_list: list of strings, each string is a notation of block - return - list of BlockArgs namedtuples of block args - """ - assert isinstance(string_list, list) - blocks_args = [] - for block_string in string_list: - blocks_args.append(BlockDecoder._decode_block_string(block_string)) - return blocks_args - - @staticmethod - def encode(blocks_args: list): - """ - Encodes a list of BlockArgs to a list of strings. 
- - :param blocks_args: a list of BlockArgs namedtuples of block args - :return: a list of strings, each string is a notation of block - """ - block_strings = [] - for block in blocks_args: - block_strings.append(BlockDecoder._encode_block_string(block)) - return block_strings - - -def initial_type(name: str, use_bias: bool = False): - param_attr = ParamAttr(name=name + "_weights") - if use_bias: - bias_attr = ParamAttr(name=name + "_offset") - else: - bias_attr = False - return param_attr, bias_attr - - -def init_batch_norm_layer(name: str = "batch_norm"): - param_attr = ParamAttr(name=name + "_scale") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def init_fc_layer(name: str = "fc"): - param_attr = ParamAttr(name=name + "_weights") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): - """Calculate padding size.""" - if img_size % stride == 0: - out_size = max(filter_size - stride, 0) - else: - out_size = max(filter_size - (img_size % stride), 0) - return out_size // 2, out_size - out_size // 2 - - -inp_shape = {"b0": [224, 112, 112, 56, 28, 14, 14, 7]} - - -def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): - """Drop input connection""" - if is_test: - return inputs - keep_prob = 1.0 - prob - inputs_shape = paddle.shape(inputs) - random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) - binary_tensor = paddle.floor(random_tensor) - output = inputs / keep_prob * binary_tensor - return output - - -class Conv2ds(nn.Layer): - """Basic conv layer""" - def __init__(self, - input_channels: int, - output_channels: int, - filter_size: int, - stride: int = 1, - padding: int = 0, - groups: int = None, - name: str = "conv2d", - act: str = None, - use_bias: bool = False, - padding_type: str = None, - model_name: str = None, - cur_stage: str = None): - super(Conv2ds, self).__init__() - assert act in [None, "swish", "sigmoid"] - self.act = act - - param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) - - def get_padding(filter_size, stride=1, dilation=1): - padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 - return padding - - inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] - self.need_crop = False - if padding_type == "SAME": - top_padding, bottom_padding = cal_padding(inps, stride, filter_size) - left_padding, right_padding = cal_padding(inps, stride, filter_size) - height_padding = bottom_padding - width_padding = right_padding - if top_padding != bottom_padding or left_padding != right_padding: - height_padding = top_padding + stride - width_padding = left_padding + stride - self.need_crop = True - padding = [height_padding, width_padding] - elif padding_type == "VALID": - height_padding = 0 - width_padding = 0 - padding = [height_padding, width_padding] - elif padding_type == "DYNAMIC": - padding = get_padding(filter_size, stride) - else: - padding = padding_type - - groups = 1 if groups is None else groups - self._conv = Conv2d(input_channels, - output_channels, - filter_size, - groups=groups, - stride=stride, - padding=padding, - weight_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - x = self._conv(inputs) - if self.act == "swish": - x = F.swish(x) - elif self.act == "sigmoid": - x = F.sigmoid(x) - - if self.need_crop: - x = x[:, :, 1:, 1:] - return x - - -class ConvBNLayer(nn.Layer): - """Basic conv bn 
layer.""" - def __init__(self, - input_channels: int, - filter_size: int, - output_channels: int, - stride: int = 1, - num_groups: int = 1, - padding_type: str = "SAME", - conv_act: str = None, - bn_act: str = "swish", - use_bn: bool = True, - use_bias: bool = False, - name: str = None, - conv_name: str = None, - bn_name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2ds(input_channels=input_channels, - output_channels=output_channels, - filter_size=filter_size, - stride=stride, - groups=num_groups, - act=conv_act, - padding_type=padding_type, - name=conv_name, - use_bias=use_bias, - model_name=model_name, - cur_stage=cur_stage) - self.use_bn = use_bn - if use_bn is True: - bn_name = name + bn_name - param_attr, bias_attr = init_batch_norm_layer(bn_name) - - self._bn = BatchNorm(num_channels=output_channels, - act=bn_act, - momentum=0.99, - epsilon=0.001, - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - param_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - if self.use_bn: - x = self._conv(inputs) - x = self._bn(x) - return x - else: - return self._conv(inputs) - - -class ExpandConvNorm(nn.Layer): - """Expand conv norm layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ExpandConvNorm, self).__init__() - - self.oup = block_args.input_filters * block_args.expand_ratio - self.expand_ratio = block_args.expand_ratio - - if self.expand_ratio != 1: - self._conv = ConvBNLayer(input_channels, - 1, - self.oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_expand_conv", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - if self.expand_ratio != 1: - return self._conv(inputs) - else: - return inputs - - -class DepthwiseConvNorm(nn.Layer): - """Depthwise conv norm layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(DepthwiseConvNorm, self).__init__() - - self.k = block_args.kernel_size - self.s = block_args.stride - if isinstance(self.s, list) or isinstance(self.s, tuple): - self.s = self.s[0] - oup = block_args.input_filters * block_args.expand_ratio - - self._conv = ConvBNLayer(input_channels, - self.k, - oup, - self.s, - num_groups=input_channels, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_depthwise_conv", - bn_name="_bn1", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ProjectConvNorm(nn.Layer): - """Projection conv bn layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ProjectConvNorm, self).__init__() - - final_oup = block_args.output_filters - - self._conv = ConvBNLayer(input_channels, - 1, - final_oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_project_conv", - bn_name="_bn2", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class SEBlock(nn.Layer): - """Basic Squeeze-and-Excitation block for Efficientnet.""" - def __init__(self, - input_channels: int, - 
num_squeezed_channels: int, - oup: int, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(SEBlock, self).__init__() - - self._pool = AdaptiveAvgPool2d(1) - self._conv1 = Conv2ds(input_channels, - num_squeezed_channels, - 1, - use_bias=True, - padding_type=padding_type, - act="swish", - name=name + "_se_reduce") - - self._conv2 = Conv2ds(num_squeezed_channels, - oup, - 1, - act="sigmoid", - use_bias=True, - padding_type=padding_type, - name=name + "_se_expand") - - def forward(self, inputs: paddle.Tensor): - x = self._pool(inputs) - x = self._conv1(x) - x = self._conv2(x) - return paddle.multiply(inputs, x) - - -class MbConvBlock(nn.Layer): - """Mobile inverted bottleneck convolution for Efficientnet.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - use_se: bool, - name: str = None, - drop_connect_rate: float = None, - is_test: bool = False, - model_name: str = None, - cur_stage: str = None): - super(MbConvBlock, self).__init__() - - oup = block_args.input_filters * block_args.expand_ratio - self.block_args = block_args - self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) - self.id_skip = block_args.id_skip - self.expand_ratio = block_args.expand_ratio - self.drop_connect_rate = drop_connect_rate - self.is_test = is_test - - if self.expand_ratio != 1: - self._ecn = ExpandConvNorm(input_channels, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._dcn = DepthwiseConvNorm(input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - if self.has_se: - num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) - self._se = SEBlock(input_channels * block_args.expand_ratio, - num_squeezed_channels, - oup, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._pcn = ProjectConvNorm(input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - x = inputs - if self.expand_ratio != 1: - x = self._ecn(x) - x = F.swish(x) - x = self._dcn(x) - x = F.swish(x) - if self.has_se: - x = self._se(x) - x = self._pcn(x) - if self.id_skip and \ - self.block_args.stride == 1 and \ - self.block_args.input_filters == self.block_args.output_filters: - if self.drop_connect_rate: - x = _drop_connect(x, self.drop_connect_rate, self.is_test) - x = paddle.elementwise_add(x, inputs) - return x - - -class ConvStemNorm(nn.Layer): - """Basic conv stem norm block for extracting features.""" - def __init__(self, - input_channels: int, - padding_type: str, - _global_params: dict, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvStemNorm, self).__init__() - - output_channels = round_filters(32, _global_params) - self._conv = ConvBNLayer(input_channels, - filter_size=3, - output_channels=output_channels, - stride=2, - bn_act=None, - padding_type=padding_type, - name="", - conv_name="_conv_stem", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ExtractFeatures(nn.Layer): - """Extract features.""" - def __init__(self, - input_channels: int, - _block_args: dict, - _global_params: dict, - padding_type: str, - use_se: 
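SEBlock above pools each channel to a single value, passes it through the reduce/expand pair of 1x1 convs, and multiplies the block's features by the resulting sigmoid gate. A numpy sketch of just the data flow (the learned convs are elided, so the gate values here are a stand-in, not the trained behaviour):

import numpy as np

x = np.random.rand(1, 96, 14, 14)               # expanded features of an i16/e6 block
squeezed = x.mean(axis=(2, 3), keepdims=True)   # AdaptiveAvgPool2d(1) -> [1, 96, 1, 1]
num_squeezed = max(1, int(16 * 0.25))           # MbConvBlock: input_filters=16, se_ratio=0.25 -> 4
gate = 1.0 / (1.0 + np.exp(-squeezed))          # sigmoid output of _conv2, stand-in values
out = x * gate                                  # paddle.multiply(inputs, x)
print(num_squeezed, out.shape)                  # 4 (1, 96, 14, 14)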
bool, - is_test: bool, - model_name: str = None): - super(ExtractFeatures, self).__init__() - - self._global_params = _global_params - - self._conv_stem = ConvStemNorm(input_channels, - padding_type=padding_type, - _global_params=_global_params, - model_name=model_name, - cur_stage=0) - - self.block_args_copy = copy.deepcopy(_block_args) - idx = 0 - block_size = 0 - for block_arg in self.block_args_copy: - block_arg = block_arg._replace(input_filters=round_filters(block_arg.input_filters, _global_params), - output_filters=round_filters(block_arg.output_filters, _global_params), - num_repeat=round_repeats(block_arg.num_repeat, _global_params)) - block_size += 1 - for _ in range(block_arg.num_repeat - 1): - block_size += 1 - - self.conv_seq = [] - cur_stage = 1 - for block_args in _block_args: - block_args = block_args._replace(input_filters=round_filters(block_args.input_filters, _global_params), - output_filters=round_filters(block_args.output_filters, _global_params), - num_repeat=round_repeats(block_args.num_repeat, _global_params)) - - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - - _mc_block = self.add_sublayer( - "_blocks." + str(idx) + ".", - MbConvBlock(block_args.input_filters, - block_args=block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - if block_args.num_repeat > 1: - block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) - for _ in range(block_args.num_repeat - 1): - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - _mc_block = self.add_sublayer( - "block." + str(idx) + ".", - MbConvBlock(block_args.input_filters, - block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." 
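Note how the loop above rescales drop_connect_rate by idx / block_size, so the first block never drops its branch and later blocks approach the full global rate. A short sketch of the schedule (the seven specs sum to 16 repeats before depth scaling, and the global rate defaults to 0.2):

global_rate, block_size = 0.2, 16
rates = [global_rate * idx / block_size for idx in range(block_size)]
print(['%.3f' % r for r in rates])   # 0.000 for block 0, rising to 0.188 for block 15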
+ str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - cur_stage += 1 - - def forward(self, inputs: paddle.Tensor): - x = self._conv_stem(inputs) - x = F.swish(x) - for _mc_block in self.conv_seq: - x = _mc_block(x) - return x - - -@moduleinfo(name="efficientnetb0_small_imagenet", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="efficientnet_b0_small_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class EfficientNet_B0(nn.Layer): - def __init__(self, - is_test: bool = True, - padding_type: str = 'DYNAMIC', - override_params: dict = None, - use_se: bool = False, - class_dim: int = 1000, - load_checkpoint: str = None): - super(EfficientNet_B0, self).__init__() - - model_name = 'efficientnet-b0' - self.name = "b0" - self._block_args, self._global_params = get_model_params(model_name, override_params) - self.padding_type = padding_type - self.use_se = use_se - self.is_test = is_test - - self._ef = ExtractFeatures(3, - self._block_args, - self._global_params, - self.padding_type, - self.use_se, - self.is_test, - model_name=self.name) - - output_channels = round_filters(1280, self._global_params) - oup = 320 - - self._conv = ConvBNLayer(oup, - 1, - output_channels, - bn_act="swish", - padding_type=self.padding_type, - name="", - conv_name="_conv_head", - bn_name="_bn1", - model_name=self.name, - cur_stage=7) - self._pool = AdaptiveAvgPool2d(1) - - if self._global_params.dropout_rate: - self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") - - param_attr, bias_attr = init_fc_layer("_fc") - self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'efficientnet_b0_small_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b0_small_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - x = self._ef(inputs) - x = self._conv(x) - x = self._pool(x) - if self._global_params.dropout_rate: - x = self._drop(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._fc(x) - return x diff --git a/hub_module/modules/image/classification/efficientnetb1_imagenet/module.py b/hub_module/modules/image/classification/efficientnetb1_imagenet/module.py deleted file mode 100644 index 70a0ac103c647c1de9fedef38e4db9b86479f0ae..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/efficientnetb1_imagenet/module.py +++ /dev/null @@ -1,758 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
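For orientation, a hedged usage sketch for the EfficientNet_B0 module defined above. Loading by the name registered in @moduleinfo lets PaddleHub set up the module directory and fetch the pretrained weights (constructing the class directly relies on self.directory, which the hub machinery provides); the 224x224 input matches inp_shape['b0']:

import paddle
import paddlehub as hub

model = hub.Module(name='efficientnetb0_small_imagenet')
model.eval()
logits = model(paddle.rand([1, 3, 224, 224]))   # forward as defined above
print(logits.shape)                              # [1, 1000] ImageNet class scores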
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import math -import collections -import re -import copy - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - -GlobalParams = collections.namedtuple('GlobalParams', [ - 'batch_norm_momentum', - 'batch_norm_epsilon', - 'dropout_rate', - 'num_classes', - 'width_coefficient', - 'depth_coefficient', - 'depth_divisor', - 'min_depth', - 'drop_connect_rate', -]) - -BlockArgs = collections.namedtuple( - 'BlockArgs', - ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) - -GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) -BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) - - -def efficientnet_params(model_name: str): - """ Map EfficientNet model name to parameter coefficients. """ - params_dict = { - # Coefficients: width,depth,resolution,dropout - 'efficientnet-b1': (1.0, 1.1, 240, 0.2) - } - return params_dict[model_name] - - -def efficientnet(width_coefficient: float = None, - depth_coefficient: float = None, - dropout_rate: float = 0.2, - drop_connect_rate: float = 0.2): - """ Get block arguments according to parameter and coefficients. """ - blocks_args = [ - 'r1_k3_s11_e1_i32_o16_se0.25', - 'r2_k3_s22_e6_i16_o24_se0.25', - 'r2_k5_s22_e6_i24_o40_se0.25', - 'r3_k3_s22_e6_i40_o80_se0.25', - 'r3_k5_s11_e6_i80_o112_se0.25', - 'r4_k5_s22_e6_i112_o192_se0.25', - 'r1_k3_s11_e6_i192_o320_se0.25', - ] - blocks_args = BlockDecoder.decode(blocks_args) - - global_params = GlobalParams(batch_norm_momentum=0.99, - batch_norm_epsilon=1e-3, - dropout_rate=dropout_rate, - drop_connect_rate=drop_connect_rate, - num_classes=1000, - width_coefficient=width_coefficient, - depth_coefficient=depth_coefficient, - depth_divisor=8, - min_depth=None) - - return blocks_args, global_params - - -def get_model_params(model_name: str, override_params: dict): - """ Get the block args and global params for a given model """ - if model_name.startswith('efficientnet'): - w, d, _, p = efficientnet_params(model_name) - blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) - else: - raise NotImplementedError('model name is not pre-defined: %s' % model_name) - if override_params: - global_params = global_params._replace(**override_params) - return blocks_args, global_params - - -def round_filters(filters: int, global_params: dict): - """ Calculate and round number of filters based on depth multiplier. """ - multiplier = global_params.width_coefficient - if not multiplier: - return filters - divisor = global_params.depth_divisor - min_depth = global_params.min_depth - filters *= multiplier - min_depth = min_depth or divisor - new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) - if new_filters < 0.9 * filters: # prevent rounding by more than 10% - new_filters += divisor - return int(new_filters) - - -def round_repeats(repeats: int, global_params: dict): - """ Round number of filters based on depth multiplier. 
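A worked number for the depth scaling that round_repeats applies: b1's depth_coefficient of 1.1 rounds every stage's repeat count up, turning the spec repeats [1, 2, 2, 3, 3, 4, 1] into 23 blocks in total:

import math

print([int(math.ceil(1.1 * r)) for r in [1, 2, 2, 3, 3, 4, 1]])
# [2, 3, 3, 4, 4, 5, 2] -> 23 MbConvBlocks instead of the unscaled 16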
""" - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) - - -class BlockDecoder(object): - """ - Block Decoder, straight from the official TensorFlow repository. - """ - @staticmethod - def _decode_block_string(block_string: str): - """ Gets a block through a string notation of arguments. """ - assert isinstance(block_string, str) - - ops = block_string.split('_') - options = {} - for op in ops: - splits = re.split(r'(\d.*)', op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value - - # Check stride - cond_1 = ('s' in options and len(options['s']) == 1) - cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) - assert (cond_1 or cond_2) - - return BlockArgs(kernel_size=int(options['k']), - num_repeat=int(options['r']), - input_filters=int(options['i']), - output_filters=int(options['o']), - expand_ratio=int(options['e']), - id_skip=('noskip' not in block_string), - se_ratio=float(options['se']) if 'se' in options else None, - stride=[int(options['s'][0])]) - - @staticmethod - def _encode_block_string(block): - """Encodes a block to a string.""" - args = [ - 'r%d' % block.num_repeat, - 'k%d' % block.kernel_size, - 's%d%d' % (block.strides[0], block.strides[1]), - 'e%s' % block.expand_ratio, - 'i%d' % block.input_filters, - 'o%d' % block.output_filters - ] - if 0 < block.se_ratio <= 1: - args.append('se%s' % block.se_ratio) - if block.id_skip is False: - args.append('noskip') - return '_'.join(args) - - @staticmethod - def decode(string_list: list): - """ - Decode a list of string notations to specify blocks in the network. - - string_list: list of strings, each string is a notation of block - return - list of BlockArgs namedtuples of block args - """ - assert isinstance(string_list, list) - blocks_args = [] - for block_string in string_list: - blocks_args.append(BlockDecoder._decode_block_string(block_string)) - return blocks_args - - @staticmethod - def encode(blocks_args: list): - """ - Encodes a list of BlockArgs to a list of strings. 
- - :param blocks_args: a list of BlockArgs namedtuples of block args - :return: a list of strings, each string is a notation of block - """ - block_strings = [] - for block in blocks_args: - block_strings.append(BlockDecoder._encode_block_string(block)) - return block_strings - - -def initial_type(name: str, use_bias: bool = False): - param_attr = ParamAttr(name=name + "_weights") - if use_bias: - bias_attr = ParamAttr(name=name + "_offset") - else: - bias_attr = False - return param_attr, bias_attr - - -def init_batch_norm_layer(name: str = "batch_norm"): - param_attr = ParamAttr(name=name + "_scale") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def init_fc_layer(name: str = "fc"): - param_attr = ParamAttr(name=name + "_weights") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): - """Calculate padding size.""" - if img_size % stride == 0: - out_size = max(filter_size - stride, 0) - else: - out_size = max(filter_size - (img_size % stride), 0) - return out_size // 2, out_size - out_size // 2 - - -inp_shape = {"b1": [240, 120, 120, 60, 30, 15, 15, 8]} - - -def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): - """Drop input connection""" - if is_test: - return inputs - keep_prob = 1.0 - prob - inputs_shape = paddle.shape(inputs) - random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) - binary_tensor = paddle.floor(random_tensor) - output = inputs / keep_prob * binary_tensor - return output - - -class Conv2ds(nn.Layer): - """Basic conv layer""" - def __init__(self, - input_channels: int, - output_channels: int, - filter_size: int, - stride: int = 1, - padding: int = 0, - groups: int = None, - name: str = "conv2d", - act: str = None, - use_bias: bool = False, - padding_type: str = None, - model_name: str = None, - cur_stage: str = None): - super(Conv2ds, self).__init__() - assert act in [None, "swish", "sigmoid"] - self.act = act - - param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) - - def get_padding(filter_size, stride=1, dilation=1): - padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 - return padding - - inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] - self.need_crop = False - if padding_type == "SAME": - top_padding, bottom_padding = cal_padding(inps, stride, filter_size) - left_padding, right_padding = cal_padding(inps, stride, filter_size) - height_padding = bottom_padding - width_padding = right_padding - if top_padding != bottom_padding or left_padding != right_padding: - height_padding = top_padding + stride - width_padding = left_padding + stride - self.need_crop = True - padding = [height_padding, width_padding] - elif padding_type == "VALID": - height_padding = 0 - width_padding = 0 - padding = [height_padding, width_padding] - elif padding_type == "DYNAMIC": - padding = get_padding(filter_size, stride) - else: - padding = padding_type - - groups = 1 if groups is None else groups - self._conv = Conv2d(input_channels, - output_channels, - filter_size, - groups=groups, - stride=stride, - padding=padding, - weight_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - x = self._conv(inputs) - if self.act == "swish": - x = F.swish(x) - elif self.act == "sigmoid": - x = F.sigmoid(x) - - if self.need_crop: - x = x[:, :, 1:, 1:] - return x - - -class ConvBNLayer(nn.Layer): - """Basic conv bn 
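The SAME-padding arithmetic in cal_padding and Conv2ds above deserves a worked number: when the total padding is odd it cannot be split evenly, so the layer over-pads both sides by top_padding + stride and crops the first row and column afterwards (need_crop). A standalone check for a 5x5 stride-2 conv on b1's 120x120 stage:

def cal_padding_ref(img_size, stride, filter_size):
    if img_size % stride == 0:
        out = max(filter_size - stride, 0)
    else:
        out = max(filter_size - (img_size % stride), 0)
    return out // 2, out - out // 2

print(cal_padding_ref(120, 2, 5))   # (1, 2): asymmetric, so need_crop is set
# Conv2ds then pads 1 + 2 = 3 on every side, giving a 61x61 output, and the
# [:, :, 1:, 1:] crop restores the expected ceil(120 / 2) = 60x60.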
layer.""" - def __init__(self, - input_channels: int, - filter_size: int, - output_channels: int, - stride: int = 1, - num_groups: int = 1, - padding_type: str = "SAME", - conv_act: str = None, - bn_act: str = "swish", - use_bn: bool = True, - use_bias: bool = False, - name: str = None, - conv_name: str = None, - bn_name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2ds(input_channels=input_channels, - output_channels=output_channels, - filter_size=filter_size, - stride=stride, - groups=num_groups, - act=conv_act, - padding_type=padding_type, - name=conv_name, - use_bias=use_bias, - model_name=model_name, - cur_stage=cur_stage) - self.use_bn = use_bn - if use_bn is True: - bn_name = name + bn_name - param_attr, bias_attr = init_batch_norm_layer(bn_name) - - self._bn = BatchNorm(num_channels=output_channels, - act=bn_act, - momentum=0.99, - epsilon=0.001, - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - param_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - if self.use_bn: - x = self._conv(inputs) - x = self._bn(x) - return x - else: - return self._conv(inputs) - - -class ExpandConvNorm(nn.Layer): - """Expand conv norm layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ExpandConvNorm, self).__init__() - - self.oup = block_args.input_filters * block_args.expand_ratio - self.expand_ratio = block_args.expand_ratio - - if self.expand_ratio != 1: - self._conv = ConvBNLayer(input_channels, - 1, - self.oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_expand_conv", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - if self.expand_ratio != 1: - return self._conv(inputs) - else: - return inputs - - -class DepthwiseConvNorm(nn.Layer): - """Depthwise conv norm layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(DepthwiseConvNorm, self).__init__() - - self.k = block_args.kernel_size - self.s = block_args.stride - if isinstance(self.s, list) or isinstance(self.s, tuple): - self.s = self.s[0] - oup = block_args.input_filters * block_args.expand_ratio - - self._conv = ConvBNLayer(input_channels, - self.k, - oup, - self.s, - num_groups=input_channels, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_depthwise_conv", - bn_name="_bn1", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ProjectConvNorm(nn.Layer): - """Projection conv bn layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ProjectConvNorm, self).__init__() - - final_oup = block_args.output_filters - - self._conv = ConvBNLayer(input_channels, - 1, - final_oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_project_conv", - bn_name="_bn2", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class SEBlock(nn.Layer): - """Basic Squeeze-and-Excitation block for Efficientnet.""" - def __init__(self, - input_channels: int, - 
num_squeezed_channels: int, - oup: int, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(SEBlock, self).__init__() - - self._pool = AdaptiveAvgPool2d(1) - self._conv1 = Conv2ds(input_channels, - num_squeezed_channels, - 1, - use_bias=True, - padding_type=padding_type, - act="swish", - name=name + "_se_reduce") - - self._conv2 = Conv2ds(num_squeezed_channels, - oup, - 1, - act="sigmoid", - use_bias=True, - padding_type=padding_type, - name=name + "_se_expand") - - def forward(self, inputs: paddle.Tensor): - x = self._pool(inputs) - x = self._conv1(x) - x = self._conv2(x) - return paddle.multiply(inputs, x) - - -class MbConvBlock(nn.Layer): - """Mobile inverted bottleneck convolution for Efficientnet.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - use_se: bool, - name: str = None, - drop_connect_rate: float = None, - is_test: bool = False, - model_name: str = None, - cur_stage: str = None): - super(MbConvBlock, self).__init__() - - oup = block_args.input_filters * block_args.expand_ratio - self.block_args = block_args - self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) - self.id_skip = block_args.id_skip - self.expand_ratio = block_args.expand_ratio - self.drop_connect_rate = drop_connect_rate - self.is_test = is_test - - if self.expand_ratio != 1: - self._ecn = ExpandConvNorm(input_channels, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._dcn = DepthwiseConvNorm(input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - if self.has_se: - num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) - self._se = SEBlock(input_channels * block_args.expand_ratio, - num_squeezed_channels, - oup, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._pcn = ProjectConvNorm(input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - x = inputs - if self.expand_ratio != 1: - x = self._ecn(x) - x = F.swish(x) - x = self._dcn(x) - x = F.swish(x) - if self.has_se: - x = self._se(x) - x = self._pcn(x) - if self.id_skip and \ - self.block_args.stride == 1 and \ - self.block_args.input_filters == self.block_args.output_filters: - if self.drop_connect_rate: - x = _drop_connect(x, self.drop_connect_rate, self.is_test) - x = paddle.elementwise_add(x, inputs) - return x - - -class ConvStemNorm(nn.Layer): - """Basic conv stem norm block for extracting features.""" - def __init__(self, - input_channels: int, - padding_type: str, - _global_params: dict, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvStemNorm, self).__init__() - - output_channels = round_filters(32, _global_params) - self._conv = ConvBNLayer(input_channels, - filter_size=3, - output_channels=output_channels, - stride=2, - bn_act=None, - padding_type=padding_type, - name="", - conv_name="_conv_stem", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ExtractFeatures(nn.Layer): - """Extract features.""" - def __init__(self, - input_channels: int, - _block_args: dict, - _global_params: dict, - padding_type: str, - use_se: 
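MbConvBlock.forward above only adds the residual when the block preserves both resolution and width (stride 1 and input_filters == output_filters). For the seven stage specs that is never the first copy of a stage, since every stage changes its filter count, while the repeated copies that ExtractFeatures creates (stride forced to 1, input_filters set to output_filters) always qualify. A quick enumeration, with repeat counts shown before round_repeats:

stages = [(1, 1, 32, 16), (2, 2, 16, 24), (2, 2, 24, 40), (3, 2, 40, 80),
          (3, 1, 80, 112), (4, 2, 112, 192), (1, 1, 192, 320)]
for r, s, i, o in stages:   # (num_repeat, stride, input_filters, output_filters)
    print(f'{i:3d} -> {o:3d}: first copy skips={s == 1 and i == o}, '
          f'plus {r - 1} repeats that always skip')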
bool, - is_test: bool, - model_name: str = None): - super(ExtractFeatures, self).__init__() - - self._global_params = _global_params - - self._conv_stem = ConvStemNorm(input_channels, - padding_type=padding_type, - _global_params=_global_params, - model_name=model_name, - cur_stage=0) - - self.block_args_copy = copy.deepcopy(_block_args) - idx = 0 - block_size = 0 - for block_arg in self.block_args_copy: - block_arg = block_arg._replace(input_filters=round_filters(block_arg.input_filters, _global_params), - output_filters=round_filters(block_arg.output_filters, _global_params), - num_repeat=round_repeats(block_arg.num_repeat, _global_params)) - block_size += 1 - for _ in range(block_arg.num_repeat - 1): - block_size += 1 - - self.conv_seq = [] - cur_stage = 1 - for block_args in _block_args: - block_args = block_args._replace(input_filters=round_filters(block_args.input_filters, _global_params), - output_filters=round_filters(block_args.output_filters, _global_params), - num_repeat=round_repeats(block_args.num_repeat, _global_params)) - - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - - _mc_block = self.add_sublayer( - "_blocks." + str(idx) + ".", - MbConvBlock(block_args.input_filters, - block_args=block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - if block_args.num_repeat > 1: - block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) - for _ in range(block_args.num_repeat - 1): - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - _mc_block = self.add_sublayer( - "block." + str(idx) + ".", - MbConvBlock(block_args.input_filters, - block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." 
+ str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - cur_stage += 1 - - def forward(self, inputs: paddle.Tensor): - x = self._conv_stem(inputs) - x = F.swish(x) - for _mc_block in self.conv_seq: - x = _mc_block(x) - return x - - -@moduleinfo(name="efficientnetb1_imagenet", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="efficientnetb1_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class EfficientNet_B1(nn.Layer): - def __init__(self, - is_test: bool = False, - padding_type: str = "SAME", - override_params: dict = None, - use_se: bool = True, - class_dim: int = 1000, - load_checkpoint: str = None): - super(EfficientNet_B1, self).__init__() - - model_name = 'efficientnet-b1' - self.name = "b1" - self._block_args, self._global_params = get_model_params(model_name, override_params) - self.padding_type = padding_type - self.use_se = use_se - self.is_test = is_test - - self._ef = ExtractFeatures(3, - self._block_args, - self._global_params, - self.padding_type, - self.use_se, - self.is_test, - model_name=self.name) - - output_channels = round_filters(1280, self._global_params) - oup = 320 - - self._conv = ConvBNLayer(oup, - 1, - output_channels, - bn_act="swish", - padding_type=self.padding_type, - name="", - conv_name="_conv_head", - bn_name="_bn1", - model_name=self.name, - cur_stage=7) - self._pool = AdaptiveAvgPool2d(1) - - if self._global_params.dropout_rate: - self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") - - param_attr, bias_attr = init_fc_layer("_fc") - self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'efficientnet_b1_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b1_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - x = self._ef(inputs) - x = self._conv(x) - x = self._pool(x) - if self._global_params.dropout_rate: - x = self._drop(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._fc(x) - return x diff --git a/hub_module/modules/image/classification/efficientnetb2_imagenet/module.py b/hub_module/modules/image/classification/efficientnetb2_imagenet/module.py deleted file mode 100644 index 6b92e01c7d8071d949f9821367801f95270fea5d..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/efficientnetb2_imagenet/module.py +++ /dev/null @@ -1,758 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import math -import collections -import re -import copy - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - -GlobalParams = collections.namedtuple('GlobalParams', [ - 'batch_norm_momentum', - 'batch_norm_epsilon', - 'dropout_rate', - 'num_classes', - 'width_coefficient', - 'depth_coefficient', - 'depth_divisor', - 'min_depth', - 'drop_connect_rate', -]) - -BlockArgs = collections.namedtuple( - 'BlockArgs', - ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) - -GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) -BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) - - -def efficientnet_params(model_name: str): - """ Map EfficientNet model name to parameter coefficients. """ - params_dict = { - # Coefficients: width,depth,resolution,dropout - 'efficientnet-b2': (1.1, 1.2, 260, 0.3) - } - return params_dict[model_name] - - -def efficientnet(width_coefficient: float = None, - depth_coefficient: float = None, - dropout_rate: float = 0.2, - drop_connect_rate: float = 0.2): - """ Get block arguments according to parameter and coefficients. """ - blocks_args = [ - 'r1_k3_s11_e1_i32_o16_se0.25', - 'r2_k3_s22_e6_i16_o24_se0.25', - 'r2_k5_s22_e6_i24_o40_se0.25', - 'r3_k3_s22_e6_i40_o80_se0.25', - 'r3_k5_s11_e6_i80_o112_se0.25', - 'r4_k5_s22_e6_i112_o192_se0.25', - 'r1_k3_s11_e6_i192_o320_se0.25', - ] - blocks_args = BlockDecoder.decode(blocks_args) - - global_params = GlobalParams(batch_norm_momentum=0.99, - batch_norm_epsilon=1e-3, - dropout_rate=dropout_rate, - drop_connect_rate=drop_connect_rate, - num_classes=1000, - width_coefficient=width_coefficient, - depth_coefficient=depth_coefficient, - depth_divisor=8, - min_depth=None) - - return blocks_args, global_params - - -def get_model_params(model_name: str, override_params: dict): - """ Get the block args and global params for a given model """ - if model_name.startswith('efficientnet'): - w, d, _, p = efficientnet_params(model_name) - blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) - else: - raise NotImplementedError('model name is not pre-defined: %s' % model_name) - if override_params: - global_params = global_params._replace(**override_params) - return blocks_args, global_params - - -def round_filters(filters: int, global_params: dict): - """ Calculate and round number of filters based on depth multiplier. """ - multiplier = global_params.width_coefficient - if not multiplier: - return filters - divisor = global_params.depth_divisor - min_depth = global_params.min_depth - filters *= multiplier - min_depth = min_depth or divisor - new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) - if new_filters < 0.9 * filters: # prevent rounding by more than 10% - new_filters += divisor - return int(new_filters) - - -def round_repeats(repeats: int, global_params: dict): - """ Round number of filters based on depth multiplier. 
""" - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) - - -class BlockDecoder(object): - """ - Block Decoder, straight from the official TensorFlow repository. - """ - @staticmethod - def _decode_block_string(block_string: str): - """ Gets a block through a string notation of arguments. """ - assert isinstance(block_string, str) - - ops = block_string.split('_') - options = {} - for op in ops: - splits = re.split(r'(\d.*)', op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value - - # Check stride - cond_1 = ('s' in options and len(options['s']) == 1) - cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) - assert (cond_1 or cond_2) - - return BlockArgs(kernel_size=int(options['k']), - num_repeat=int(options['r']), - input_filters=int(options['i']), - output_filters=int(options['o']), - expand_ratio=int(options['e']), - id_skip=('noskip' not in block_string), - se_ratio=float(options['se']) if 'se' in options else None, - stride=[int(options['s'][0])]) - - @staticmethod - def _encode_block_string(block): - """Encodes a block to a string.""" - args = [ - 'r%d' % block.num_repeat, - 'k%d' % block.kernel_size, - 's%d%d' % (block.strides[0], block.strides[1]), - 'e%s' % block.expand_ratio, - 'i%d' % block.input_filters, - 'o%d' % block.output_filters - ] - if 0 < block.se_ratio <= 1: - args.append('se%s' % block.se_ratio) - if block.id_skip is False: - args.append('noskip') - return '_'.join(args) - - @staticmethod - def decode(string_list: list): - """ - Decode a list of string notations to specify blocks in the network. - - string_list: list of strings, each string is a notation of block - return - list of BlockArgs namedtuples of block args - """ - assert isinstance(string_list, list) - blocks_args = [] - for block_string in string_list: - blocks_args.append(BlockDecoder._decode_block_string(block_string)) - return blocks_args - - @staticmethod - def encode(blocks_args: list): - """ - Encodes a list of BlockArgs to a list of strings. 
- - :param blocks_args: a list of BlockArgs namedtuples of block args - :return: a list of strings, each string is a notation of block - """ - block_strings = [] - for block in blocks_args: - block_strings.append(BlockDecoder._encode_block_string(block)) - return block_strings - - -def initial_type(name: str, use_bias: bool = False): - param_attr = ParamAttr(name=name + "_weights") - if use_bias: - bias_attr = ParamAttr(name=name + "_offset") - else: - bias_attr = False - return param_attr, bias_attr - - -def init_batch_norm_layer(name: str = "batch_norm"): - param_attr = ParamAttr(name=name + "_scale") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def init_fc_layer(name: str = "fc"): - param_attr = ParamAttr(name=name + "_weights") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): - """Calculate padding size.""" - if img_size % stride == 0: - out_size = max(filter_size - stride, 0) - else: - out_size = max(filter_size - (img_size % stride), 0) - return out_size // 2, out_size - out_size // 2 - - -inp_shape = {"b2": [260, 130, 130, 65, 33, 17, 17, 9]} - - -def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): - """Drop input connection""" - if is_test: - return inputs - keep_prob = 1.0 - prob - inputs_shape = paddle.shape(inputs) - random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) - binary_tensor = paddle.floor(random_tensor) - output = inputs / keep_prob * binary_tensor - return output - - -class Conv2ds(nn.Layer): - """Basic conv layer""" - def __init__(self, - input_channels: int, - output_channels: int, - filter_size: int, - stride: int = 1, - padding: int = 0, - groups: int = None, - name: str = "conv2d", - act: str = None, - use_bias: bool = False, - padding_type: str = None, - model_name: str = None, - cur_stage: str = None): - super(Conv2ds, self).__init__() - assert act in [None, "swish", "sigmoid"] - self.act = act - - param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) - - def get_padding(filter_size, stride=1, dilation=1): - padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 - return padding - - inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] - self.need_crop = False - if padding_type == "SAME": - top_padding, bottom_padding = cal_padding(inps, stride, filter_size) - left_padding, right_padding = cal_padding(inps, stride, filter_size) - height_padding = bottom_padding - width_padding = right_padding - if top_padding != bottom_padding or left_padding != right_padding: - height_padding = top_padding + stride - width_padding = left_padding + stride - self.need_crop = True - padding = [height_padding, width_padding] - elif padding_type == "VALID": - height_padding = 0 - width_padding = 0 - padding = [height_padding, width_padding] - elif padding_type == "DYNAMIC": - padding = get_padding(filter_size, stride) - else: - padding = padding_type - - groups = 1 if groups is None else groups - self._conv = Conv2d(input_channels, - output_channels, - filter_size, - groups=groups, - stride=stride, - padding=padding, - weight_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - x = self._conv(inputs) - if self.act == "swish": - x = F.swish(x) - elif self.act == "sigmoid": - x = F.sigmoid(x) - - if self.need_crop: - x = x[:, :, 1:, 1:] - return x - - -class ConvBNLayer(nn.Layer): - """Basic conv bn 
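The hard-coded inp_shape table above is just the stride schedule applied to b2's 260x260 input: the stem halves it, stages 2, 3, 4 and 6 halve it again, and SAME padding makes every halving a ceiling division (the last entry, 9, serves both stage 7 and the conv head, which share index 7):

import math

size, sizes = 260, [260]
for stride in [2, 1, 2, 2, 2, 1, 2]:   # stem, then the strides of stages 1-6
    size = math.ceil(size / stride)
    sizes.append(size)
print(sizes)   # [260, 130, 130, 65, 33, 17, 17, 9] == inp_shape['b2']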
layer.""" - def __init__(self, - input_channels: int, - filter_size: int, - output_channels: int, - stride: int = 1, - num_groups: int = 1, - padding_type: str = "SAME", - conv_act: str = None, - bn_act: str = "swish", - use_bn: bool = True, - use_bias: bool = False, - name: str = None, - conv_name: str = None, - bn_name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2ds(input_channels=input_channels, - output_channels=output_channels, - filter_size=filter_size, - stride=stride, - groups=num_groups, - act=conv_act, - padding_type=padding_type, - name=conv_name, - use_bias=use_bias, - model_name=model_name, - cur_stage=cur_stage) - self.use_bn = use_bn - if use_bn is True: - bn_name = name + bn_name - param_attr, bias_attr = init_batch_norm_layer(bn_name) - - self._bn = BatchNorm(num_channels=output_channels, - act=bn_act, - momentum=0.99, - epsilon=0.001, - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - param_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - if self.use_bn: - x = self._conv(inputs) - x = self._bn(x) - return x - else: - return self._conv(inputs) - - -class ExpandConvNorm(nn.Layer): - """Expand conv norm layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ExpandConvNorm, self).__init__() - - self.oup = block_args.input_filters * block_args.expand_ratio - self.expand_ratio = block_args.expand_ratio - - if self.expand_ratio != 1: - self._conv = ConvBNLayer(input_channels, - 1, - self.oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_expand_conv", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - if self.expand_ratio != 1: - return self._conv(inputs) - else: - return inputs - - -class DepthwiseConvNorm(nn.Layer): - """Depthwise conv norm layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(DepthwiseConvNorm, self).__init__() - - self.k = block_args.kernel_size - self.s = block_args.stride - if isinstance(self.s, list) or isinstance(self.s, tuple): - self.s = self.s[0] - oup = block_args.input_filters * block_args.expand_ratio - - self._conv = ConvBNLayer(input_channels, - self.k, - oup, - self.s, - num_groups=input_channels, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_depthwise_conv", - bn_name="_bn1", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ProjectConvNorm(nn.Layer): - """Projection conv bn layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ProjectConvNorm, self).__init__() - - final_oup = block_args.output_filters - - self._conv = ConvBNLayer(input_channels, - 1, - final_oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_project_conv", - bn_name="_bn2", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class SEBlock(nn.Layer): - """Basic Squeeze-and-Excitation block for Efficientnet.""" - def __init__(self, - input_channels: int, - 
num_squeezed_channels: int, - oup: int, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(SEBlock, self).__init__() - - self._pool = AdaptiveAvgPool2d(1) - self._conv1 = Conv2ds(input_channels, - num_squeezed_channels, - 1, - use_bias=True, - padding_type=padding_type, - act="swish", - name=name + "_se_reduce") - - self._conv2 = Conv2ds(num_squeezed_channels, - oup, - 1, - act="sigmoid", - use_bias=True, - padding_type=padding_type, - name=name + "_se_expand") - - def forward(self, inputs: paddle.Tensor): - x = self._pool(inputs) - x = self._conv1(x) - x = self._conv2(x) - return paddle.multiply(inputs, x) - - -class MbConvBlock(nn.Layer): - """Mobile inverted bottleneck convolution for Efficientnet.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - use_se: bool, - name: str = None, - drop_connect_rate: float = None, - is_test: bool = False, - model_name: str = None, - cur_stage: str = None): - super(MbConvBlock, self).__init__() - - oup = block_args.input_filters * block_args.expand_ratio - self.block_args = block_args - self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) - self.id_skip = block_args.id_skip - self.expand_ratio = block_args.expand_ratio - self.drop_connect_rate = drop_connect_rate - self.is_test = is_test - - if self.expand_ratio != 1: - self._ecn = ExpandConvNorm(input_channels, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._dcn = DepthwiseConvNorm(input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - if self.has_se: - num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) - self._se = SEBlock(input_channels * block_args.expand_ratio, - num_squeezed_channels, - oup, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._pcn = ProjectConvNorm(input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - x = inputs - if self.expand_ratio != 1: - x = self._ecn(x) - x = F.swish(x) - x = self._dcn(x) - x = F.swish(x) - if self.has_se: - x = self._se(x) - x = self._pcn(x) - if self.id_skip and \ - self.block_args.stride == 1 and \ - self.block_args.input_filters == self.block_args.output_filters: - if self.drop_connect_rate: - x = _drop_connect(x, self.drop_connect_rate, self.is_test) - x = paddle.elementwise_add(x, inputs) - return x - - -class ConvStemNorm(nn.Layer): - """Basic conv stem norm block for extracting features.""" - def __init__(self, - input_channels: int, - padding_type: str, - _global_params: dict, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvStemNorm, self).__init__() - - output_channels = round_filters(32, _global_params) - self._conv = ConvBNLayer(input_channels, - filter_size=3, - output_channels=output_channels, - stride=2, - bn_act=None, - padding_type=padding_type, - name="", - conv_name="_conv_stem", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ExtractFeatures(nn.Layer): - """Extract features.""" - def __init__(self, - input_channels: int, - _block_args: dict, - _global_params: dict, - padding_type: str, - use_se: 
bool, - is_test: bool, - model_name: str = None): - super(ExtractFeatures, self).__init__() - - self._global_params = _global_params - - self._conv_stem = ConvStemNorm(input_channels, - padding_type=padding_type, - _global_params=_global_params, - model_name=model_name, - cur_stage=0) - - self.block_args_copy = copy.deepcopy(_block_args) - idx = 0 - block_size = 0 - for block_arg in self.block_args_copy: - block_arg = block_arg._replace(input_filters=round_filters(block_arg.input_filters, _global_params), - output_filters=round_filters(block_arg.output_filters, _global_params), - num_repeat=round_repeats(block_arg.num_repeat, _global_params)) - block_size += 1 - for _ in range(block_arg.num_repeat - 1): - block_size += 1 - - self.conv_seq = [] - cur_stage = 1 - for block_args in _block_args: - block_args = block_args._replace(input_filters=round_filters(block_args.input_filters, _global_params), - output_filters=round_filters(block_args.output_filters, _global_params), - num_repeat=round_repeats(block_args.num_repeat, _global_params)) - - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - - _mc_block = self.add_sublayer( - "_blocks." + str(idx) + ".", - MbConvBlock(block_args.input_filters, - block_args=block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - if block_args.num_repeat > 1: - block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) - for _ in range(block_args.num_repeat - 1): - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - _mc_block = self.add_sublayer( - "block." + str(idx) + ".", - MbConvBlock(block_args.input_filters, - block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." 
+ str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - cur_stage += 1 - - def forward(self, inputs: paddle.Tensor): - x = self._conv_stem(inputs) - x = F.swish(x) - for _mc_block in self.conv_seq: - x = _mc_block(x) - return x - - -@moduleinfo(name="efficientnetb2_imagenet", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="efficientnetb2_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class EfficientNet_B2(nn.Layer): - def __init__(self, - is_test: bool = False, - padding_type: str = "SAME", - override_params: dict = None, - use_se: bool = True, - class_dim: int = 1000, - load_checkpoint: str = None): - super(EfficientNet_B2, self).__init__() - - model_name = 'efficientnet-b2' - self.name = "b2" - self._block_args, self._global_params = get_model_params(model_name, override_params) - self.padding_type = padding_type - self.use_se = use_se - self.is_test = is_test - - self._ef = ExtractFeatures(3, - self._block_args, - self._global_params, - self.padding_type, - self.use_se, - self.is_test, - model_name=self.name) - - output_channels = round_filters(1280, self._global_params) - oup = 352 - - self._conv = ConvBNLayer(oup, - 1, - output_channels, - bn_act="swish", - padding_type=self.padding_type, - name="", - conv_name="_conv_head", - bn_name="_bn1", - model_name=self.name, - cur_stage=7) - self._pool = AdaptiveAvgPool2d(1) - - if self._global_params.dropout_rate: - self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") - - param_attr, bias_attr = init_fc_layer("_fc") - self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'efficientnet_b2_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b2_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - x = self._ef(inputs) - x = self._conv(x) - x = self._pool(x) - if self._global_params.dropout_rate: - x = self._drop(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._fc(x) - return x diff --git a/hub_module/modules/image/classification/efficientnetb3_imagenet/module.py b/hub_module/modules/image/classification/efficientnetb3_imagenet/module.py deleted file mode 100644 index 24759985a152d46c703ab634aa264edcdd0e4dd0..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/efficientnetb3_imagenet/module.py +++ /dev/null @@ -1,758 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import math -import collections -import re -import copy - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - -GlobalParams = collections.namedtuple('GlobalParams', [ - 'batch_norm_momentum', - 'batch_norm_epsilon', - 'dropout_rate', - 'num_classes', - 'width_coefficient', - 'depth_coefficient', - 'depth_divisor', - 'min_depth', - 'drop_connect_rate', -]) - -BlockArgs = collections.namedtuple( - 'BlockArgs', - ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) - -GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) -BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) - - -def efficientnet_params(model_name: str): - """ Map EfficientNet model name to parameter coefficients. """ - params_dict = { - # Coefficients: width,depth,resolution,dropout - 'efficientnet-b3': (1.2, 1.4, 300, 0.3) - } - return params_dict[model_name] - - -def efficientnet(width_coefficient: float = None, - depth_coefficient: float = None, - dropout_rate: float = 0.2, - drop_connect_rate: float = 0.2): - """ Get block arguments according to parameter and coefficients. """ - blocks_args = [ - 'r1_k3_s11_e1_i32_o16_se0.25', - 'r2_k3_s22_e6_i16_o24_se0.25', - 'r2_k5_s22_e6_i24_o40_se0.25', - 'r3_k3_s22_e6_i40_o80_se0.25', - 'r3_k5_s11_e6_i80_o112_se0.25', - 'r4_k5_s22_e6_i112_o192_se0.25', - 'r1_k3_s11_e6_i192_o320_se0.25', - ] - blocks_args = BlockDecoder.decode(blocks_args) - - global_params = GlobalParams(batch_norm_momentum=0.99, - batch_norm_epsilon=1e-3, - dropout_rate=dropout_rate, - drop_connect_rate=drop_connect_rate, - num_classes=1000, - width_coefficient=width_coefficient, - depth_coefficient=depth_coefficient, - depth_divisor=8, - min_depth=None) - - return blocks_args, global_params - - -def get_model_params(model_name: str, override_params: dict): - """ Get the block args and global params for a given model """ - if model_name.startswith('efficientnet'): - w, d, _, p = efficientnet_params(model_name) - blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) - else: - raise NotImplementedError('model name is not pre-defined: %s' % model_name) - if override_params: - global_params = global_params._replace(**override_params) - return blocks_args, global_params - - -def round_filters(filters: int, global_params: dict): - """ Calculate and round number of filters based on depth multiplier. """ - multiplier = global_params.width_coefficient - if not multiplier: - return filters - divisor = global_params.depth_divisor - min_depth = global_params.min_depth - filters *= multiplier - min_depth = min_depth or divisor - new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) - if new_filters < 0.9 * filters: # prevent rounding by more than 10% - new_filters += divisor - return int(new_filters) - - -def round_repeats(repeats: int, global_params: dict): - """ Round number of filters based on depth multiplier. 
""" - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) - - -class BlockDecoder(object): - """ - Block Decoder, straight from the official TensorFlow repository. - """ - @staticmethod - def _decode_block_string(block_string: str): - """ Gets a block through a string notation of arguments. """ - assert isinstance(block_string, str) - - ops = block_string.split('_') - options = {} - for op in ops: - splits = re.split(r'(\d.*)', op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value - - # Check stride - cond_1 = ('s' in options and len(options['s']) == 1) - cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) - assert (cond_1 or cond_2) - - return BlockArgs(kernel_size=int(options['k']), - num_repeat=int(options['r']), - input_filters=int(options['i']), - output_filters=int(options['o']), - expand_ratio=int(options['e']), - id_skip=('noskip' not in block_string), - se_ratio=float(options['se']) if 'se' in options else None, - stride=[int(options['s'][0])]) - - @staticmethod - def _encode_block_string(block): - """Encodes a block to a string.""" - args = [ - 'r%d' % block.num_repeat, - 'k%d' % block.kernel_size, - 's%d%d' % (block.strides[0], block.strides[1]), - 'e%s' % block.expand_ratio, - 'i%d' % block.input_filters, - 'o%d' % block.output_filters - ] - if 0 < block.se_ratio <= 1: - args.append('se%s' % block.se_ratio) - if block.id_skip is False: - args.append('noskip') - return '_'.join(args) - - @staticmethod - def decode(string_list: list): - """ - Decode a list of string notations to specify blocks in the network. - - string_list: list of strings, each string is a notation of block - return - list of BlockArgs namedtuples of block args - """ - assert isinstance(string_list, list) - blocks_args = [] - for block_string in string_list: - blocks_args.append(BlockDecoder._decode_block_string(block_string)) - return blocks_args - - @staticmethod - def encode(blocks_args: list): - """ - Encodes a list of BlockArgs to a list of strings. 
- - :param blocks_args: a list of BlockArgs namedtuples of block args - :return: a list of strings, each string is a notation of block - """ - block_strings = [] - for block in blocks_args: - block_strings.append(BlockDecoder._encode_block_string(block)) - return block_strings - - -def initial_type(name: str, use_bias: bool = False): - param_attr = ParamAttr(name=name + "_weights") - if use_bias: - bias_attr = ParamAttr(name=name + "_offset") - else: - bias_attr = False - return param_attr, bias_attr - - -def init_batch_norm_layer(name: str = "batch_norm"): - param_attr = ParamAttr(name=name + "_scale") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def init_fc_layer(name: str = "fc"): - param_attr = ParamAttr(name=name + "_weights") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): - """Calculate padding size.""" - if img_size % stride == 0: - out_size = max(filter_size - stride, 0) - else: - out_size = max(filter_size - (img_size % stride), 0) - return out_size // 2, out_size - out_size // 2 - - -inp_shape = {"b3": [300, 150, 150, 75, 38, 19, 19, 10]} - - -def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): - """Drop input connection""" - if is_test: - return inputs - keep_prob = 1.0 - prob - inputs_shape = paddle.shape(inputs) - random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) - binary_tensor = paddle.floor(random_tensor) - output = inputs / keep_prob * binary_tensor - return output - - -class Conv2ds(nn.Layer): - """Basic conv layer""" - def __init__(self, - input_channels: int, - output_channels: int, - filter_size: int, - stride: int = 1, - padding: int = 0, - groups: int = None, - name: str = "conv2d", - act: str = None, - use_bias: bool = False, - padding_type: str = None, - model_name: str = None, - cur_stage: str = None): - super(Conv2ds, self).__init__() - assert act in [None, "swish", "sigmoid"] - self.act = act - - param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) - - def get_padding(filter_size, stride=1, dilation=1): - padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 - return padding - - inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] - self.need_crop = False - if padding_type == "SAME": - top_padding, bottom_padding = cal_padding(inps, stride, filter_size) - left_padding, right_padding = cal_padding(inps, stride, filter_size) - height_padding = bottom_padding - width_padding = right_padding - if top_padding != bottom_padding or left_padding != right_padding: - height_padding = top_padding + stride - width_padding = left_padding + stride - self.need_crop = True - padding = [height_padding, width_padding] - elif padding_type == "VALID": - height_padding = 0 - width_padding = 0 - padding = [height_padding, width_padding] - elif padding_type == "DYNAMIC": - padding = get_padding(filter_size, stride) - else: - padding = padding_type - - groups = 1 if groups is None else groups - self._conv = Conv2d(input_channels, - output_channels, - filter_size, - groups=groups, - stride=stride, - padding=padding, - weight_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - x = self._conv(inputs) - if self.act == "swish": - x = F.swish(x) - elif self.act == "sigmoid": - x = F.sigmoid(x) - - if self.need_crop: - x = x[:, :, 1:, 1:] - return x - - -class ConvBNLayer(nn.Layer): - """Basic conv bn 
layer.""" - def __init__(self, - input_channels: int, - filter_size: int, - output_channels: int, - stride: int = 1, - num_groups: int = 1, - padding_type: str = "SAME", - conv_act: str = None, - bn_act: str = "swish", - use_bn: bool = True, - use_bias: bool = False, - name: str = None, - conv_name: str = None, - bn_name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2ds(input_channels=input_channels, - output_channels=output_channels, - filter_size=filter_size, - stride=stride, - groups=num_groups, - act=conv_act, - padding_type=padding_type, - name=conv_name, - use_bias=use_bias, - model_name=model_name, - cur_stage=cur_stage) - self.use_bn = use_bn - if use_bn is True: - bn_name = name + bn_name - param_attr, bias_attr = init_batch_norm_layer(bn_name) - - self._bn = BatchNorm(num_channels=output_channels, - act=bn_act, - momentum=0.99, - epsilon=0.001, - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - param_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - if self.use_bn: - x = self._conv(inputs) - x = self._bn(x) - return x - else: - return self._conv(inputs) - - -class ExpandConvNorm(nn.Layer): - """Expand conv norm layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ExpandConvNorm, self).__init__() - - self.oup = block_args.input_filters * block_args.expand_ratio - self.expand_ratio = block_args.expand_ratio - - if self.expand_ratio != 1: - self._conv = ConvBNLayer(input_channels, - 1, - self.oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_expand_conv", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - if self.expand_ratio != 1: - return self._conv(inputs) - else: - return inputs - - -class DepthwiseConvNorm(nn.Layer): - """Depthwise conv norm layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(DepthwiseConvNorm, self).__init__() - - self.k = block_args.kernel_size - self.s = block_args.stride - if isinstance(self.s, list) or isinstance(self.s, tuple): - self.s = self.s[0] - oup = block_args.input_filters * block_args.expand_ratio - - self._conv = ConvBNLayer(input_channels, - self.k, - oup, - self.s, - num_groups=input_channels, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_depthwise_conv", - bn_name="_bn1", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ProjectConvNorm(nn.Layer): - """Projection conv bn layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ProjectConvNorm, self).__init__() - - final_oup = block_args.output_filters - - self._conv = ConvBNLayer(input_channels, - 1, - final_oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_project_conv", - bn_name="_bn2", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class SEBlock(nn.Layer): - """Basic Squeeze-and-Excitation block for Efficientnet.""" - def __init__(self, - input_channels: int, - 
num_squeezed_channels: int, - oup: int, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(SEBlock, self).__init__() - - self._pool = AdaptiveAvgPool2d(1) - self._conv1 = Conv2ds(input_channels, - num_squeezed_channels, - 1, - use_bias=True, - padding_type=padding_type, - act="swish", - name=name + "_se_reduce") - - self._conv2 = Conv2ds(num_squeezed_channels, - oup, - 1, - act="sigmoid", - use_bias=True, - padding_type=padding_type, - name=name + "_se_expand") - - def forward(self, inputs: paddle.Tensor): - x = self._pool(inputs) - x = self._conv1(x) - x = self._conv2(x) - return paddle.multiply(inputs, x) - - -class MbConvBlock(nn.Layer): - """Mobile inverted bottleneck convolution for Efficientnet.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - use_se: bool, - name: str = None, - drop_connect_rate: float = None, - is_test: bool = False, - model_name: str = None, - cur_stage: str = None): - super(MbConvBlock, self).__init__() - - oup = block_args.input_filters * block_args.expand_ratio - self.block_args = block_args - self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) - self.id_skip = block_args.id_skip - self.expand_ratio = block_args.expand_ratio - self.drop_connect_rate = drop_connect_rate - self.is_test = is_test - - if self.expand_ratio != 1: - self._ecn = ExpandConvNorm(input_channels, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._dcn = DepthwiseConvNorm(input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - if self.has_se: - num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) - self._se = SEBlock(input_channels * block_args.expand_ratio, - num_squeezed_channels, - oup, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._pcn = ProjectConvNorm(input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - x = inputs - if self.expand_ratio != 1: - x = self._ecn(x) - x = F.swish(x) - x = self._dcn(x) - x = F.swish(x) - if self.has_se: - x = self._se(x) - x = self._pcn(x) - if self.id_skip and \ - self.block_args.stride == 1 and \ - self.block_args.input_filters == self.block_args.output_filters: - if self.drop_connect_rate: - x = _drop_connect(x, self.drop_connect_rate, self.is_test) - x = paddle.elementwise_add(x, inputs) - return x - - -class ConvStemNorm(nn.Layer): - """Basic conv stem norm block for extracting features.""" - def __init__(self, - input_channels: int, - padding_type: str, - _global_params: dict, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvStemNorm, self).__init__() - - output_channels = round_filters(32, _global_params) - self._conv = ConvBNLayer(input_channels, - filter_size=3, - output_channels=output_channels, - stride=2, - bn_act=None, - padding_type=padding_type, - name="", - conv_name="_conv_stem", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ExtractFeatures(nn.Layer): - """Extract features.""" - def __init__(self, - input_channels: int, - _block_args: dict, - _global_params: dict, - padding_type: str, - use_se: 
bool, - is_test: bool, - model_name: str = None): - super(ExtractFeatures, self).__init__() - - self._global_params = _global_params - - self._conv_stem = ConvStemNorm(input_channels, - padding_type=padding_type, - _global_params=_global_params, - model_name=model_name, - cur_stage=0) - - self.block_args_copy = copy.deepcopy(_block_args) - idx = 0 - block_size = 0 - for block_arg in self.block_args_copy: - block_arg = block_arg._replace(input_filters=round_filters(block_arg.input_filters, _global_params), - output_filters=round_filters(block_arg.output_filters, _global_params), - num_repeat=round_repeats(block_arg.num_repeat, _global_params)) - block_size += 1 - for _ in range(block_arg.num_repeat - 1): - block_size += 1 - - self.conv_seq = [] - cur_stage = 1 - for block_args in _block_args: - block_args = block_args._replace(input_filters=round_filters(block_args.input_filters, _global_params), - output_filters=round_filters(block_args.output_filters, _global_params), - num_repeat=round_repeats(block_args.num_repeat, _global_params)) - - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - - _mc_block = self.add_sublayer( - "_blocks." + str(idx) + ".", - MbConvBlock(block_args.input_filters, - block_args=block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - if block_args.num_repeat > 1: - block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) - for _ in range(block_args.num_repeat - 1): - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - _mc_block = self.add_sublayer( - "block." + str(idx) + ".", - MbConvBlock(block_args.input_filters, - block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." 
+ str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - cur_stage += 1 - - def forward(self, inputs: paddle.Tensor): - x = self._conv_stem(inputs) - x = F.swish(x) - for _mc_block in self.conv_seq: - x = _mc_block(x) - return x - - -@moduleinfo(name="efficientnetb3_imagenet", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="efficientnetb3_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class EfficientNet_B3(nn.Layer): - def __init__(self, - is_test: bool = False, - padding_type: str = "SAME", - override_params: dict = None, - use_se: bool = True, - class_dim: int = 1000, - load_checkpoint: str = None): - super(EfficientNet_B3, self).__init__() - - model_name = 'efficientnet-b3' - self.name = "b3" - self._block_args, self._global_params = get_model_params(model_name, override_params) - self.padding_type = padding_type - self.use_se = use_se - self.is_test = is_test - - self._ef = ExtractFeatures(3, - self._block_args, - self._global_params, - self.padding_type, - self.use_se, - self.is_test, - model_name=self.name) - - output_channels = round_filters(1280, self._global_params) - oup = 384 - - self._conv = ConvBNLayer(oup, - 1, - output_channels, - bn_act="swish", - padding_type=self.padding_type, - name="", - conv_name="_conv_head", - bn_name="_bn1", - model_name=self.name, - cur_stage=7) - self._pool = AdaptiveAvgPool2d(1) - - if self._global_params.dropout_rate: - self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") - - param_attr, bias_attr = init_fc_layer("_fc") - self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'efficientnet_b3_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b3_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - x = self._ef(inputs) - x = self._conv(x) - x = self._pool(x) - if self._global_params.dropout_rate: - x = self._drop(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._fc(x) - return x diff --git a/hub_module/modules/image/classification/efficientnetb4_imagenet/module.py b/hub_module/modules/image/classification/efficientnetb4_imagenet/module.py deleted file mode 100644 index 1aebdf78c588ed020a6ce1da5268237a9b919301..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/efficientnetb4_imagenet/module.py +++ /dev/null @@ -1,758 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
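Each of these EfficientNet modules configures its backbone from compact block strings such as `r2_k3_s22_e6_i16_o24_se0.25` (repeats, kernel size, stride, expand ratio, input/output filters, SE ratio). Below is a minimal standalone sketch of that parsing, mirroring the regex loop in `BlockDecoder._decode_block_string`; the helper name `parse_block_string` is illustrative and not part of the deleted files:

```python
import re

def parse_block_string(block_string: str) -> dict:
    """Split one block string into its key/value options.

    Mirrors BlockDecoder._decode_block_string: each '_'-separated
    token is a letter key followed by a numeric value.
    """
    options = {}
    for op in block_string.split('_'):
        splits = re.split(r'(\d.*)', op)
        if len(splits) >= 2:
            key, value = splits[:2]
            options[key] = value
    return options

# r2 -> num_repeat=2, k3 -> kernel_size=3, s22 -> stride (2, 2),
# e6 -> expand_ratio=6, i16/o24 -> input/output filters, se0.25 -> SE ratio.
print(parse_block_string('r2_k3_s22_e6_i16_o24_se0.25'))
# {'r': '2', 'k': '3', 's': '22', 'e': '6', 'i': '16', 'o': '24', 'se': '0.25'}
```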
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import math -import collections -import re -import copy - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - -GlobalParams = collections.namedtuple('GlobalParams', [ - 'batch_norm_momentum', - 'batch_norm_epsilon', - 'dropout_rate', - 'num_classes', - 'width_coefficient', - 'depth_coefficient', - 'depth_divisor', - 'min_depth', - 'drop_connect_rate', -]) - -BlockArgs = collections.namedtuple( - 'BlockArgs', - ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) - -GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) -BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) - - -def efficientnet_params(model_name: str): - """ Map EfficientNet model name to parameter coefficients. """ - params_dict = { - # Coefficients: width,depth,resolution,dropout - 'efficientnet-b4': (1.4, 1.8, 380, 0.4) - } - return params_dict[model_name] - - -def efficientnet(width_coefficient: float = None, - depth_coefficient: float = None, - dropout_rate: float = 0.2, - drop_connect_rate: float = 0.2): - """ Get block arguments according to parameter and coefficients. """ - blocks_args = [ - 'r1_k3_s11_e1_i32_o16_se0.25', - 'r2_k3_s22_e6_i16_o24_se0.25', - 'r2_k5_s22_e6_i24_o40_se0.25', - 'r3_k3_s22_e6_i40_o80_se0.25', - 'r3_k5_s11_e6_i80_o112_se0.25', - 'r4_k5_s22_e6_i112_o192_se0.25', - 'r1_k3_s11_e6_i192_o320_se0.25', - ] - blocks_args = BlockDecoder.decode(blocks_args) - - global_params = GlobalParams(batch_norm_momentum=0.99, - batch_norm_epsilon=1e-3, - dropout_rate=dropout_rate, - drop_connect_rate=drop_connect_rate, - num_classes=1000, - width_coefficient=width_coefficient, - depth_coefficient=depth_coefficient, - depth_divisor=8, - min_depth=None) - - return blocks_args, global_params - - -def get_model_params(model_name: str, override_params: dict): - """ Get the block args and global params for a given model """ - if model_name.startswith('efficientnet'): - w, d, _, p = efficientnet_params(model_name) - blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) - else: - raise NotImplementedError('model name is not pre-defined: %s' % model_name) - if override_params: - global_params = global_params._replace(**override_params) - return blocks_args, global_params - - -def round_filters(filters: int, global_params: dict): - """ Calculate and round number of filters based on depth multiplier. """ - multiplier = global_params.width_coefficient - if not multiplier: - return filters - divisor = global_params.depth_divisor - min_depth = global_params.min_depth - filters *= multiplier - min_depth = min_depth or divisor - new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) - if new_filters < 0.9 * filters: # prevent rounding by more than 10% - new_filters += divisor - return int(new_filters) - - -def round_repeats(repeats: int, global_params: dict): - """ Round number of filters based on depth multiplier. 
""" - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) - - -class BlockDecoder(object): - """ - Block Decoder, straight from the official TensorFlow repository. - """ - @staticmethod - def _decode_block_string(block_string: str): - """ Gets a block through a string notation of arguments. """ - assert isinstance(block_string, str) - - ops = block_string.split('_') - options = {} - for op in ops: - splits = re.split(r'(\d.*)', op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value - - # Check stride - cond_1 = ('s' in options and len(options['s']) == 1) - cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) - assert (cond_1 or cond_2) - - return BlockArgs(kernel_size=int(options['k']), - num_repeat=int(options['r']), - input_filters=int(options['i']), - output_filters=int(options['o']), - expand_ratio=int(options['e']), - id_skip=('noskip' not in block_string), - se_ratio=float(options['se']) if 'se' in options else None, - stride=[int(options['s'][0])]) - - @staticmethod - def _encode_block_string(block): - """Encodes a block to a string.""" - args = [ - 'r%d' % block.num_repeat, - 'k%d' % block.kernel_size, - 's%d%d' % (block.strides[0], block.strides[1]), - 'e%s' % block.expand_ratio, - 'i%d' % block.input_filters, - 'o%d' % block.output_filters - ] - if 0 < block.se_ratio <= 1: - args.append('se%s' % block.se_ratio) - if block.id_skip is False: - args.append('noskip') - return '_'.join(args) - - @staticmethod - def decode(string_list: list): - """ - Decode a list of string notations to specify blocks in the network. - - string_list: list of strings, each string is a notation of block - return - list of BlockArgs namedtuples of block args - """ - assert isinstance(string_list, list) - blocks_args = [] - for block_string in string_list: - blocks_args.append(BlockDecoder._decode_block_string(block_string)) - return blocks_args - - @staticmethod - def encode(blocks_args: list): - """ - Encodes a list of BlockArgs to a list of strings. 
- - :param blocks_args: a list of BlockArgs namedtuples of block args - :return: a list of strings, each string is a notation of block - """ - block_strings = [] - for block in blocks_args: - block_strings.append(BlockDecoder._encode_block_string(block)) - return block_strings - - -def initial_type(name: str, use_bias: bool = False): - param_attr = ParamAttr(name=name + "_weights") - if use_bias: - bias_attr = ParamAttr(name=name + "_offset") - else: - bias_attr = False - return param_attr, bias_attr - - -def init_batch_norm_layer(name: str = "batch_norm"): - param_attr = ParamAttr(name=name + "_scale") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def init_fc_layer(name: str = "fc"): - param_attr = ParamAttr(name=name + "_weights") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): - """Calculate padding size.""" - if img_size % stride == 0: - out_size = max(filter_size - stride, 0) - else: - out_size = max(filter_size - (img_size % stride), 0) - return out_size // 2, out_size - out_size // 2 - - -inp_shape = {"b4": [380, 190, 190, 95, 48, 24, 24, 12]} - - -def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): - """Drop input connection""" - if is_test: - return inputs - keep_prob = 1.0 - prob - inputs_shape = paddle.shape(inputs) - random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) - binary_tensor = paddle.floor(random_tensor) - output = inputs / keep_prob * binary_tensor - return output - - -class Conv2ds(nn.Layer): - """Basic conv layer""" - def __init__(self, - input_channels: int, - output_channels: int, - filter_size: int, - stride: int = 1, - padding: int = 0, - groups: int = None, - name: str = "conv2d", - act: str = None, - use_bias: bool = False, - padding_type: str = None, - model_name: str = None, - cur_stage: str = None): - super(Conv2ds, self).__init__() - assert act in [None, "swish", "sigmoid"] - self.act = act - - param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) - - def get_padding(filter_size, stride=1, dilation=1): - padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 - return padding - - inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] - self.need_crop = False - if padding_type == "SAME": - top_padding, bottom_padding = cal_padding(inps, stride, filter_size) - left_padding, right_padding = cal_padding(inps, stride, filter_size) - height_padding = bottom_padding - width_padding = right_padding - if top_padding != bottom_padding or left_padding != right_padding: - height_padding = top_padding + stride - width_padding = left_padding + stride - self.need_crop = True - padding = [height_padding, width_padding] - elif padding_type == "VALID": - height_padding = 0 - width_padding = 0 - padding = [height_padding, width_padding] - elif padding_type == "DYNAMIC": - padding = get_padding(filter_size, stride) - else: - padding = padding_type - - groups = 1 if groups is None else groups - self._conv = Conv2d(input_channels, - output_channels, - filter_size, - groups=groups, - stride=stride, - padding=padding, - weight_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - x = self._conv(inputs) - if self.act == "swish": - x = F.swish(x) - elif self.act == "sigmoid": - x = F.sigmoid(x) - - if self.need_crop: - x = x[:, :, 1:, 1:] - return x - - -class ConvBNLayer(nn.Layer): - """Basic conv bn 
layer.""" - def __init__(self, - input_channels: int, - filter_size: int, - output_channels: int, - stride: int = 1, - num_groups: int = 1, - padding_type: str = "SAME", - conv_act: str = None, - bn_act: str = "swish", - use_bn: bool = True, - use_bias: bool = False, - name: str = None, - conv_name: str = None, - bn_name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2ds(input_channels=input_channels, - output_channels=output_channels, - filter_size=filter_size, - stride=stride, - groups=num_groups, - act=conv_act, - padding_type=padding_type, - name=conv_name, - use_bias=use_bias, - model_name=model_name, - cur_stage=cur_stage) - self.use_bn = use_bn - if use_bn is True: - bn_name = name + bn_name - param_attr, bias_attr = init_batch_norm_layer(bn_name) - - self._bn = BatchNorm(num_channels=output_channels, - act=bn_act, - momentum=0.99, - epsilon=0.001, - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - param_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - if self.use_bn: - x = self._conv(inputs) - x = self._bn(x) - return x - else: - return self._conv(inputs) - - -class ExpandConvNorm(nn.Layer): - """Expand conv norm layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ExpandConvNorm, self).__init__() - - self.oup = block_args.input_filters * block_args.expand_ratio - self.expand_ratio = block_args.expand_ratio - - if self.expand_ratio != 1: - self._conv = ConvBNLayer(input_channels, - 1, - self.oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_expand_conv", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - if self.expand_ratio != 1: - return self._conv(inputs) - else: - return inputs - - -class DepthwiseConvNorm(nn.Layer): - """Depthwise conv norm layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(DepthwiseConvNorm, self).__init__() - - self.k = block_args.kernel_size - self.s = block_args.stride - if isinstance(self.s, list) or isinstance(self.s, tuple): - self.s = self.s[0] - oup = block_args.input_filters * block_args.expand_ratio - - self._conv = ConvBNLayer(input_channels, - self.k, - oup, - self.s, - num_groups=input_channels, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_depthwise_conv", - bn_name="_bn1", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ProjectConvNorm(nn.Layer): - """Projection conv bn layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ProjectConvNorm, self).__init__() - - final_oup = block_args.output_filters - - self._conv = ConvBNLayer(input_channels, - 1, - final_oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_project_conv", - bn_name="_bn2", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class SEBlock(nn.Layer): - """Basic Squeeze-and-Excitation block for Efficientnet.""" - def __init__(self, - input_channels: int, - 
num_squeezed_channels: int, - oup: int, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(SEBlock, self).__init__() - - self._pool = AdaptiveAvgPool2d(1) - self._conv1 = Conv2ds(input_channels, - num_squeezed_channels, - 1, - use_bias=True, - padding_type=padding_type, - act="swish", - name=name + "_se_reduce") - - self._conv2 = Conv2ds(num_squeezed_channels, - oup, - 1, - act="sigmoid", - use_bias=True, - padding_type=padding_type, - name=name + "_se_expand") - - def forward(self, inputs: paddle.Tensor): - x = self._pool(inputs) - x = self._conv1(x) - x = self._conv2(x) - return paddle.multiply(inputs, x) - - -class MbConvBlock(nn.Layer): - """Mobile inverted bottleneck convolution for Efficientnet.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - use_se: bool, - name: str = None, - drop_connect_rate: float = None, - is_test: bool = False, - model_name: str = None, - cur_stage: str = None): - super(MbConvBlock, self).__init__() - - oup = block_args.input_filters * block_args.expand_ratio - self.block_args = block_args - self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) - self.id_skip = block_args.id_skip - self.expand_ratio = block_args.expand_ratio - self.drop_connect_rate = drop_connect_rate - self.is_test = is_test - - if self.expand_ratio != 1: - self._ecn = ExpandConvNorm(input_channels, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._dcn = DepthwiseConvNorm(input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - if self.has_se: - num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) - self._se = SEBlock(input_channels * block_args.expand_ratio, - num_squeezed_channels, - oup, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._pcn = ProjectConvNorm(input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - x = inputs - if self.expand_ratio != 1: - x = self._ecn(x) - x = F.swish(x) - x = self._dcn(x) - x = F.swish(x) - if self.has_se: - x = self._se(x) - x = self._pcn(x) - if self.id_skip and \ - self.block_args.stride == 1 and \ - self.block_args.input_filters == self.block_args.output_filters: - if self.drop_connect_rate: - x = _drop_connect(x, self.drop_connect_rate, self.is_test) - x = paddle.elementwise_add(x, inputs) - return x - - -class ConvStemNorm(nn.Layer): - """Basic conv stem norm block for extracting features.""" - def __init__(self, - input_channels: int, - padding_type: str, - _global_params: dict, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvStemNorm, self).__init__() - - output_channels = round_filters(32, _global_params) - self._conv = ConvBNLayer(input_channels, - filter_size=3, - output_channels=output_channels, - stride=2, - bn_act=None, - padding_type=padding_type, - name="", - conv_name="_conv_stem", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ExtractFeatures(nn.Layer): - """Extract features.""" - def __init__(self, - input_channels: int, - _block_args: dict, - _global_params: dict, - padding_type: str, - use_se: 
bool, - is_test: bool, - model_name: str = None): - super(ExtractFeatures, self).__init__() - - self._global_params = _global_params - - self._conv_stem = ConvStemNorm(input_channels, - padding_type=padding_type, - _global_params=_global_params, - model_name=model_name, - cur_stage=0) - - self.block_args_copy = copy.deepcopy(_block_args) - idx = 0 - block_size = 0 - for block_arg in self.block_args_copy: - block_arg = block_arg._replace(input_filters=round_filters(block_arg.input_filters, _global_params), - output_filters=round_filters(block_arg.output_filters, _global_params), - num_repeat=round_repeats(block_arg.num_repeat, _global_params)) - block_size += 1 - for _ in range(block_arg.num_repeat - 1): - block_size += 1 - - self.conv_seq = [] - cur_stage = 1 - for block_args in _block_args: - block_args = block_args._replace(input_filters=round_filters(block_args.input_filters, _global_params), - output_filters=round_filters(block_args.output_filters, _global_params), - num_repeat=round_repeats(block_args.num_repeat, _global_params)) - - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - - _mc_block = self.add_sublayer( - "_blocks." + str(idx) + ".", - MbConvBlock(block_args.input_filters, - block_args=block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - if block_args.num_repeat > 1: - block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) - for _ in range(block_args.num_repeat - 1): - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - _mc_block = self.add_sublayer( - "block." + str(idx) + ".", - MbConvBlock(block_args.input_filters, - block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." 
+ str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - cur_stage += 1 - - def forward(self, inputs: paddle.Tensor): - x = self._conv_stem(inputs) - x = F.swish(x) - for _mc_block in self.conv_seq: - x = _mc_block(x) - return x - - -@moduleinfo(name="efficientnetb4_imagenet", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="efficientnetb4_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class EfficientNet_B4(nn.Layer): - def __init__(self, - is_test: bool = False, - padding_type: str = "SAME", - override_params: dict = None, - use_se: bool = True, - class_dim: int = 1000, - load_checkpoint: str = None): - super(EfficientNet_B4, self).__init__() - - model_name = 'efficientnet-b4' - self.name = "b4" - self._block_args, self._global_params = get_model_params(model_name, override_params) - self.padding_type = padding_type - self.use_se = use_se - self.is_test = is_test - - self._ef = ExtractFeatures(3, - self._block_args, - self._global_params, - self.padding_type, - self.use_se, - self.is_test, - model_name=self.name) - - output_channels = round_filters(1280, self._global_params) - oup = 448 - - self._conv = ConvBNLayer(oup, - 1, - output_channels, - bn_act="swish", - padding_type=self.padding_type, - name="", - conv_name="_conv_head", - bn_name="_bn1", - model_name=self.name, - cur_stage=7) - self._pool = AdaptiveAvgPool2d(1) - - if self._global_params.dropout_rate: - self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") - - param_attr, bias_attr = init_fc_layer("_fc") - self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'efficientnet_b4_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b4_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - x = self._ef(inputs) - x = self._conv(x) - x = self._pool(x) - if self._global_params.dropout_rate: - x = self._drop(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._fc(x) - return x diff --git a/hub_module/modules/image/classification/efficientnetb5_imagenet/module.py b/hub_module/modules/image/classification/efficientnetb5_imagenet/module.py deleted file mode 100644 index bc6254be72c21a7317eb2b6df1c83f5a27f4b0c2..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/efficientnetb5_imagenet/module.py +++ /dev/null @@ -1,758 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
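The `round_filters`/`round_repeats` pair implements EfficientNet's compound scaling: channel counts are multiplied by the width coefficient and snapped to a multiple of `depth_divisor` (never shrinking by more than 10%), while repeat counts are multiplied by the depth coefficient and rounded up. A worked sketch using the b4 coefficients (width 1.4, depth 1.8) from `efficientnet_params` above:

```python
import math

def round_filters(filters, width_coefficient, depth_divisor=8, min_depth=None):
    # Scale the channel count, then round to a multiple of depth_divisor,
    # never rounding down by more than 10%.
    if not width_coefficient:
        return filters
    filters *= width_coefficient
    min_depth = min_depth or depth_divisor
    new_filters = max(min_depth,
                      int(filters + depth_divisor / 2) // depth_divisor * depth_divisor)
    if new_filters < 0.9 * filters:
        new_filters += depth_divisor
    return int(new_filters)

def round_repeats(repeats, depth_coefficient):
    # Scale the layer count, rounding up.
    if not depth_coefficient:
        return repeats
    return int(math.ceil(depth_coefficient * repeats))

# b4 coefficients: width 1.4, depth 1.8.
print(round_filters(32, 1.4))    # 32 * 1.4 = 44.8 -> snaps to 48
print(round_filters(1280, 1.4))  # 1792, already a multiple of 8
print(round_repeats(3, 1.8))     # ceil(5.4) = 6
```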
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import math -import collections -import re -import copy - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - -GlobalParams = collections.namedtuple('GlobalParams', [ - 'batch_norm_momentum', - 'batch_norm_epsilon', - 'dropout_rate', - 'num_classes', - 'width_coefficient', - 'depth_coefficient', - 'depth_divisor', - 'min_depth', - 'drop_connect_rate', -]) - -BlockArgs = collections.namedtuple( - 'BlockArgs', - ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) - -GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) -BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) - - -def efficientnet_params(model_name: str): - """ Map EfficientNet model name to parameter coefficients. """ - params_dict = { - # Coefficients: width,depth,resolution,dropout - 'efficientnet-b5': (1.6, 2.2, 456, 0.4) - } - return params_dict[model_name] - - -def efficientnet(width_coefficient: float = None, - depth_coefficient: float = None, - dropout_rate: float = 0.2, - drop_connect_rate: float = 0.2): - """ Get block arguments according to parameter and coefficients. """ - blocks_args = [ - 'r1_k3_s11_e1_i32_o16_se0.25', - 'r2_k3_s22_e6_i16_o24_se0.25', - 'r2_k5_s22_e6_i24_o40_se0.25', - 'r3_k3_s22_e6_i40_o80_se0.25', - 'r3_k5_s11_e6_i80_o112_se0.25', - 'r4_k5_s22_e6_i112_o192_se0.25', - 'r1_k3_s11_e6_i192_o320_se0.25', - ] - blocks_args = BlockDecoder.decode(blocks_args) - - global_params = GlobalParams(batch_norm_momentum=0.99, - batch_norm_epsilon=1e-3, - dropout_rate=dropout_rate, - drop_connect_rate=drop_connect_rate, - num_classes=1000, - width_coefficient=width_coefficient, - depth_coefficient=depth_coefficient, - depth_divisor=8, - min_depth=None) - - return blocks_args, global_params - - -def get_model_params(model_name: str, override_params: dict): - """ Get the block args and global params for a given model """ - if model_name.startswith('efficientnet'): - w, d, _, p = efficientnet_params(model_name) - blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) - else: - raise NotImplementedError('model name is not pre-defined: %s' % model_name) - if override_params: - global_params = global_params._replace(**override_params) - return blocks_args, global_params - - -def round_filters(filters: int, global_params: dict): - """ Calculate and round number of filters based on depth multiplier. """ - multiplier = global_params.width_coefficient - if not multiplier: - return filters - divisor = global_params.depth_divisor - min_depth = global_params.min_depth - filters *= multiplier - min_depth = min_depth or divisor - new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) - if new_filters < 0.9 * filters: # prevent rounding by more than 10% - new_filters += divisor - return int(new_filters) - - -def round_repeats(repeats: int, global_params: dict): - """ Round number of filters based on depth multiplier. 
""" - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) - - -class BlockDecoder(object): - """ - Block Decoder, straight from the official TensorFlow repository. - """ - @staticmethod - def _decode_block_string(block_string: str): - """ Gets a block through a string notation of arguments. """ - assert isinstance(block_string, str) - - ops = block_string.split('_') - options = {} - for op in ops: - splits = re.split(r'(\d.*)', op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value - - # Check stride - cond_1 = ('s' in options and len(options['s']) == 1) - cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) - assert (cond_1 or cond_2) - - return BlockArgs(kernel_size=int(options['k']), - num_repeat=int(options['r']), - input_filters=int(options['i']), - output_filters=int(options['o']), - expand_ratio=int(options['e']), - id_skip=('noskip' not in block_string), - se_ratio=float(options['se']) if 'se' in options else None, - stride=[int(options['s'][0])]) - - @staticmethod - def _encode_block_string(block): - """Encodes a block to a string.""" - args = [ - 'r%d' % block.num_repeat, - 'k%d' % block.kernel_size, - 's%d%d' % (block.strides[0], block.strides[1]), - 'e%s' % block.expand_ratio, - 'i%d' % block.input_filters, - 'o%d' % block.output_filters - ] - if 0 < block.se_ratio <= 1: - args.append('se%s' % block.se_ratio) - if block.id_skip is False: - args.append('noskip') - return '_'.join(args) - - @staticmethod - def decode(string_list: list): - """ - Decode a list of string notations to specify blocks in the network. - - string_list: list of strings, each string is a notation of block - return - list of BlockArgs namedtuples of block args - """ - assert isinstance(string_list, list) - blocks_args = [] - for block_string in string_list: - blocks_args.append(BlockDecoder._decode_block_string(block_string)) - return blocks_args - - @staticmethod - def encode(blocks_args: list): - """ - Encodes a list of BlockArgs to a list of strings. 
- - :param blocks_args: a list of BlockArgs namedtuples of block args - :return: a list of strings, each string is a notation of block - """ - block_strings = [] - for block in blocks_args: - block_strings.append(BlockDecoder._encode_block_string(block)) - return block_strings - - -def initial_type(name: str, use_bias: bool = False): - param_attr = ParamAttr(name=name + "_weights") - if use_bias: - bias_attr = ParamAttr(name=name + "_offset") - else: - bias_attr = False - return param_attr, bias_attr - - -def init_batch_norm_layer(name: str = "batch_norm"): - param_attr = ParamAttr(name=name + "_scale") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def init_fc_layer(name: str = "fc"): - param_attr = ParamAttr(name=name + "_weights") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): - """Calculate padding size.""" - if img_size % stride == 0: - out_size = max(filter_size - stride, 0) - else: - out_size = max(filter_size - (img_size % stride), 0) - return out_size // 2, out_size - out_size // 2 - - -inp_shape = {"b5": [456, 228, 228, 114, 57, 29, 29, 15]} - - -def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): - """Drop input connection""" - if is_test: - return inputs - keep_prob = 1.0 - prob - inputs_shape = paddle.shape(inputs) - random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) - binary_tensor = paddle.floor(random_tensor) - output = inputs / keep_prob * binary_tensor - return output - - -class Conv2ds(nn.Layer): - """Basic conv layer""" - def __init__(self, - input_channels: int, - output_channels: int, - filter_size: int, - stride: int = 1, - padding: int = 0, - groups: int = None, - name: str = "conv2d", - act: str = None, - use_bias: bool = False, - padding_type: str = None, - model_name: str = None, - cur_stage: str = None): - super(Conv2ds, self).__init__() - assert act in [None, "swish", "sigmoid"] - self.act = act - - param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) - - def get_padding(filter_size, stride=1, dilation=1): - padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 - return padding - - inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] - self.need_crop = False - if padding_type == "SAME": - top_padding, bottom_padding = cal_padding(inps, stride, filter_size) - left_padding, right_padding = cal_padding(inps, stride, filter_size) - height_padding = bottom_padding - width_padding = right_padding - if top_padding != bottom_padding or left_padding != right_padding: - height_padding = top_padding + stride - width_padding = left_padding + stride - self.need_crop = True - padding = [height_padding, width_padding] - elif padding_type == "VALID": - height_padding = 0 - width_padding = 0 - padding = [height_padding, width_padding] - elif padding_type == "DYNAMIC": - padding = get_padding(filter_size, stride) - else: - padding = padding_type - - groups = 1 if groups is None else groups - self._conv = Conv2d(input_channels, - output_channels, - filter_size, - groups=groups, - stride=stride, - padding=padding, - weight_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - x = self._conv(inputs) - if self.act == "swish": - x = F.swish(x) - elif self.act == "sigmoid": - x = F.sigmoid(x) - - if self.need_crop: - x = x[:, :, 1:, 1:] - return x - - -class ConvBNLayer(nn.Layer): - """Basic conv bn 
layer.""" - def __init__(self, - input_channels: int, - filter_size: int, - output_channels: int, - stride: int = 1, - num_groups: int = 1, - padding_type: str = "SAME", - conv_act: str = None, - bn_act: str = "swish", - use_bn: bool = True, - use_bias: bool = False, - name: str = None, - conv_name: str = None, - bn_name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2ds(input_channels=input_channels, - output_channels=output_channels, - filter_size=filter_size, - stride=stride, - groups=num_groups, - act=conv_act, - padding_type=padding_type, - name=conv_name, - use_bias=use_bias, - model_name=model_name, - cur_stage=cur_stage) - self.use_bn = use_bn - if use_bn is True: - bn_name = name + bn_name - param_attr, bias_attr = init_batch_norm_layer(bn_name) - - self._bn = BatchNorm(num_channels=output_channels, - act=bn_act, - momentum=0.99, - epsilon=0.001, - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - param_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - if self.use_bn: - x = self._conv(inputs) - x = self._bn(x) - return x - else: - return self._conv(inputs) - - -class ExpandConvNorm(nn.Layer): - """Expand conv norm layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ExpandConvNorm, self).__init__() - - self.oup = block_args.input_filters * block_args.expand_ratio - self.expand_ratio = block_args.expand_ratio - - if self.expand_ratio != 1: - self._conv = ConvBNLayer(input_channels, - 1, - self.oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_expand_conv", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - if self.expand_ratio != 1: - return self._conv(inputs) - else: - return inputs - - -class DepthwiseConvNorm(nn.Layer): - """Depthwise conv norm layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(DepthwiseConvNorm, self).__init__() - - self.k = block_args.kernel_size - self.s = block_args.stride - if isinstance(self.s, list) or isinstance(self.s, tuple): - self.s = self.s[0] - oup = block_args.input_filters * block_args.expand_ratio - - self._conv = ConvBNLayer(input_channels, - self.k, - oup, - self.s, - num_groups=input_channels, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_depthwise_conv", - bn_name="_bn1", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ProjectConvNorm(nn.Layer): - """Projection conv bn layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ProjectConvNorm, self).__init__() - - final_oup = block_args.output_filters - - self._conv = ConvBNLayer(input_channels, - 1, - final_oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_project_conv", - bn_name="_bn2", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class SEBlock(nn.Layer): - """Basic Squeeze-and-Excitation block for Efficientnet.""" - def __init__(self, - input_channels: int, - 
num_squeezed_channels: int, - oup: int, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(SEBlock, self).__init__() - - self._pool = AdaptiveAvgPool2d(1) - self._conv1 = Conv2ds(input_channels, - num_squeezed_channels, - 1, - use_bias=True, - padding_type=padding_type, - act="swish", - name=name + "_se_reduce") - - self._conv2 = Conv2ds(num_squeezed_channels, - oup, - 1, - act="sigmoid", - use_bias=True, - padding_type=padding_type, - name=name + "_se_expand") - - def forward(self, inputs: paddle.Tensor): - x = self._pool(inputs) - x = self._conv1(x) - x = self._conv2(x) - return paddle.multiply(inputs, x) - - -class MbConvBlock(nn.Layer): - """Mobile inverted bottleneck convolution for Efficientnet.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - use_se: bool, - name: str = None, - drop_connect_rate: float = None, - is_test: bool = False, - model_name: str = None, - cur_stage: str = None): - super(MbConvBlock, self).__init__() - - oup = block_args.input_filters * block_args.expand_ratio - self.block_args = block_args - self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) - self.id_skip = block_args.id_skip - self.expand_ratio = block_args.expand_ratio - self.drop_connect_rate = drop_connect_rate - self.is_test = is_test - - if self.expand_ratio != 1: - self._ecn = ExpandConvNorm(input_channels, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._dcn = DepthwiseConvNorm(input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - if self.has_se: - num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) - self._se = SEBlock(input_channels * block_args.expand_ratio, - num_squeezed_channels, - oup, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._pcn = ProjectConvNorm(input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - x = inputs - if self.expand_ratio != 1: - x = self._ecn(x) - x = F.swish(x) - x = self._dcn(x) - x = F.swish(x) - if self.has_se: - x = self._se(x) - x = self._pcn(x) - if self.id_skip and \ - self.block_args.stride == 1 and \ - self.block_args.input_filters == self.block_args.output_filters: - if self.drop_connect_rate: - x = _drop_connect(x, self.drop_connect_rate, self.is_test) - x = paddle.elementwise_add(x, inputs) - return x - - -class ConvStemNorm(nn.Layer): - """Basic conv stem norm block for extracting features.""" - def __init__(self, - input_channels: int, - padding_type: str, - _global_params: dict, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvStemNorm, self).__init__() - - output_channels = round_filters(32, _global_params) - self._conv = ConvBNLayer(input_channels, - filter_size=3, - output_channels=output_channels, - stride=2, - bn_act=None, - padding_type=padding_type, - name="", - conv_name="_conv_stem", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ExtractFeatures(nn.Layer): - """Extract features.""" - def __init__(self, - input_channels: int, - _block_args: dict, - _global_params: dict, - padding_type: str, - use_se: 
bool, - is_test: bool, - model_name: str = None): - super(ExtractFeatures, self).__init__() - - self._global_params = _global_params - - self._conv_stem = ConvStemNorm(input_channels, - padding_type=padding_type, - _global_params=_global_params, - model_name=model_name, - cur_stage=0) - - self.block_args_copy = copy.deepcopy(_block_args) - idx = 0 - block_size = 0 - for block_arg in self.block_args_copy: - block_arg = block_arg._replace(input_filters=round_filters(block_arg.input_filters, _global_params), - output_filters=round_filters(block_arg.output_filters, _global_params), - num_repeat=round_repeats(block_arg.num_repeat, _global_params)) - block_size += 1 - for _ in range(block_arg.num_repeat - 1): - block_size += 1 - - self.conv_seq = [] - cur_stage = 1 - for block_args in _block_args: - block_args = block_args._replace(input_filters=round_filters(block_args.input_filters, _global_params), - output_filters=round_filters(block_args.output_filters, _global_params), - num_repeat=round_repeats(block_args.num_repeat, _global_params)) - - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - - _mc_block = self.add_sublayer( - "_blocks." + str(idx) + ".", - MbConvBlock(block_args.input_filters, - block_args=block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - if block_args.num_repeat > 1: - block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) - for _ in range(block_args.num_repeat - 1): - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - _mc_block = self.add_sublayer( - "block." + str(idx) + ".", - MbConvBlock(block_args.input_filters, - block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." 
+ str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - cur_stage += 1 - - def forward(self, inputs: paddle.Tensor): - x = self._conv_stem(inputs) - x = F.swish(x) - for _mc_block in self.conv_seq: - x = _mc_block(x) - return x - - -@moduleinfo(name="efficientnetb5_imagenet", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="efficientnetb5_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class EfficientNet_B5(nn.Layer): - def __init__(self, - is_test: bool = False, - padding_type: str = "SAME", - override_params: dict = None, - use_se: bool = True, - class_dim: int = 1000, - load_checkpoint: str = None): - super(EfficientNet_B5, self).__init__() - - model_name = 'efficientnet-b5' - self.name = "b5" - self._block_args, self._global_params = get_model_params(model_name, override_params) - self.padding_type = padding_type - self.use_se = use_se - self.is_test = is_test - - self._ef = ExtractFeatures(3, - self._block_args, - self._global_params, - self.padding_type, - self.use_se, - self.is_test, - model_name=self.name) - - output_channels = round_filters(1280, self._global_params) - oup = 512 - - self._conv = ConvBNLayer(oup, - 1, - output_channels, - bn_act="swish", - padding_type=self.padding_type, - name="", - conv_name="_conv_head", - bn_name="_bn1", - model_name=self.name, - cur_stage=7) - self._pool = AdaptiveAvgPool2d(1) - - if self._global_params.dropout_rate: - self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") - - param_attr, bias_attr = init_fc_layer("_fc") - self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'efficientnet_b5_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b5_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - x = self._ef(inputs) - x = self._conv(x) - x = self._pool(x) - if self._global_params.dropout_rate: - x = self._drop(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._fc(x) - return x diff --git a/hub_module/modules/image/classification/efficientnetb6_imagenet/module.py b/hub_module/modules/image/classification/efficientnetb6_imagenet/module.py deleted file mode 100644 index fd3a4bd20d228af32375b67b2cf4be9f9fd2d5e6..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/efficientnetb6_imagenet/module.py +++ /dev/null @@ -1,758 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
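`_drop_connect` applies stochastic depth: during training, each example's residual branch is zeroed with probability `prob` and the survivors are rescaled by `1/keep_prob` so the expected activation is unchanged; at inference it is the identity. A self-contained sketch against the paddle 2.x dygraph API (the function name and test values here are illustrative):

```python
import paddle

def drop_connect(inputs: paddle.Tensor, prob: float, training: bool) -> paddle.Tensor:
    if not training:
        return inputs  # identity at inference time
    keep_prob = 1.0 - prob
    # floor(keep_prob + U[0, 1)) is 1 with probability keep_prob, else 0:
    # one Bernoulli sample per example, broadcast over C, H, W.
    random_tensor = keep_prob + paddle.rand([inputs.shape[0], 1, 1, 1])
    binary_mask = paddle.floor(random_tensor)
    # Dividing by keep_prob keeps the expected activation unchanged.
    return inputs / keep_prob * binary_mask

x = paddle.ones([4, 3, 2, 2])
y = drop_connect(x, prob=0.2, training=True)  # each sample survives w.p. 0.8
```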
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import math -import collections -import re -import copy - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - -GlobalParams = collections.namedtuple('GlobalParams', [ - 'batch_norm_momentum', - 'batch_norm_epsilon', - 'dropout_rate', - 'num_classes', - 'width_coefficient', - 'depth_coefficient', - 'depth_divisor', - 'min_depth', - 'drop_connect_rate', -]) - -BlockArgs = collections.namedtuple( - 'BlockArgs', - ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) - -GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) -BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) - - -def efficientnet_params(model_name: str): - """ Map EfficientNet model name to parameter coefficients. """ - params_dict = { - # Coefficients: width,depth,resolution,dropout - 'efficientnet-b6': (1.8, 2.6, 528, 0.5) - } - return params_dict[model_name] - - -def efficientnet(width_coefficient: float = None, - depth_coefficient: float = None, - dropout_rate: float = 0.2, - drop_connect_rate: float = 0.2): - """ Get block arguments according to parameter and coefficients. """ - blocks_args = [ - 'r1_k3_s11_e1_i32_o16_se0.25', - 'r2_k3_s22_e6_i16_o24_se0.25', - 'r2_k5_s22_e6_i24_o40_se0.25', - 'r3_k3_s22_e6_i40_o80_se0.25', - 'r3_k5_s11_e6_i80_o112_se0.25', - 'r4_k5_s22_e6_i112_o192_se0.25', - 'r1_k3_s11_e6_i192_o320_se0.25', - ] - blocks_args = BlockDecoder.decode(blocks_args) - - global_params = GlobalParams(batch_norm_momentum=0.99, - batch_norm_epsilon=1e-3, - dropout_rate=dropout_rate, - drop_connect_rate=drop_connect_rate, - num_classes=1000, - width_coefficient=width_coefficient, - depth_coefficient=depth_coefficient, - depth_divisor=8, - min_depth=None) - - return blocks_args, global_params - - -def get_model_params(model_name: str, override_params: dict): - """ Get the block args and global params for a given model """ - if model_name.startswith('efficientnet'): - w, d, _, p = efficientnet_params(model_name) - blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) - else: - raise NotImplementedError('model name is not pre-defined: %s' % model_name) - if override_params: - global_params = global_params._replace(**override_params) - return blocks_args, global_params - - -def round_filters(filters: int, global_params: dict): - """ Calculate and round number of filters based on depth multiplier. """ - multiplier = global_params.width_coefficient - if not multiplier: - return filters - divisor = global_params.depth_divisor - min_depth = global_params.min_depth - filters *= multiplier - min_depth = min_depth or divisor - new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) - if new_filters < 0.9 * filters: # prevent rounding by more than 10% - new_filters += divisor - return int(new_filters) - - -def round_repeats(repeats: int, global_params: dict): - """ Round number of filters based on depth multiplier. 
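
The width rule above (`round_filters`) snaps every scaled channel count to a multiple of `depth_divisor` while refusing to round down by more than 10%. A minimal standalone sketch of that arithmetic (illustration only, not the module's API; the width coefficient 1.8 comes from `efficientnet_params` for B6):

```python
def round_filters_demo(filters, width_coefficient=1.8, depth_divisor=8, min_depth=None):
    # Standalone copy of the rounding rule above, for illustration only.
    filters *= width_coefficient
    min_depth = min_depth or depth_divisor
    new_filters = max(min_depth, int(filters + depth_divisor / 2) // depth_divisor * depth_divisor)
    if new_filters < 0.9 * filters:  # never shrink the width by more than 10%
        new_filters += depth_divisor
    return int(new_filters)

# For B6: the 32-channel stem widens to 56, the 1280-channel head to 2304.
assert round_filters_demo(32) == 56
assert round_filters_demo(1280) == 2304
```
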
""" - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) - - -class BlockDecoder(object): - """ - Block Decoder, straight from the official TensorFlow repository. - """ - @staticmethod - def _decode_block_string(block_string: str): - """ Gets a block through a string notation of arguments. """ - assert isinstance(block_string, str) - - ops = block_string.split('_') - options = {} - for op in ops: - splits = re.split(r'(\d.*)', op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value - - # Check stride - cond_1 = ('s' in options and len(options['s']) == 1) - cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) - assert (cond_1 or cond_2) - - return BlockArgs(kernel_size=int(options['k']), - num_repeat=int(options['r']), - input_filters=int(options['i']), - output_filters=int(options['o']), - expand_ratio=int(options['e']), - id_skip=('noskip' not in block_string), - se_ratio=float(options['se']) if 'se' in options else None, - stride=[int(options['s'][0])]) - - @staticmethod - def _encode_block_string(block): - """Encodes a block to a string.""" - args = [ - 'r%d' % block.num_repeat, - 'k%d' % block.kernel_size, - 's%d%d' % (block.strides[0], block.strides[1]), - 'e%s' % block.expand_ratio, - 'i%d' % block.input_filters, - 'o%d' % block.output_filters - ] - if 0 < block.se_ratio <= 1: - args.append('se%s' % block.se_ratio) - if block.id_skip is False: - args.append('noskip') - return '_'.join(args) - - @staticmethod - def decode(string_list: list): - """ - Decode a list of string notations to specify blocks in the network. - - string_list: list of strings, each string is a notation of block - return - list of BlockArgs namedtuples of block args - """ - assert isinstance(string_list, list) - blocks_args = [] - for block_string in string_list: - blocks_args.append(BlockDecoder._decode_block_string(block_string)) - return blocks_args - - @staticmethod - def encode(blocks_args: list): - """ - Encodes a list of BlockArgs to a list of strings. 
- - :param blocks_args: a list of BlockArgs namedtuples of block args - :return: a list of strings, each string is a notation of block - """ - block_strings = [] - for block in blocks_args: - block_strings.append(BlockDecoder._encode_block_string(block)) - return block_strings - - -def initial_type(name: str, use_bias: bool = False): - param_attr = ParamAttr(name=name + "_weights") - if use_bias: - bias_attr = ParamAttr(name=name + "_offset") - else: - bias_attr = False - return param_attr, bias_attr - - -def init_batch_norm_layer(name: str = "batch_norm"): - param_attr = ParamAttr(name=name + "_scale") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def init_fc_layer(name: str = "fc"): - param_attr = ParamAttr(name=name + "_weights") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): - """Calculate padding size.""" - if img_size % stride == 0: - out_size = max(filter_size - stride, 0) - else: - out_size = max(filter_size - (img_size % stride), 0) - return out_size // 2, out_size - out_size // 2 - - -inp_shape = {"b6": [528, 264, 264, 132, 66, 33, 33, 17]} - - -def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): - """Drop input connection""" - if is_test: - return inputs - keep_prob = 1.0 - prob - inputs_shape = paddle.shape(inputs) - random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) - binary_tensor = paddle.floor(random_tensor) - output = inputs / keep_prob * binary_tensor - return output - - -class Conv2ds(nn.Layer): - """Basic conv layer""" - def __init__(self, - input_channels: int, - output_channels: int, - filter_size: int, - stride: int = 1, - padding: int = 0, - groups: int = None, - name: str = "conv2d", - act: str = None, - use_bias: bool = False, - padding_type: str = None, - model_name: str = None, - cur_stage: str = None): - super(Conv2ds, self).__init__() - assert act in [None, "swish", "sigmoid"] - self.act = act - - param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) - - def get_padding(filter_size, stride=1, dilation=1): - padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 - return padding - - inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] - self.need_crop = False - if padding_type == "SAME": - top_padding, bottom_padding = cal_padding(inps, stride, filter_size) - left_padding, right_padding = cal_padding(inps, stride, filter_size) - height_padding = bottom_padding - width_padding = right_padding - if top_padding != bottom_padding or left_padding != right_padding: - height_padding = top_padding + stride - width_padding = left_padding + stride - self.need_crop = True - padding = [height_padding, width_padding] - elif padding_type == "VALID": - height_padding = 0 - width_padding = 0 - padding = [height_padding, width_padding] - elif padding_type == "DYNAMIC": - padding = get_padding(filter_size, stride) - else: - padding = padding_type - - groups = 1 if groups is None else groups - self._conv = Conv2d(input_channels, - output_channels, - filter_size, - groups=groups, - stride=stride, - padding=padding, - weight_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - x = self._conv(inputs) - if self.act == "swish": - x = F.swish(x) - elif self.act == "sigmoid": - x = F.sigmoid(x) - - if self.need_crop: - x = x[:, :, 1:, 1:] - return x - - -class ConvBNLayer(nn.Layer): - """Basic conv bn 
layer.""" - def __init__(self, - input_channels: int, - filter_size: int, - output_channels: int, - stride: int = 1, - num_groups: int = 1, - padding_type: str = "SAME", - conv_act: str = None, - bn_act: str = "swish", - use_bn: bool = True, - use_bias: bool = False, - name: str = None, - conv_name: str = None, - bn_name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2ds(input_channels=input_channels, - output_channels=output_channels, - filter_size=filter_size, - stride=stride, - groups=num_groups, - act=conv_act, - padding_type=padding_type, - name=conv_name, - use_bias=use_bias, - model_name=model_name, - cur_stage=cur_stage) - self.use_bn = use_bn - if use_bn is True: - bn_name = name + bn_name - param_attr, bias_attr = init_batch_norm_layer(bn_name) - - self._bn = BatchNorm(num_channels=output_channels, - act=bn_act, - momentum=0.99, - epsilon=0.001, - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - param_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - if self.use_bn: - x = self._conv(inputs) - x = self._bn(x) - return x - else: - return self._conv(inputs) - - -class ExpandConvNorm(nn.Layer): - """Expand conv norm layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ExpandConvNorm, self).__init__() - - self.oup = block_args.input_filters * block_args.expand_ratio - self.expand_ratio = block_args.expand_ratio - - if self.expand_ratio != 1: - self._conv = ConvBNLayer(input_channels, - 1, - self.oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_expand_conv", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - if self.expand_ratio != 1: - return self._conv(inputs) - else: - return inputs - - -class DepthwiseConvNorm(nn.Layer): - """Depthwise conv norm layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(DepthwiseConvNorm, self).__init__() - - self.k = block_args.kernel_size - self.s = block_args.stride - if isinstance(self.s, list) or isinstance(self.s, tuple): - self.s = self.s[0] - oup = block_args.input_filters * block_args.expand_ratio - - self._conv = ConvBNLayer(input_channels, - self.k, - oup, - self.s, - num_groups=input_channels, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_depthwise_conv", - bn_name="_bn1", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ProjectConvNorm(nn.Layer): - """Projection conv bn layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ProjectConvNorm, self).__init__() - - final_oup = block_args.output_filters - - self._conv = ConvBNLayer(input_channels, - 1, - final_oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_project_conv", - bn_name="_bn2", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class SEBlock(nn.Layer): - """Basic Squeeze-and-Excitation block for Efficientnet.""" - def __init__(self, - input_channels: int, - 
num_squeezed_channels: int, - oup: int, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(SEBlock, self).__init__() - - self._pool = AdaptiveAvgPool2d(1) - self._conv1 = Conv2ds(input_channels, - num_squeezed_channels, - 1, - use_bias=True, - padding_type=padding_type, - act="swish", - name=name + "_se_reduce") - - self._conv2 = Conv2ds(num_squeezed_channels, - oup, - 1, - act="sigmoid", - use_bias=True, - padding_type=padding_type, - name=name + "_se_expand") - - def forward(self, inputs: paddle.Tensor): - x = self._pool(inputs) - x = self._conv1(x) - x = self._conv2(x) - return paddle.multiply(inputs, x) - - -class MbConvBlock(nn.Layer): - """Mobile inverted bottleneck convolution for Efficientnet.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - use_se: bool, - name: str = None, - drop_connect_rate: float = None, - is_test: bool = False, - model_name: str = None, - cur_stage: str = None): - super(MbConvBlock, self).__init__() - - oup = block_args.input_filters * block_args.expand_ratio - self.block_args = block_args - self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) - self.id_skip = block_args.id_skip - self.expand_ratio = block_args.expand_ratio - self.drop_connect_rate = drop_connect_rate - self.is_test = is_test - - if self.expand_ratio != 1: - self._ecn = ExpandConvNorm(input_channels, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._dcn = DepthwiseConvNorm(input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - if self.has_se: - num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) - self._se = SEBlock(input_channels * block_args.expand_ratio, - num_squeezed_channels, - oup, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._pcn = ProjectConvNorm(input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - x = inputs - if self.expand_ratio != 1: - x = self._ecn(x) - x = F.swish(x) - x = self._dcn(x) - x = F.swish(x) - if self.has_se: - x = self._se(x) - x = self._pcn(x) - if self.id_skip and \ - self.block_args.stride == 1 and \ - self.block_args.input_filters == self.block_args.output_filters: - if self.drop_connect_rate: - x = _drop_connect(x, self.drop_connect_rate, self.is_test) - x = paddle.elementwise_add(x, inputs) - return x - - -class ConvStemNorm(nn.Layer): - """Basic conv stem norm block for extracting features.""" - def __init__(self, - input_channels: int, - padding_type: str, - _global_params: dict, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvStemNorm, self).__init__() - - output_channels = round_filters(32, _global_params) - self._conv = ConvBNLayer(input_channels, - filter_size=3, - output_channels=output_channels, - stride=2, - bn_act=None, - padding_type=padding_type, - name="", - conv_name="_conv_stem", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ExtractFeatures(nn.Layer): - """Extract features.""" - def __init__(self, - input_channels: int, - _block_args: dict, - _global_params: dict, - padding_type: str, - use_se: 
bool, - is_test: bool, - model_name: str = None): - super(ExtractFeatures, self).__init__() - - self._global_params = _global_params - - self._conv_stem = ConvStemNorm(input_channels, - padding_type=padding_type, - _global_params=_global_params, - model_name=model_name, - cur_stage=0) - - self.block_args_copy = copy.deepcopy(_block_args) - idx = 0 - block_size = 0 - for block_arg in self.block_args_copy: - block_arg = block_arg._replace(input_filters=round_filters(block_arg.input_filters, _global_params), - output_filters=round_filters(block_arg.output_filters, _global_params), - num_repeat=round_repeats(block_arg.num_repeat, _global_params)) - block_size += 1 - for _ in range(block_arg.num_repeat - 1): - block_size += 1 - - self.conv_seq = [] - cur_stage = 1 - for block_args in _block_args: - block_args = block_args._replace(input_filters=round_filters(block_args.input_filters, _global_params), - output_filters=round_filters(block_args.output_filters, _global_params), - num_repeat=round_repeats(block_args.num_repeat, _global_params)) - - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - - _mc_block = self.add_sublayer( - "_blocks." + str(idx) + ".", - MbConvBlock(block_args.input_filters, - block_args=block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - if block_args.num_repeat > 1: - block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) - for _ in range(block_args.num_repeat - 1): - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - _mc_block = self.add_sublayer( - "block." + str(idx) + ".", - MbConvBlock(block_args.input_filters, - block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." 
+ str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - cur_stage += 1 - - def forward(self, inputs: paddle.Tensor): - x = self._conv_stem(inputs) - x = F.swish(x) - for _mc_block in self.conv_seq: - x = _mc_block(x) - return x - - -@moduleinfo(name="efficientnetb6_imagenet", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="efficientnetb6_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class EfficientNet_B6(nn.Layer): - def __init__(self, - is_test: bool = False, - padding_type: str = "SAME", - override_params: dict = None, - use_se: bool = True, - class_dim: int = 1000, - load_checkpoint: str = None): - super(EfficientNet_B6, self).__init__() - - model_name = 'efficientnet-b6' - self.name = "b6" - self._block_args, self._global_params = get_model_params(model_name, override_params) - self.padding_type = padding_type - self.use_se = use_se - self.is_test = is_test - - self._ef = ExtractFeatures(3, - self._block_args, - self._global_params, - self.padding_type, - self.use_se, - self.is_test, - model_name=self.name) - - output_channels = round_filters(1280, self._global_params) - oup = 576 - - self._conv = ConvBNLayer(oup, - 1, - output_channels, - bn_act="swish", - padding_type=self.padding_type, - name="", - conv_name="_conv_head", - bn_name="_bn1", - model_name=self.name, - cur_stage=7) - self._pool = AdaptiveAvgPool2d(1) - - if self._global_params.dropout_rate: - self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") - - param_attr, bias_attr = init_fc_layer("_fc") - self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'efficientnet_b6_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b6_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - x = self._ef(inputs) - x = self._conv(x) - x = self._pool(x) - if self._global_params.dropout_rate: - x = self._drop(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._fc(x) - return x diff --git a/hub_module/modules/image/classification/efficientnetb7_imagenet/module.py b/hub_module/modules/image/classification/efficientnetb7_imagenet/module.py deleted file mode 100644 index 9158dc5281dba7895a84cea65b7901e200f02481..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/efficientnetb7_imagenet/module.py +++ /dev/null @@ -1,758 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
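
The `EfficientNet_B6` class above is a plain `nn.Layer`, so once registered it can be driven like any dygraph model. A hedged usage sketch (assuming a PaddleHub 2.x install in which the `@moduleinfo` name resolves):

```python
import paddle
import paddlehub as hub

# Hypothetical invocation; 'efficientnetb6_imagenet' is the @moduleinfo name above.
model = hub.Module(name='efficientnetb6_imagenet')
batch = paddle.rand([1, 3, 528, 528])  # 528 is B6's native input resolution
logits = model(batch)
print(logits.shape)  # -> [1, 1000]
```
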
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import math -import collections -import re -import copy - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - -GlobalParams = collections.namedtuple('GlobalParams', [ - 'batch_norm_momentum', - 'batch_norm_epsilon', - 'dropout_rate', - 'num_classes', - 'width_coefficient', - 'depth_coefficient', - 'depth_divisor', - 'min_depth', - 'drop_connect_rate', -]) - -BlockArgs = collections.namedtuple( - 'BlockArgs', - ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) - -GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) -BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) - - -def efficientnet_params(model_name: str): - """ Map EfficientNet model name to parameter coefficients. """ - params_dict = { - # Coefficients: width,depth,resolution,dropout - 'efficientnet-b7': (2.0, 3.1, 600, 0.5) - } - return params_dict[model_name] - - -def efficientnet(width_coefficient: float = None, - depth_coefficient: float = None, - dropout_rate: float = 0.2, - drop_connect_rate: float = 0.2): - """ Get block arguments according to parameter and coefficients. """ - blocks_args = [ - 'r1_k3_s11_e1_i32_o16_se0.25', - 'r2_k3_s22_e6_i16_o24_se0.25', - 'r2_k5_s22_e6_i24_o40_se0.25', - 'r3_k3_s22_e6_i40_o80_se0.25', - 'r3_k5_s11_e6_i80_o112_se0.25', - 'r4_k5_s22_e6_i112_o192_se0.25', - 'r1_k3_s11_e6_i192_o320_se0.25', - ] - blocks_args = BlockDecoder.decode(blocks_args) - - global_params = GlobalParams(batch_norm_momentum=0.99, - batch_norm_epsilon=1e-3, - dropout_rate=dropout_rate, - drop_connect_rate=drop_connect_rate, - num_classes=1000, - width_coefficient=width_coefficient, - depth_coefficient=depth_coefficient, - depth_divisor=8, - min_depth=None) - - return blocks_args, global_params - - -def get_model_params(model_name: str, override_params: dict): - """ Get the block args and global params for a given model """ - if model_name.startswith('efficientnet'): - w, d, _, p = efficientnet_params(model_name) - blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) - else: - raise NotImplementedError('model name is not pre-defined: %s' % model_name) - if override_params: - global_params = global_params._replace(**override_params) - return blocks_args, global_params - - -def round_filters(filters: int, global_params: dict): - """ Calculate and round number of filters based on depth multiplier. """ - multiplier = global_params.width_coefficient - if not multiplier: - return filters - divisor = global_params.depth_divisor - min_depth = global_params.min_depth - filters *= multiplier - min_depth = min_depth or divisor - new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) - if new_filters < 0.9 * filters: # prevent rounding by more than 10% - new_filters += divisor - return int(new_filters) - - -def round_repeats(repeats: int, global_params: dict): - """ Round number of filters based on depth multiplier. 
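
Depth scaling uses a plain ceiling, `int(math.ceil(multiplier * repeats))`, so no stage ever loses a block. A quick check of that rule for B7's depth coefficient of 3.1:

```python
import math

def round_repeats_demo(repeats, depth_coefficient=3.1):
    # Same ceiling rule as round_repeats, standalone for illustration.
    return int(math.ceil(depth_coefficient * repeats))

stage_repeats = [1, 2, 2, 3, 3, 4, 1]  # the r values in blocks_args above
scaled = [round_repeats_demo(r) for r in stage_repeats]
print(scaled)       # -> [4, 7, 7, 10, 10, 13, 4]
print(sum(scaled))  # -> 55 MBConv blocks in EfficientNet-B7
```
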
""" - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) - - -class BlockDecoder(object): - """ - Block Decoder, straight from the official TensorFlow repository. - """ - @staticmethod - def _decode_block_string(block_string: str): - """ Gets a block through a string notation of arguments. """ - assert isinstance(block_string, str) - - ops = block_string.split('_') - options = {} - for op in ops: - splits = re.split(r'(\d.*)', op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value - - # Check stride - cond_1 = ('s' in options and len(options['s']) == 1) - cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) - assert (cond_1 or cond_2) - - return BlockArgs(kernel_size=int(options['k']), - num_repeat=int(options['r']), - input_filters=int(options['i']), - output_filters=int(options['o']), - expand_ratio=int(options['e']), - id_skip=('noskip' not in block_string), - se_ratio=float(options['se']) if 'se' in options else None, - stride=[int(options['s'][0])]) - - @staticmethod - def _encode_block_string(block): - """Encodes a block to a string.""" - args = [ - 'r%d' % block.num_repeat, - 'k%d' % block.kernel_size, - 's%d%d' % (block.strides[0], block.strides[1]), - 'e%s' % block.expand_ratio, - 'i%d' % block.input_filters, - 'o%d' % block.output_filters - ] - if 0 < block.se_ratio <= 1: - args.append('se%s' % block.se_ratio) - if block.id_skip is False: - args.append('noskip') - return '_'.join(args) - - @staticmethod - def decode(string_list: list): - """ - Decode a list of string notations to specify blocks in the network. - - string_list: list of strings, each string is a notation of block - return - list of BlockArgs namedtuples of block args - """ - assert isinstance(string_list, list) - blocks_args = [] - for block_string in string_list: - blocks_args.append(BlockDecoder._decode_block_string(block_string)) - return blocks_args - - @staticmethod - def encode(blocks_args: list): - """ - Encodes a list of BlockArgs to a list of strings. 
- - :param blocks_args: a list of BlockArgs namedtuples of block args - :return: a list of strings, each string is a notation of block - """ - block_strings = [] - for block in blocks_args: - block_strings.append(BlockDecoder._encode_block_string(block)) - return block_strings - - -def initial_type(name: str, use_bias: bool = False): - param_attr = ParamAttr(name=name + "_weights") - if use_bias: - bias_attr = ParamAttr(name=name + "_offset") - else: - bias_attr = False - return param_attr, bias_attr - - -def init_batch_norm_layer(name: str = "batch_norm"): - param_attr = ParamAttr(name=name + "_scale") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def init_fc_layer(name: str = "fc"): - param_attr = ParamAttr(name=name + "_weights") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): - """Calculate padding size.""" - if img_size % stride == 0: - out_size = max(filter_size - stride, 0) - else: - out_size = max(filter_size - (img_size % stride), 0) - return out_size // 2, out_size - out_size // 2 - - -inp_shape = {"b7": [600, 300, 300, 150, 75, 38, 38, 19]} - - -def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): - """Drop input connection""" - if is_test: - return inputs - keep_prob = 1.0 - prob - inputs_shape = paddle.shape(inputs) - random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) - binary_tensor = paddle.floor(random_tensor) - output = inputs / keep_prob * binary_tensor - return output - - -class Conv2ds(nn.Layer): - """Basic conv layer""" - def __init__(self, - input_channels: int, - output_channels: int, - filter_size: int, - stride: int = 1, - padding: int = 0, - groups: int = None, - name: str = "conv2d", - act: str = None, - use_bias: bool = False, - padding_type: str = None, - model_name: str = None, - cur_stage: str = None): - super(Conv2ds, self).__init__() - assert act in [None, "swish", "sigmoid"] - self.act = act - - param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) - - def get_padding(filter_size, stride=1, dilation=1): - padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 - return padding - - inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] - self.need_crop = False - if padding_type == "SAME": - top_padding, bottom_padding = cal_padding(inps, stride, filter_size) - left_padding, right_padding = cal_padding(inps, stride, filter_size) - height_padding = bottom_padding - width_padding = right_padding - if top_padding != bottom_padding or left_padding != right_padding: - height_padding = top_padding + stride - width_padding = left_padding + stride - self.need_crop = True - padding = [height_padding, width_padding] - elif padding_type == "VALID": - height_padding = 0 - width_padding = 0 - padding = [height_padding, width_padding] - elif padding_type == "DYNAMIC": - padding = get_padding(filter_size, stride) - else: - padding = padding_type - - groups = 1 if groups is None else groups - self._conv = Conv2d(input_channels, - output_channels, - filter_size, - groups=groups, - stride=stride, - padding=padding, - weight_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - x = self._conv(inputs) - if self.act == "swish": - x = F.swish(x) - elif self.act == "sigmoid": - x = F.sigmoid(x) - - if self.need_crop: - x = x[:, :, 1:, 1:] - return x - - -class ConvBNLayer(nn.Layer): - """Basic conv bn 
layer.""" - def __init__(self, - input_channels: int, - filter_size: int, - output_channels: int, - stride: int = 1, - num_groups: int = 1, - padding_type: str = "SAME", - conv_act: str = None, - bn_act: str = "swish", - use_bn: bool = True, - use_bias: bool = False, - name: str = None, - conv_name: str = None, - bn_name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2ds(input_channels=input_channels, - output_channels=output_channels, - filter_size=filter_size, - stride=stride, - groups=num_groups, - act=conv_act, - padding_type=padding_type, - name=conv_name, - use_bias=use_bias, - model_name=model_name, - cur_stage=cur_stage) - self.use_bn = use_bn - if use_bn is True: - bn_name = name + bn_name - param_attr, bias_attr = init_batch_norm_layer(bn_name) - - self._bn = BatchNorm(num_channels=output_channels, - act=bn_act, - momentum=0.99, - epsilon=0.001, - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - param_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - if self.use_bn: - x = self._conv(inputs) - x = self._bn(x) - return x - else: - return self._conv(inputs) - - -class ExpandConvNorm(nn.Layer): - """Expand conv norm layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ExpandConvNorm, self).__init__() - - self.oup = block_args.input_filters * block_args.expand_ratio - self.expand_ratio = block_args.expand_ratio - - if self.expand_ratio != 1: - self._conv = ConvBNLayer(input_channels, - 1, - self.oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_expand_conv", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - if self.expand_ratio != 1: - return self._conv(inputs) - else: - return inputs - - -class DepthwiseConvNorm(nn.Layer): - """Depthwise conv norm layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(DepthwiseConvNorm, self).__init__() - - self.k = block_args.kernel_size - self.s = block_args.stride - if isinstance(self.s, list) or isinstance(self.s, tuple): - self.s = self.s[0] - oup = block_args.input_filters * block_args.expand_ratio - - self._conv = ConvBNLayer(input_channels, - self.k, - oup, - self.s, - num_groups=input_channels, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_depthwise_conv", - bn_name="_bn1", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ProjectConvNorm(nn.Layer): - """Projection conv bn layer.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ProjectConvNorm, self).__init__() - - final_oup = block_args.output_filters - - self._conv = ConvBNLayer(input_channels, - 1, - final_oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_project_conv", - bn_name="_bn2", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class SEBlock(nn.Layer): - """Basic Squeeze-and-Excitation block for Efficientnet.""" - def __init__(self, - input_channels: int, - 
num_squeezed_channels: int, - oup: int, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(SEBlock, self).__init__() - - self._pool = AdaptiveAvgPool2d(1) - self._conv1 = Conv2ds(input_channels, - num_squeezed_channels, - 1, - use_bias=True, - padding_type=padding_type, - act="swish", - name=name + "_se_reduce") - - self._conv2 = Conv2ds(num_squeezed_channels, - oup, - 1, - act="sigmoid", - use_bias=True, - padding_type=padding_type, - name=name + "_se_expand") - - def forward(self, inputs: paddle.Tensor): - x = self._pool(inputs) - x = self._conv1(x) - x = self._conv2(x) - return paddle.multiply(inputs, x) - - -class MbConvBlock(nn.Layer): - """Mobile inverted bottleneck convolution for Efficientnet.""" - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - use_se: bool, - name: str = None, - drop_connect_rate: float = None, - is_test: bool = False, - model_name: str = None, - cur_stage: str = None): - super(MbConvBlock, self).__init__() - - oup = block_args.input_filters * block_args.expand_ratio - self.block_args = block_args - self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) - self.id_skip = block_args.id_skip - self.expand_ratio = block_args.expand_ratio - self.drop_connect_rate = drop_connect_rate - self.is_test = is_test - - if self.expand_ratio != 1: - self._ecn = ExpandConvNorm(input_channels, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._dcn = DepthwiseConvNorm(input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - if self.has_se: - num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) - self._se = SEBlock(input_channels * block_args.expand_ratio, - num_squeezed_channels, - oup, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._pcn = ProjectConvNorm(input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - x = inputs - if self.expand_ratio != 1: - x = self._ecn(x) - x = F.swish(x) - x = self._dcn(x) - x = F.swish(x) - if self.has_se: - x = self._se(x) - x = self._pcn(x) - if self.id_skip and \ - self.block_args.stride == 1 and \ - self.block_args.input_filters == self.block_args.output_filters: - if self.drop_connect_rate: - x = _drop_connect(x, self.drop_connect_rate, self.is_test) - x = paddle.elementwise_add(x, inputs) - return x - - -class ConvStemNorm(nn.Layer): - """Basic conv stem norm block for extracting features.""" - def __init__(self, - input_channels: int, - padding_type: str, - _global_params: dict, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvStemNorm, self).__init__() - - output_channels = round_filters(32, _global_params) - self._conv = ConvBNLayer(input_channels, - filter_size=3, - output_channels=output_channels, - stride=2, - bn_act=None, - padding_type=padding_type, - name="", - conv_name="_conv_stem", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ExtractFeatures(nn.Layer): - """Extract features.""" - def __init__(self, - input_channels: int, - _block_args: dict, - _global_params: dict, - padding_type: str, - use_se: 
bool, - is_test: bool, - model_name: str = None): - super(ExtractFeatures, self).__init__() - - self._global_params = _global_params - - self._conv_stem = ConvStemNorm(input_channels, - padding_type=padding_type, - _global_params=_global_params, - model_name=model_name, - cur_stage=0) - - self.block_args_copy = copy.deepcopy(_block_args) - idx = 0 - block_size = 0 - for block_arg in self.block_args_copy: - block_arg = block_arg._replace(input_filters=round_filters(block_arg.input_filters, _global_params), - output_filters=round_filters(block_arg.output_filters, _global_params), - num_repeat=round_repeats(block_arg.num_repeat, _global_params)) - block_size += 1 - for _ in range(block_arg.num_repeat - 1): - block_size += 1 - - self.conv_seq = [] - cur_stage = 1 - for block_args in _block_args: - block_args = block_args._replace(input_filters=round_filters(block_args.input_filters, _global_params), - output_filters=round_filters(block_args.output_filters, _global_params), - num_repeat=round_repeats(block_args.num_repeat, _global_params)) - - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - - _mc_block = self.add_sublayer( - "_blocks." + str(idx) + ".", - MbConvBlock(block_args.input_filters, - block_args=block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - if block_args.num_repeat > 1: - block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) - for _ in range(block_args.num_repeat - 1): - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - _mc_block = self.add_sublayer( - "block." + str(idx) + ".", - MbConvBlock(block_args.input_filters, - block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." 
+ str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - cur_stage += 1 - - def forward(self, inputs: paddle.Tensor): - x = self._conv_stem(inputs) - x = F.swish(x) - for _mc_block in self.conv_seq: - x = _mc_block(x) - return x - - -@moduleinfo(name="efficientnetb7_imagenet", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="efficientnetb7_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class EfficientNet_B7(nn.Layer): - def __init__(self, - is_test: bool = False, - padding_type: str = "SAME", - override_params: dict = None, - use_se: bool = True, - class_dim: int = 1000, - load_checkpoint: str = None): - super(EfficientNet_B7, self).__init__() - - model_name = 'efficientnet-b7' - self.name = "b7" - self._block_args, self._global_params = get_model_params(model_name, override_params) - self.padding_type = padding_type - self.use_se = use_se - self.is_test = is_test - - self._ef = ExtractFeatures(3, - self._block_args, - self._global_params, - self.padding_type, - self.use_se, - self.is_test, - model_name=self.name) - - output_channels = round_filters(1280, self._global_params) - oup = 640 - - self._conv = ConvBNLayer(oup, - 1, - output_channels, - bn_act="swish", - padding_type=self.padding_type, - name="", - conv_name="_conv_head", - bn_name="_bn1", - model_name=self.name, - cur_stage=7) - self._pool = AdaptiveAvgPool2d(1) - - if self._global_params.dropout_rate: - self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") - - param_attr, bias_attr = init_fc_layer("_fc") - self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'efficientnet_b7_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b7_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - x = self._ef(inputs) - x = self._conv(x) - x = self._pool(x) - if self._global_params.dropout_rate: - x = self._drop(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._fc(x) - return x diff --git a/hub_module/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/data_feed.py b/hub_module/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/data_feed.py deleted file mode 100644 index d5ffb5efe9fdfbd143b949892aa44d851e907b41..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/data_feed.py +++ /dev/null @@ -1,84 +0,0 @@ -# coding=utf-8 -import os -import time -from collections import OrderedDict - -import cv2 -import numpy as np -from PIL import Image - -__all__ = ['reader'] - -DATA_DIM = 224 -img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) -img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) - - -def resize_short(img, target_size): - percent = float(target_size) / min(img.size[0], img.size[1]) - resized_width = int(round(img.size[0] * percent)) - resized_height = int(round(img.size[1] * percent)) - img = 
img.resize((resized_width, resized_height), Image.LANCZOS) - return img - - -def crop_image(img, target_size, center): - width, height = img.size - size = target_size - if center == True: - w_start = (width - size) / 2 - h_start = (height - size) / 2 - else: - w_start = np.random.randint(0, width - size + 1) - h_start = np.random.randint(0, height - size + 1) - w_end = w_start + size - h_end = h_start + size - img = img.crop((w_start, h_start, w_end, h_end)) - return img - - -def process_image(img): - img = resize_short(img, target_size=256) - img = crop_image(img, target_size=DATA_DIM, center=True) - if img.mode != 'RGB': - img = img.convert('RGB') - img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 - img -= img_mean - img /= img_std - return img - - -def reader(images=None, paths=None): - """ - Preprocess to yield image. - - Args: - images (list[numpy.ndarray]): images data, shape of each is [H, W, C]. - paths (list[str]): paths to images. - - Yield: - each (collections.OrderedDict): info of original image, preprocessed image. - """ - component = list() - if paths: - for im_path in paths: - each = OrderedDict() - assert os.path.isfile( - im_path), "The {} isn't a valid file path.".format(im_path) - each['org_im_path'] = im_path - each['org_im'] = Image.open(im_path) - each['org_im_width'], each['org_im_height'] = each['org_im'].size - component.append(each) - if images is not None: - assert type(images), "images is a list." - for im in images: - each = OrderedDict() - each['org_im'] = Image.fromarray(im[:, :, ::-1]) - each['org_im_path'] = 'ndarray_time={}'.format( - round(time.time(), 6) * 1e6) - each['org_im_width'], each['org_im_height'] = each['org_im'].size - component.append(each) - - for element in component: - element['image'] = process_image(element['org_im']) - yield element diff --git a/hub_module/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/module.py b/hub_module/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/module.py deleted file mode 100644 index ffd4d06462e5ccb5703b6d0a21a538fdfe3af6f7..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/module.py +++ /dev/null @@ -1,282 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division - -import ast -import argparse -import os - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix - -from fix_resnext101_32x48d_wsl_imagenet.processor import postprocess, base64_to_cv2 -from fix_resnext101_32x48d_wsl_imagenet.data_feed import reader -from fix_resnext101_32x48d_wsl_imagenet.resnext101_wsl import Fix_ResNeXt101_32x48d_wsl - - -@moduleinfo( - name="fix_resnext101_32x48d_wsl_imagenet", - type="CV/image_classification", - author="paddlepaddle", - author_email="paddle-dev@baidu.com", - summary= - "fix_resnext101_32x48d_wsl is a image classfication model, this module is trained with imagenet datasets.", - version="1.0.0") -class FixResnext10132x48dwslImagenet(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "model") - label_file = os.path.join(self.directory, "label_list.txt") - with open(label_file, 'r', encoding='utf-8') as file: - self.label_list = file.read().split("\n")[:-1] - 
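-        # Predictor construction is deferred: _set_config() builds the CPU (and,
-        # when CUDA_VISIBLE_DEVICES is set, GPU) predictors only on the first
-        # classification() call, so merely loading the module stays cheap.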
self.predictor_set = False - - def get_expected_image_width(self): - return 224 - - def get_expected_image_height(self): - return 224 - - def get_pretrained_images_mean(self): - im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3) - return im_mean - - def get_pretrained_images_std(self): - im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) - return im_std - - def _set_config(self): - """ - predictor config setting - """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu( - memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, trainable=True, pretrained=True): - """context for transfer learning. - - Args: - trainable (bool): Set parameters in program to be trainable. - pretrained (bool) : Whether to load pretrained model. - - Returns: - inputs (dict): key is 'image', corresponding vaule is image tensor. - outputs (dict): key is : - 'classification', corresponding value is the result of classification. - 'feature_map', corresponding value is the result of the layer before the fully connected layer. - context_prog (fluid.Program): program for transfer learning. - """ - context_prog = fluid.Program() - startup_prog = fluid.Program() - with fluid.program_guard(context_prog, startup_prog): - with fluid.unique_name.guard(): - image = fluid.layers.data( - name="image", shape=[3, 224, 224], dtype="float32") - resnet_vd = Fix_ResNeXt101_32x48d_wsl() - output, feature_map = resnet_vd.net( - input=image, class_dim=len(self.label_list)) - - name_prefix = '@HUB_{}@'.format(self.name) - inputs = {'image': name_prefix + image.name} - outputs = { - 'classification': name_prefix + output.name, - 'feature_map': name_prefix + feature_map.name - } - add_vars_prefix(context_prog, name_prefix) - add_vars_prefix(startup_prog, name_prefix) - global_vars = context_prog.global_block().vars - inputs = { - key: global_vars[value] - for key, value in inputs.items() - } - outputs = { - key: global_vars[value] - for key, value in outputs.items() - } - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - # pretrained - if pretrained: - - def _if_exist(var): - b = os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - return b - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - context_prog, - predicate=_if_exist) - else: - exe.run(startup_prog) - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - return inputs, outputs, context_prog - - def classification(self, - images=None, - paths=None, - batch_size=1, - use_gpu=False, - top_k=1): - """ - API for image classification. - - Args: - images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. - paths (list[str]): The paths of images. - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - top_k (int): Return top k results. - - Returns: - res (list[dict]): The classfication results. 
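
A hedged usage sketch (the file name is hypothetical; BGR ndarrays can be passed via `images=` instead of `paths=`):

```python
import paddlehub as hub

classifier = hub.Module(name='fix_resnext101_32x48d_wsl_imagenet')
results = classifier.classification(paths=['test.jpg'], batch_size=1,
                                    use_gpu=False, top_k=3)
# One dict per input image, mapping label -> probability.
print(results[0])
```
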
- """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." - ) - - if not self.predictor_set: - self._set_config() - self.predictor_set = True - - all_data = list() - for yield_data in reader(images, paths): - all_data.append(yield_data) - - total_num = len(all_data) - loop_num = int(np.ceil(total_num / batch_size)) - - res = list() - for iter_id in range(loop_num): - batch_data = list() - handle_id = iter_id * batch_size - for image_id in range(batch_size): - try: - batch_data.append(all_data[handle_id + image_id]) - except: - pass - # feed batch image - batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.copy()) - predictor_output = self.gpu_predictor.run([ - batch_image - ]) if use_gpu else self.cpu_predictor.run([batch_image]) - out = postprocess( - data_out=predictor_output[0].as_ndarray(), - label_list=self.label_list, - top_k=top_k) - res += out - return res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.classification(images=images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. - """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.classification( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not.") - self.arg_config_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - self.arg_config_group.add_argument( - '--top_k', - type=ast.literal_eval, - default=1, - help="Return top k results.") - - def add_module_input_arg(self): - """ - Add the command input options. 
- """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") diff --git a/hub_module/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/resnext101_wsl.py b/hub_module/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/resnext101_wsl.py deleted file mode 100644 index 7c6b69e632947aa41e323ccdca9dde4b919ee52b..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/resnext101_wsl.py +++ /dev/null @@ -1,180 +0,0 @@ -#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import paddle -import paddle.fluid as fluid -import math -from paddle.fluid.param_attr import ParamAttr - -__all__ = [ - "ResNeXt101_32x8d_wsl", "ResNeXt101_32x16d_wsl", "ResNeXt101_32x32d_wsl", - "ResNeXt101_32x48d_wsl", "Fix_ResNeXt101_32x48d_wsl" -] - - -class ResNeXt101_wsl(): - def __init__(self, layers=101, cardinality=32, width=48): - self.layers = layers - self.cardinality = cardinality - self.width = width - - def net(self, input, class_dim=1000): - layers = self.layers - cardinality = self.cardinality - width = self.width - - depth = [3, 4, 23, 3] - base_width = cardinality * width - num_filters = [base_width * i for i in [1, 2, 4, 8]] - - conv = self.conv_bn_layer( - input=input, - num_filters=64, - filter_size=7, - stride=2, - act='relu', - name="conv1") #debug - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - - for block in range(len(depth)): - for i in range(depth[block]): - conv_name = 'layer' + str(block + 1) + "." 
+ str(i) - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=cardinality, - name=conv_name) - - pool = fluid.layers.pool2d( - input=conv, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv), - name='fc.weight'), - bias_attr=fluid.param_attr.ParamAttr(name='fc.bias')) - return out, pool - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - if "downsample" in name: - conv_name = name + '.0' - else: - conv_name = name - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=conv_name + ".weight"), - bias_attr=False) - if "downsample" in name: - bn_name = name[:9] + 'downsample' + '.1' - else: - if "conv1" == name: - bn_name = 'bn' + name[-1] - else: - bn_name = (name[:10] if name[7:9].isdigit() else - name[:9]) + 'bn' + name[-1] - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '.weight'), - bias_attr=ParamAttr(bn_name + '.bias'), - moving_mean_name=bn_name + '.running_mean', - moving_variance_name=bn_name + '.running_var', - ) - - def shortcut(self, input, ch_out, stride, name): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck_block(self, input, num_filters, stride, cardinality, name): - cardinality = self.cardinality - width = self.width - conv0 = self.conv_bn_layer( - input=input, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + ".conv1") - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - stride=stride, - groups=cardinality, - act='relu', - name=name + ".conv2") - conv2 = self.conv_bn_layer( - input=conv1, - num_filters=num_filters // (width // 8), - filter_size=1, - act=None, - name=name + ".conv3") - - short = self.shortcut( - input, - num_filters // (width // 8), - stride, - name=name + ".downsample") - - return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') - - -def ResNeXt101_32x8d_wsl(): - model = ResNeXt101_wsl(cardinality=32, width=8) - return model - - -def ResNeXt101_32x16d_wsl(): - model = ResNeXt101_wsl(cardinality=32, width=16) - return model - - -def ResNeXt101_32x32d_wsl(): - model = ResNeXt101_wsl(cardinality=32, width=32) - return model - - -def ResNeXt101_32x48d_wsl(): - model = ResNeXt101_wsl(cardinality=32, width=48) - return model - - -def Fix_ResNeXt101_32x48d_wsl(): - model = ResNeXt101_wsl(cardinality=32, width=48) - return model diff --git a/hub_module/modules/image/classification/googlenet_imagenet/module.py b/hub_module/modules/image/classification/googlenet_imagenet/module.py deleted file mode 100644 index 08b60fea5b7a1c2c1eb87cec3ce29cec66bc5f30..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/googlenet_imagenet/module.py +++ /dev/null @@ -1,179 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -def xavier(channels: int, filter_size: int, name: str): - """Initialize the weights by uniform distribution.""" - stdv = (3.0 / (filter_size**2 * channels))**0.5 - param_attr = ParamAttr(initializer=Uniform(-stdv, stdv), name=name + "_weights") - return param_attr - - -class ConvLayer(nn.Layer): - """Basic conv2d layer.""" - def __init__(self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - name: str = None): - super(ConvLayer, self).__init__() - - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - - def forward(self, inputs: paddle.Tensor): - y = self._conv(inputs) - return y - - -class Inception(nn.Layer): - """Inception block.""" - def __init__(self, - input_channels: int, - output_channels: int, - filter1: int, - filter3R: int, - filter3: int, - filter5R: int, - filter5: int, - proj: int, - name: str = None): - super(Inception, self).__init__() - - self._conv1 = ConvLayer(input_channels, filter1, 1, name="inception_" + name + "_1x1") - self._conv3r = ConvLayer(input_channels, filter3R, 1, name="inception_" + name + "_3x3_reduce") - self._conv3 = ConvLayer(filter3R, filter3, 3, name="inception_" + name + "_3x3") - self._conv5r = ConvLayer(input_channels, filter5R, 1, name="inception_" + name + "_5x5_reduce") - self._conv5 = ConvLayer(filter5R, filter5, 5, name="inception_" + name + "_5x5") - self._pool = MaxPool2d(kernel_size=3, stride=1, padding=1) - - self._convprj = ConvLayer(input_channels, proj, 1, name="inception_" + name + "_3x3_proj") - - def forward(self, inputs: paddle.Tensor): - conv1 = self._conv1(inputs) - - conv3r = self._conv3r(inputs) - conv3 = self._conv3(conv3r) - - conv5r = self._conv5r(inputs) - conv5 = self._conv5(conv5r) - - pool = self._pool(inputs) - convprj = self._convprj(pool) - - cat = paddle.concat([conv1, conv3, conv5, convprj], axis=1) - cat = F.relu(cat) - return cat - - -@moduleinfo(name="googlenet_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="GoogleNet_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class GoogleNet(nn.Layer): - """GoogleNet model""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(GoogleNet, self).__init__() - self._conv = ConvLayer(3, 64, 7, 2, name="conv1") - self._pool = MaxPool2d(kernel_size=3, stride=2) - self._conv_1 = ConvLayer(64, 64, 1, name="conv2_1x1") - self._conv_2 = ConvLayer(64, 192, 3, name="conv2_3x3") - - 
self._ince3a = Inception(192, 192, 64, 96, 128, 16, 32, 32, name="ince3a") - self._ince3b = Inception(256, 256, 128, 128, 192, 32, 96, 64, name="ince3b") - - self._ince4a = Inception(480, 480, 192, 96, 208, 16, 48, 64, name="ince4a") - self._ince4b = Inception(512, 512, 160, 112, 224, 24, 64, 64, name="ince4b") - self._ince4c = Inception(512, 512, 128, 128, 256, 24, 64, 64, name="ince4c") - self._ince4d = Inception(512, 512, 112, 144, 288, 32, 64, 64, name="ince4d") - self._ince4e = Inception(528, 528, 256, 160, 320, 32, 128, 128, name="ince4e") - - self._ince5a = Inception(832, 832, 256, 160, 320, 32, 128, 128, name="ince5a") - self._ince5b = Inception(832, 832, 384, 192, 384, 48, 128, 128, name="ince5b") - - self._pool_5 = AvgPool2d(kernel_size=7, stride=7) - - self._drop = Dropout(p=0.4, mode="downscale_in_infer") - self._fc_out = Linear(1024, - class_dim, - weight_attr=xavier(1024, 1, "out"), - bias_attr=ParamAttr(name="out_offset")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'googlenet_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/googlenet_imagenet.pdparams -O' + - checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - x = self._conv(inputs) - x = self._pool(x) - x = self._conv_1(x) - x = self._conv_2(x) - x = self._pool(x) - - x = self._ince3a(x) - x = self._ince3b(x) - x = self._pool(x) - - ince4a = self._ince4a(x) - x = self._ince4b(ince4a) - x = self._ince4c(x) - ince4d = self._ince4d(x) - x = self._ince4e(ince4d) - x = self._pool(x) - - x = self._ince5a(x) - ince5b = self._ince5b(x) - - x = self._pool_5(ince5b) - x = self._drop(x) - x = paddle.squeeze(x, axis=[2, 3]) - out = self._fc_out(x) - out = F.softmax(out) - - return out diff --git a/hub_module/modules/image/classification/inceptionv4_imagenet/module.py b/hub_module/modules/image/classification/inceptionv4_imagenet/module.py deleted file mode 100644 index e926cfe36e62fa274453ec91d0356f455bd0ac79..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/inceptionv4_imagenet/module.py +++ /dev/null @@ -1,343 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
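The `Inception` block deleted above concatenates four parallel branches (1x1, reduced 3x3, reduced 5x5, and a pooled 1x1 projection) along the channel axis, so each block's output width is `filter1 + filter3 + filter5 + proj`. A minimal, self-contained sketch of that bookkeeping, assuming the final Paddle 2.x API names (`Conv2D`, `MaxPool2D`) rather than the 2.0-beta `Conv2d` spelling used in the module:

```python
import paddle
import paddle.nn as nn
import paddle.nn.functional as F


class TinyInception(nn.Layer):
    """Toy Inception branch-and-concat block (illustration only)."""

    def __init__(self, c_in, f1, f3r, f3, f5r, f5, proj):
        super().__init__()
        self.b1 = nn.Conv2D(c_in, f1, 1)                       # 1x1 branch
        self.b3 = nn.Sequential(nn.Conv2D(c_in, f3r, 1),       # 1x1 reduce
                                nn.Conv2D(f3r, f3, 3, padding=1))
        self.b5 = nn.Sequential(nn.Conv2D(c_in, f5r, 1),
                                nn.Conv2D(f5r, f5, 5, padding=2))
        self.bp = nn.Sequential(nn.MaxPool2D(3, stride=1, padding=1),
                                nn.Conv2D(c_in, proj, 1))

    def forward(self, x):
        # All branches preserve H and W, so only channels add: f1 + f3 + f5 + proj.
        return F.relu(paddle.concat(
            [self.b1(x), self.b3(x), self.b5(x), self.bp(x)], axis=1))


# ince3a above uses (64, 96, 128, 16, 32, 32): 64 + 128 + 32 + 32 = 256,
# which is exactly the input width the following ince3b block declares.
block = TinyInception(192, 64, 96, 128, 16, 32, 32)
y = block(paddle.randn([1, 192, 28, 28]))
print(y.shape)  # [1, 256, 28, 28]
```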
-import os -import math - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__(self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - padding: int = 0, - groups: int = 1, - act: str = 'relu', - name: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - bn_name = name + "_bn" - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(name=bn_name + "_offset"), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class InceptionStem(nn.Layer): - """InceptionV4 stem module.""" - def __init__(self): - super(InceptionStem, self).__init__() - self._conv_1 = ConvBNLayer(3, 32, 3, stride=2, act="relu", name="conv1_3x3_s2") - self._conv_2 = ConvBNLayer(32, 32, 3, act="relu", name="conv2_3x3_s1") - self._conv_3 = ConvBNLayer(32, 64, 3, padding=1, act="relu", name="conv3_3x3_s1") - self._pool = MaxPool2d(kernel_size=3, stride=2, padding=0) - self._conv2 = ConvBNLayer(64, 96, 3, stride=2, act="relu", name="inception_stem1_3x3_s2") - self._conv1_1 = ConvBNLayer(160, 64, 1, act="relu", name="inception_stem2_3x3_reduce") - self._conv1_2 = ConvBNLayer(64, 96, 3, act="relu", name="inception_stem2_3x3") - self._conv2_1 = ConvBNLayer(160, 64, 1, act="relu", name="inception_stem2_1x7_reduce") - self._conv2_2 = ConvBNLayer(64, 64, (7, 1), padding=(3, 0), act="relu", name="inception_stem2_1x7") - self._conv2_3 = ConvBNLayer(64, 64, (1, 7), padding=(0, 3), act="relu", name="inception_stem2_7x1") - self._conv2_4 = ConvBNLayer(64, 96, 3, act="relu", name="inception_stem2_3x3_2") - self._conv3 = ConvBNLayer(192, 192, 3, stride=2, act="relu", name="inception_stem3_3x3_s2") - - def forward(self, inputs: paddle.Tensor): - conv = self._conv_1(inputs) - conv = self._conv_2(conv) - conv = self._conv_3(conv) - - pool1 = self._pool(conv) - conv2 = self._conv2(conv) - concat = paddle.concat([pool1, conv2], axis=1) - - conv1 = self._conv1_1(concat) - conv1 = self._conv1_2(conv1) - - conv2 = self._conv2_1(concat) - conv2 = self._conv2_2(conv2) - conv2 = self._conv2_3(conv2) - conv2 = self._conv2_4(conv2) - - concat = paddle.concat([conv1, conv2], axis=1) - - conv1 = self._conv3(concat) - pool1 = self._pool(concat) - - concat = paddle.concat([conv1, pool1], axis=1) - return concat - - -class InceptionA(nn.Layer): - """InceptionA module for InceptionV4.""" - def __init__(self, name: str): - super(InceptionA, self).__init__() - self._pool = AvgPool2d(kernel_size=3, stride=1, padding=1) - self._conv1 = ConvBNLayer(384, 96, 1, act="relu", name="inception_a" + name + "_1x1") - self._conv2 = ConvBNLayer(384, 96, 1, act="relu", name="inception_a" + name + "_1x1_2") - self._conv3_1 = ConvBNLayer(384, 64, 1, act="relu", name="inception_a" + name + "_3x3_reduce") - self._conv3_2 = 
ConvBNLayer(64, 96, 3, padding=1, act="relu", name="inception_a" + name + "_3x3") - self._conv4_1 = ConvBNLayer(384, 64, 1, act="relu", name="inception_a" + name + "_3x3_2_reduce") - self._conv4_2 = ConvBNLayer(64, 96, 3, padding=1, act="relu", name="inception_a" + name + "_3x3_2") - self._conv4_3 = ConvBNLayer(96, 96, 3, padding=1, act="relu", name="inception_a" + name + "_3x3_3") - - def forward(self, inputs: paddle.Tensor): - pool1 = self._pool(inputs) - conv1 = self._conv1(pool1) - - conv2 = self._conv2(inputs) - - conv3 = self._conv3_1(inputs) - conv3 = self._conv3_2(conv3) - - conv4 = self._conv4_1(inputs) - conv4 = self._conv4_2(conv4) - conv4 = self._conv4_3(conv4) - - concat = paddle.concat([conv1, conv2, conv3, conv4], axis=1) - return concat - - -class ReductionA(nn.Layer): - """ReductionA module for InceptionV4.""" - def __init__(self): - super(ReductionA, self).__init__() - self._pool = MaxPool2d(kernel_size=3, stride=2, padding=0) - self._conv2 = ConvBNLayer(384, 384, 3, stride=2, act="relu", name="reduction_a_3x3") - self._conv3_1 = ConvBNLayer(384, 192, 1, act="relu", name="reduction_a_3x3_2_reduce") - self._conv3_2 = ConvBNLayer(192, 224, 3, padding=1, act="relu", name="reduction_a_3x3_2") - self._conv3_3 = ConvBNLayer(224, 256, 3, stride=2, act="relu", name="reduction_a_3x3_3") - - def forward(self, inputs: paddle.Tensor): - pool1 = self._pool(inputs) - conv2 = self._conv2(inputs) - conv3 = self._conv3_1(inputs) - conv3 = self._conv3_2(conv3) - conv3 = self._conv3_3(conv3) - concat = paddle.concat([pool1, conv2, conv3], axis=1) - return concat - - -class InceptionB(nn.Layer): - """InceptionB module for InceptionV4.""" - def __init__(self, name: str = None): - super(InceptionB, self).__init__() - self._pool = AvgPool2d(kernel_size=3, stride=1, padding=1) - self._conv1 = ConvBNLayer(1024, 128, 1, act="relu", name="inception_b" + name + "_1x1") - self._conv2 = ConvBNLayer(1024, 384, 1, act="relu", name="inception_b" + name + "_1x1_2") - self._conv3_1 = ConvBNLayer(1024, 192, 1, act="relu", name="inception_b" + name + "_1x7_reduce") - self._conv3_2 = ConvBNLayer(192, 224, (1, 7), padding=(0, 3), act="relu", name="inception_b" + name + "_1x7") - self._conv3_3 = ConvBNLayer(224, 256, (7, 1), padding=(3, 0), act="relu", name="inception_b" + name + "_7x1") - self._conv4_1 = ConvBNLayer(1024, 192, 1, act="relu", name="inception_b" + name + "_7x1_2_reduce") - self._conv4_2 = ConvBNLayer(192, 192, (1, 7), padding=(0, 3), act="relu", name="inception_b" + name + "_1x7_2") - self._conv4_3 = ConvBNLayer(192, 224, (7, 1), padding=(3, 0), act="relu", name="inception_b" + name + "_7x1_2") - self._conv4_4 = ConvBNLayer(224, 224, (1, 7), padding=(0, 3), act="relu", name="inception_b" + name + "_1x7_3") - self._conv4_5 = ConvBNLayer(224, 256, (7, 1), padding=(3, 0), act="relu", name="inception_b" + name + "_7x1_3") - - def forward(self, inputs: paddle.Tensor): - pool1 = self._pool(inputs) - conv1 = self._conv1(pool1) - - conv2 = self._conv2(inputs) - - conv3 = self._conv3_1(inputs) - conv3 = self._conv3_2(conv3) - conv3 = self._conv3_3(conv3) - - conv4 = self._conv4_1(inputs) - conv4 = self._conv4_2(conv4) - conv4 = self._conv4_3(conv4) - conv4 = self._conv4_4(conv4) - conv4 = self._conv4_5(conv4) - - concat = paddle.concat([conv1, conv2, conv3, conv4], axis=1) - return concat - - -class ReductionB(nn.Layer): - """ReductionB module for InceptionV4.""" - def __init__(self): - super(ReductionB, self).__init__() - self._pool = MaxPool2d(kernel_size=3, stride=2, padding=0) - self._conv2_1 = 
ConvBNLayer(1024, 192, 1, act="relu", name="reduction_b_3x3_reduce") - self._conv2_2 = ConvBNLayer(192, 192, 3, stride=2, act="relu", name="reduction_b_3x3") - self._conv3_1 = ConvBNLayer(1024, 256, 1, act="relu", name="reduction_b_1x7_reduce") - self._conv3_2 = ConvBNLayer(256, 256, (1, 7), padding=(0, 3), act="relu", name="reduction_b_1x7") - self._conv3_3 = ConvBNLayer(256, 320, (7, 1), padding=(3, 0), act="relu", name="reduction_b_7x1") - self._conv3_4 = ConvBNLayer(320, 320, 3, stride=2, act="relu", name="reduction_b_3x3_2") - - def forward(self, inputs: paddle.Tensor): - pool1 = self._pool(inputs) - - conv2 = self._conv2_1(inputs) - conv2 = self._conv2_2(conv2) - - conv3 = self._conv3_1(inputs) - conv3 = self._conv3_2(conv3) - conv3 = self._conv3_3(conv3) - conv3 = self._conv3_4(conv3) - - concat = paddle.concat([pool1, conv2, conv3], axis=1) - - return concat - - -class InceptionC(nn.Layer): - """InceptionC module for InceptionV4.""" - def __init__(self, name: str = None): - super(InceptionC, self).__init__() - self._pool = AvgPool2d(kernel_size=3, stride=1, padding=1) - self._conv1 = ConvBNLayer(1536, 256, 1, act="relu", name="inception_c" + name + "_1x1") - self._conv2 = ConvBNLayer(1536, 256, 1, act="relu", name="inception_c" + name + "_1x1_2") - self._conv3_0 = ConvBNLayer(1536, 384, 1, act="relu", name="inception_c" + name + "_1x1_3") - self._conv3_1 = ConvBNLayer(384, 256, (1, 3), padding=(0, 1), act="relu", name="inception_c" + name + "_1x3") - self._conv3_2 = ConvBNLayer(384, 256, (3, 1), padding=(1, 0), act="relu", name="inception_c" + name + "_3x1") - self._conv4_0 = ConvBNLayer(1536, 384, 1, act="relu", name="inception_c" + name + "_1x1_4") - self._conv4_00 = ConvBNLayer(384, 448, (1, 3), padding=(0, 1), act="relu", name="inception_c" + name + "_1x3_2") - self._conv4_000 = ConvBNLayer(448, - 512, (3, 1), - padding=(1, 0), - act="relu", - name="inception_c" + name + "_3x1_2") - self._conv4_1 = ConvBNLayer(512, 256, (1, 3), padding=(0, 1), act="relu", name="inception_c" + name + "_1x3_3") - self._conv4_2 = ConvBNLayer(512, 256, (3, 1), padding=(1, 0), act="relu", name="inception_c" + name + "_3x1_3") - - def forward(self, inputs: paddle.Tensor): - pool1 = self._pool(inputs) - conv1 = self._conv1(pool1) - - conv2 = self._conv2(inputs) - - conv3 = self._conv3_0(inputs) - conv3_1 = self._conv3_1(conv3) - conv3_2 = self._conv3_2(conv3) - - conv4 = self._conv4_0(inputs) - conv4 = self._conv4_00(conv4) - conv4 = self._conv4_000(conv4) - conv4_1 = self._conv4_1(conv4) - conv4_2 = self._conv4_2(conv4) - - concat = paddle.concat([conv1, conv2, conv3_1, conv3_2, conv4_1, conv4_2], axis=1) - - return concat - - -@moduleinfo(name="inceptionv4_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="InceptionV4_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class InceptionV4(nn.Layer): - """InceptionV4 model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(InceptionV4, self).__init__() - self._inception_stem = InceptionStem() - - self._inceptionA_1 = InceptionA(name="1") - self._inceptionA_2 = InceptionA(name="2") - self._inceptionA_3 = InceptionA(name="3") - self._inceptionA_4 = InceptionA(name="4") - self._reductionA = ReductionA() - - self._inceptionB_1 = InceptionB(name="1") - self._inceptionB_2 = InceptionB(name="2") - self._inceptionB_3 = InceptionB(name="3") - self._inceptionB_4 = InceptionB(name="4") - 
self._inceptionB_5 = InceptionB(name="5") - self._inceptionB_6 = InceptionB(name="6") - self._inceptionB_7 = InceptionB(name="7") - self._reductionB = ReductionB() - - self._inceptionC_1 = InceptionC(name="1") - self._inceptionC_2 = InceptionC(name="2") - self._inceptionC_3 = InceptionC(name="3") - - self.avg_pool = AdaptiveAvgPool2d(1) - self._drop = Dropout(p=0.2, mode="downscale_in_infer") - stdv = 1.0 / math.sqrt(1536 * 1.0) - self.out = Linear(1536, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="final_fc_weights"), - bias_attr=ParamAttr(name="final_fc_offset")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'inceptionv4_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/inceptionv4_imagenet.pdparams -O' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs): - x = self._inception_stem(inputs) - - x = self._inceptionA_1(x) - x = self._inceptionA_2(x) - x = self._inceptionA_3(x) - x = self._inceptionA_4(x) - x = self._reductionA(x) - - x = self._inceptionB_1(x) - x = self._inceptionB_2(x) - x = self._inceptionB_3(x) - x = self._inceptionB_4(x) - x = self._inceptionB_5(x) - x = self._inceptionB_6(x) - x = self._inceptionB_7(x) - x = self._reductionB(x) - - x = self._inceptionC_1(x) - x = self._inceptionC_2(x) - x = self._inceptionC_3(x) - - x = self.avg_pool(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._drop(x) - x = self.out(x) - return x diff --git a/hub_module/modules/image/classification/mobilenet_v1_imagenet/module.py b/hub_module/modules/image/classification/mobilenet_v1_imagenet/module.py deleted file mode 100644 index 9afa6809a92258cecef00970787e4ab8fe4e7991..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/mobilenet_v1_imagenet/module.py +++ /dev/null @@ -1,241 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
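Every module in this family bootstraps its weights the same way: an explicit `load_checkpoint` path wins; otherwise the module fetches its default `.pdparams` file into its own directory (via a `wget` shell-out) and restores it with `set_dict`. A hedged re-sketch of that pattern, using only the standard library for the download; `load_weights` and `default_url` are hypothetical names, not part of the deleted modules:

```python
import os
import urllib.request

import paddle


def load_weights(layer, directory, default_url, load_checkpoint=None):
    """Restore weights into `layer`, downloading the default file if needed."""
    if load_checkpoint is not None:
        checkpoint = load_checkpoint
    else:
        checkpoint = os.path.join(directory, os.path.basename(default_url))
        if not os.path.exists(checkpoint):
            # Portable replacement for the `os.system('wget ...')` call above.
            urllib.request.urlretrieve(default_url, checkpoint)
    state = paddle.load(checkpoint)
    # The modules above index the loaded object with [0]; their .pdparams
    # files evidently bundle the state dict inside a sequence.
    if isinstance(state, (list, tuple)):
        state = state[0]
    layer.set_dict(state)
```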
- -import os -import math - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import MSRA -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__(self, - num_channels: int, - filter_size: int, - num_filters: int, - stride: int, - padding: int, - channels: int = None, - num_groups: int = 1, - act: str = 'relu', - name: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - weight_attr=ParamAttr(initializer=MSRA(), name=name + "_weights"), - bias_attr=False) - - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name + "_bn_scale"), - bias_attr=ParamAttr(name + "_bn_offset"), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance") - - def forward(self, inputs: paddle.Tensor): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class DepthwiseSeparable(nn.Layer): - """Depthwise and pointwise conv layer.""" - def __init__(self, - num_channels: int, - num_filters1: int, - num_filters2: int, - num_groups: int, - stride: int, - scale: float, - name: str = None): - super(DepthwiseSeparable, self).__init__() - - self._depthwise_conv = ConvBNLayer(num_channels=num_channels, - num_filters=int(num_filters1 * scale), - filter_size=3, - stride=stride, - padding=1, - num_groups=int(num_groups * scale), - name=name + "_dw") - - self._pointwise_conv = ConvBNLayer(num_channels=int(num_filters1 * scale), - filter_size=1, - num_filters=int(num_filters2 * scale), - stride=1, - padding=0, - name=name + "_sep") - - def forward(self, inputs: paddle.Tensor): - y = self._depthwise_conv(inputs) - y = self._pointwise_conv(y) - return y - - -@moduleinfo(name="mobilenet_v1_imagenet", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="mobilenet_v1_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class MobileNetV1(nn.Layer): - """MobileNetV1""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(MobileNetV1, self).__init__() - self.block_list = [] - - self.conv1 = ConvBNLayer(num_channels=3, - filter_size=3, - channels=3, - num_filters=int(32), - stride=2, - padding=1, - name="conv1") - - conv2_1 = self.add_sublayer("conv2_1", - sublayer=DepthwiseSeparable(num_channels=int(32), - num_filters1=32, - num_filters2=64, - num_groups=32, - stride=1, - scale=1, - name="conv2_1")) - self.block_list.append(conv2_1) - - conv2_2 = self.add_sublayer("conv2_2", - sublayer=DepthwiseSeparable(num_channels=int(64), - num_filters1=64, - num_filters2=128, - num_groups=64, - stride=2, - scale=1, - name="conv2_2")) - self.block_list.append(conv2_2) - - conv3_1 = self.add_sublayer("conv3_1", - sublayer=DepthwiseSeparable(num_channels=int(128), - num_filters1=128, - num_filters2=128, - num_groups=128, - stride=1, - scale=1, - name="conv3_1")) - self.block_list.append(conv3_1) - - conv3_2 = self.add_sublayer("conv3_2", - sublayer=DepthwiseSeparable(num_channels=int(128), - num_filters1=128, - num_filters2=256, - 
num_groups=128, - stride=2, - scale=1, - name="conv3_2")) - self.block_list.append(conv3_2) - - conv4_1 = self.add_sublayer("conv4_1", - sublayer=DepthwiseSeparable(num_channels=int(256), - num_filters1=256, - num_filters2=256, - num_groups=256, - stride=1, - scale=1, - name="conv4_1")) - self.block_list.append(conv4_1) - - conv4_2 = self.add_sublayer("conv4_2", - sublayer=DepthwiseSeparable(num_channels=int(256), - num_filters1=256, - num_filters2=512, - num_groups=256, - stride=2, - scale=1, - name="conv4_2")) - self.block_list.append(conv4_2) - - for i in range(5): - conv5 = self.add_sublayer("conv5_" + str(i + 1), - sublayer=DepthwiseSeparable(num_channels=int(512), - num_filters1=512, - num_filters2=512, - num_groups=512, - stride=1, - scale=1, - name="conv5_" + str(i + 1))) - self.block_list.append(conv5) - - conv5_6 = self.add_sublayer("conv5_6", - sublayer=DepthwiseSeparable(num_channels=int(512), - num_filters1=512, - num_filters2=1024, - num_groups=512, - stride=2, - scale=1, - name="conv5_6")) - self.block_list.append(conv5_6) - - conv6 = self.add_sublayer("conv6", - sublayer=DepthwiseSeparable(num_channels=int(1024), - num_filters1=1024, - num_filters2=1024, - num_groups=1024, - stride=1, - scale=1, - name="conv6")) - self.block_list.append(conv6) - - self.pool2d_avg = AdaptiveAvgPool2d(1) - - self.out = Linear(int(1024), - class_dim, - weight_attr=ParamAttr(initializer=MSRA(), name="fc7_weights"), - bias_attr=ParamAttr(name="fc7_offset")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'mobilenet_v1_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/mobilenet_v1_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv1(inputs) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, 1024]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/mobilenet_v1_imagenet_ssld/module.py b/hub_module/modules/image/classification/mobilenet_v1_imagenet_ssld/module.py deleted file mode 100644 index d0b79a5446be35f1898051e20d0740c876ce4a9d..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/mobilenet_v1_imagenet_ssld/module.py +++ /dev/null @@ -1,241 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
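The `DepthwiseSeparable` layer above factors a standard 3x3 convolution into a per-channel (grouped) 3x3 followed by a 1x1 pointwise projection, which is where MobileNetV1's parameter savings come from. A quick, self-contained check of the arithmetic (batch norms, which the real block also carries, are ignored here):

```python
import numpy as np
import paddle.nn as nn

c_in, c_out = 64, 128

full = nn.Conv2D(c_in, c_out, 3, padding=1, bias_attr=False)
depthwise = nn.Conv2D(c_in, c_in, 3, padding=1, groups=c_in, bias_attr=False)
pointwise = nn.Conv2D(c_in, c_out, 1, bias_attr=False)


def n_params(layer):
    return sum(int(np.prod(p.shape)) for p in layer.parameters())


print(n_params(full))                          # 64*128*3*3 = 73728
print(n_params(depthwise) + n_params(pointwise))  # 64*3*3 + 64*128 = 8768
```

For this 64-to-128-channel step the factored form uses roughly 8.4x fewer weights than the dense 3x3 convolution, at the cost of one extra (cheap) 1x1 layer.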
- -import os -import math - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import MSRA -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__(self, - num_channels: int, - filter_size: int, - num_filters: int, - stride: int, - padding: int, - channels: int = None, - num_groups: int = 1, - act: str = 'relu', - name: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - weight_attr=ParamAttr(initializer=MSRA(), name=name + "_weights"), - bias_attr=False) - - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name + "_bn_scale"), - bias_attr=ParamAttr(name + "_bn_offset"), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance") - - def forward(self, inputs: paddle.Tensor): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class DepthwiseSeparable(nn.Layer): - """Depthwise and pointwise conv layer.""" - def __init__(self, - num_channels: int, - num_filters1: int, - num_filters2: int, - num_groups: int, - stride: int, - scale: float, - name: str = None): - super(DepthwiseSeparable, self).__init__() - - self._depthwise_conv = ConvBNLayer(num_channels=num_channels, - num_filters=int(num_filters1 * scale), - filter_size=3, - stride=stride, - padding=1, - num_groups=int(num_groups * scale), - name=name + "_dw") - - self._pointwise_conv = ConvBNLayer(num_channels=int(num_filters1 * scale), - filter_size=1, - num_filters=int(num_filters2 * scale), - stride=1, - padding=0, - name=name + "_sep") - - def forward(self, inputs: paddle.Tensor): - y = self._depthwise_conv(inputs) - y = self._pointwise_conv(y) - return y - - -@moduleinfo(name="mobilenet_v1_imagenet_ssld", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="mobilenet_v1_imagenet_ssld is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class MobileNetV1(nn.Layer): - """MobileNetV1""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(MobileNetV1, self).__init__() - self.block_list = [] - - self.conv1 = ConvBNLayer(num_channels=3, - filter_size=3, - channels=3, - num_filters=int(32), - stride=2, - padding=1, - name="conv1") - - conv2_1 = self.add_sublayer("conv2_1", - sublayer=DepthwiseSeparable(num_channels=int(32), - num_filters1=32, - num_filters2=64, - num_groups=32, - stride=1, - scale=1, - name="conv2_1")) - self.block_list.append(conv2_1) - - conv2_2 = self.add_sublayer("conv2_2", - sublayer=DepthwiseSeparable(num_channels=int(64), - num_filters1=64, - num_filters2=128, - num_groups=64, - stride=2, - scale=1, - name="conv2_2")) - self.block_list.append(conv2_2) - - conv3_1 = self.add_sublayer("conv3_1", - sublayer=DepthwiseSeparable(num_channels=int(128), - num_filters1=128, - num_filters2=128, - num_groups=128, - stride=1, - scale=1, - name="conv3_1")) - self.block_list.append(conv3_1) - - conv3_2 = self.add_sublayer("conv3_2", - sublayer=DepthwiseSeparable(num_channels=int(128), - num_filters1=128, - num_filters2=256, - 
num_groups=128, - stride=2, - scale=1, - name="conv3_2")) - self.block_list.append(conv3_2) - - conv4_1 = self.add_sublayer("conv4_1", - sublayer=DepthwiseSeparable(num_channels=int(256), - num_filters1=256, - num_filters2=256, - num_groups=256, - stride=1, - scale=1, - name="conv4_1")) - self.block_list.append(conv4_1) - - conv4_2 = self.add_sublayer("conv4_2", - sublayer=DepthwiseSeparable(num_channels=int(256), - num_filters1=256, - num_filters2=512, - num_groups=256, - stride=2, - scale=1, - name="conv4_2")) - self.block_list.append(conv4_2) - - for i in range(5): - conv5 = self.add_sublayer("conv5_" + str(i + 1), - sublayer=DepthwiseSeparable(num_channels=int(512), - num_filters1=512, - num_filters2=512, - num_groups=512, - stride=1, - scale=1, - name="conv5_" + str(i + 1))) - self.block_list.append(conv5) - - conv5_6 = self.add_sublayer("conv5_6", - sublayer=DepthwiseSeparable(num_channels=int(512), - num_filters1=512, - num_filters2=1024, - num_groups=512, - stride=2, - scale=1, - name="conv5_6")) - self.block_list.append(conv5_6) - - conv6 = self.add_sublayer("conv6", - sublayer=DepthwiseSeparable(num_channels=int(1024), - num_filters1=1024, - num_filters2=1024, - num_groups=1024, - stride=1, - scale=1, - name="conv6")) - self.block_list.append(conv6) - - self.pool2d_avg = AdaptiveAvgPool2d(1) - - self.out = Linear(int(1024), - class_dim, - weight_attr=ParamAttr(initializer=MSRA(), name="fc7_weights"), - bias_attr=ParamAttr(name="fc7_offset")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'mobilenet_v1_ssld_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/mobilenet_v1_ssld_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv1(inputs) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, 1024]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/mobilenet_v2_animals/data_feed.py b/hub_module/modules/image/classification/mobilenet_v2_animals/data_feed.py deleted file mode 100644 index d5ffb5efe9fdfbd143b949892aa44d851e907b41..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/mobilenet_v2_animals/data_feed.py +++ /dev/null @@ -1,84 +0,0 @@ -# coding=utf-8 -import os -import time -from collections import OrderedDict - -import cv2 -import numpy as np -from PIL import Image - -__all__ = ['reader'] - -DATA_DIM = 224 -img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) -img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) - - -def resize_short(img, target_size): - percent = float(target_size) / min(img.size[0], img.size[1]) - resized_width = int(round(img.size[0] * percent)) - resized_height = int(round(img.size[1] * percent)) - img = img.resize((resized_width, resized_height), Image.LANCZOS) - return img - - -def crop_image(img, target_size, center): - width, height = img.size - size = target_size - if center == True: - w_start = (width - size) / 2 - h_start = (height - size) / 2 - else: - w_start = np.random.randint(0, width - size + 1) - h_start = np.random.randint(0, height - size + 1) - w_end = w_start + size - h_end = h_start + size - img = 
img.crop((w_start, h_start, w_end, h_end)) - return img - - -def process_image(img): - img = resize_short(img, target_size=256) - img = crop_image(img, target_size=DATA_DIM, center=True) - if img.mode != 'RGB': - img = img.convert('RGB') - img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 - img -= img_mean - img /= img_std - return img - - -def reader(images=None, paths=None): - """ - Preprocess to yield image. - - Args: - images (list[numpy.ndarray]): images data, shape of each is [H, W, C]. - paths (list[str]): paths to images. - - Yield: - each (collections.OrderedDict): info of original image, preprocessed image. - """ - component = list() - if paths: - for im_path in paths: - each = OrderedDict() - assert os.path.isfile( - im_path), "The {} isn't a valid file path.".format(im_path) - each['org_im_path'] = im_path - each['org_im'] = Image.open(im_path) - each['org_im_width'], each['org_im_height'] = each['org_im'].size - component.append(each) - if images is not None: - assert type(images), "images is a list." - for im in images: - each = OrderedDict() - each['org_im'] = Image.fromarray(im[:, :, ::-1]) - each['org_im_path'] = 'ndarray_time={}'.format( - round(time.time(), 6) * 1e6) - each['org_im_width'], each['org_im_height'] = each['org_im'].size - component.append(each) - - for element in component: - element['image'] = process_image(element['org_im']) - yield element diff --git a/hub_module/modules/image/classification/mobilenet_v2_animals/mobilenet_v2.py b/hub_module/modules/image/classification/mobilenet_v2_animals/mobilenet_v2.py deleted file mode 100644 index 2c5654648e4fe16afec54cdd643ef2a481b6d3dc..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/mobilenet_v2_animals/mobilenet_v2.py +++ /dev/null @@ -1,200 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle.fluid as fluid -from paddle.fluid.initializer import MSRA -from paddle.fluid.param_attr import ParamAttr - -__all__ = ['MobileNetV2'] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} - - -class MobileNetV2(): - def __init__(self): - self.params = train_parameters - - def net(self, input, class_dim=1000, scale=1.0): - - bottleneck_params_list = [ - (1, 16, 1, 1), - (6, 24, 2, 2), - (6, 32, 3, 2), - (6, 64, 4, 2), - (6, 96, 3, 1), - (6, 160, 3, 2), - (6, 320, 1, 1), - ] - - #conv1 - input = self.conv_bn_layer( - input, - num_filters=int(32 * scale), - filter_size=3, - stride=2, - padding=1, - if_act=True, - name='conv1_1') - - # bottleneck sequences - i = 1 - in_c = int(32 * scale) - for layer_setting in bottleneck_params_list: - t, c, n, s = layer_setting - i += 1 - input = self.invresi_blocks( - input=input, - in_c=in_c, - t=t, - c=int(c * scale), - n=n, - s=s, - name='conv' + str(i)) - in_c = int(c * scale) - #last_conv - input = self.conv_bn_layer( - input=input, - num_filters=int(1280 * scale) if scale > 1.0 else 1280, - filter_size=1, - stride=1, - padding=0, - if_act=True, - name='conv9') - - input = fluid.layers.pool2d( - input=input, - pool_size=7, - pool_stride=1, - pool_type='avg', - global_pooling=True) - - output = fluid.layers.fc( - input=input, - size=class_dim, - param_attr=ParamAttr(name='fc10_weights'), - bias_attr=ParamAttr(name='fc10_offset')) - 
return output, input - - def conv_bn_layer(self, - input, - filter_size, - num_filters, - stride, - padding, - channels=None, - num_groups=1, - if_act=True, - name=None, - use_cudnn=True): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - act=None, - use_cudnn=use_cudnn, - param_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - bn_name = name + '_bn' - bn = fluid.layers.batch_norm( - input=conv, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(name=bn_name + "_offset"), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - if if_act: - return fluid.layers.relu6(bn) - else: - return bn - - def shortcut(self, input, data_residual): - return fluid.layers.elementwise_add(input, data_residual) - - def inverted_residual_unit(self, - input, - num_in_filter, - num_filters, - ifshortcut, - stride, - filter_size, - padding, - expansion_factor, - name=None): - num_expfilter = int(round(num_in_filter * expansion_factor)) - - channel_expand = self.conv_bn_layer( - input=input, - num_filters=num_expfilter, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=True, - name=name + '_expand') - - bottleneck_conv = self.conv_bn_layer( - input=channel_expand, - num_filters=num_expfilter, - filter_size=filter_size, - stride=stride, - padding=padding, - num_groups=num_expfilter, - if_act=True, - name=name + '_dwise', - use_cudnn=False) - - linear_out = self.conv_bn_layer( - input=bottleneck_conv, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=False, - name=name + '_linear') - if ifshortcut: - out = self.shortcut(input=input, data_residual=linear_out) - return out - else: - return linear_out - - def invresi_blocks(self, input, in_c, t, c, n, s, name=None): - first_block = self.inverted_residual_unit( - input=input, - num_in_filter=in_c, - num_filters=c, - ifshortcut=False, - stride=s, - filter_size=3, - padding=1, - expansion_factor=t, - name=name + '_1') - - last_residual_block = first_block - last_c = c - - for i in range(1, n): - last_residual_block = self.inverted_residual_unit( - input=last_residual_block, - num_in_filter=last_c, - num_filters=c, - ifshortcut=True, - stride=1, - filter_size=3, - padding=1, - expansion_factor=t, - name=name + '_' + str(i + 1)) - return last_residual_block diff --git a/hub_module/modules/image/classification/mobilenet_v2_animals/module.py b/hub_module/modules/image/classification/mobilenet_v2_animals/module.py deleted file mode 100644 index b8afcae07c3fbdc082c821faa288c29bb34b1982..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/mobilenet_v2_animals/module.py +++ /dev/null @@ -1,278 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division - -import ast -import argparse -import os - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix - -from mobilenet_v2_animals.processor import postprocess, base64_to_cv2 -from mobilenet_v2_animals.data_feed import reader -from mobilenet_v2_animals.mobilenet_v2 import MobileNetV2 - - -@moduleinfo( - name="mobilenet_v2_animals", - type="CV/image_classification", - author="baidu-vis", - author_email="", - 
summary=
-    "Mobilenet_V2 is an image classification model; this module is trained with Baidu's self-built animals dataset.",
-    version="1.0.0")
-class MobileNetV2Animals(hub.Module):
-    def _initialize(self):
-        self.default_pretrained_model_path = os.path.join(
-            self.directory, "model")
-        label_file = os.path.join(self.directory, "label_list.txt")
-        with open(label_file, 'r', encoding='utf-8') as file:
-            self.label_list = file.read().split("\n")[:-1]
-        self._set_config()
-
-    def get_expected_image_width(self):
-        return 224
-
-    def get_expected_image_height(self):
-        return 224
-
-    def get_pretrained_images_mean(self):
-        im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3)
-        return im_mean
-
-    def get_pretrained_images_std(self):
-        im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3)
-        return im_std
-
-    def _set_config(self):
-        """
-        predictor config setting
-        """
-        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
-        cpu_config.disable_glog_info()
-        cpu_config.disable_gpu()
-        self.cpu_predictor = create_paddle_predictor(cpu_config)
-
-        try:
-            _places = os.environ["CUDA_VISIBLE_DEVICES"]
-            int(_places[0])
-            use_gpu = True
-        except:
-            use_gpu = False
-        if use_gpu:
-            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
-            gpu_config.disable_glog_info()
-            gpu_config.enable_use_gpu(
-                memory_pool_init_size_mb=1000, device_id=0)
-            self.gpu_predictor = create_paddle_predictor(gpu_config)
-
-    def context(self, trainable=True, pretrained=True):
-        """Context for transfer learning.
-
-        Args:
-            trainable (bool): Set parameters in program to be trainable.
-            pretrained (bool): Whether to load the pretrained model.
-
-        Returns:
-            inputs (dict): key is 'image', corresponding value is the image tensor.
-            outputs (dict): keys are:
-                'classification', corresponding value is the result of classification.
-                'feature_map', corresponding value is the output of the layer before the fully connected layer.
-            context_prog (fluid.Program): program for transfer learning.
-        """
-        context_prog = fluid.Program()
-        startup_prog = fluid.Program()
-        with fluid.program_guard(context_prog, startup_prog):
-            with fluid.unique_name.guard():
-                image = fluid.layers.data(
-                    name="image", shape=[3, 224, 224], dtype="float32")
-                mobile_net = MobileNetV2()
-                output, feature_map = mobile_net.net(
-                    input=image, class_dim=len(self.label_list), scale=1.0)
-
-                name_prefix = '@HUB_{}@'.format(self.name)
-                inputs = {'image': name_prefix + image.name}
-                outputs = {
-                    'classification': name_prefix + output.name,
-                    'feature_map': name_prefix + feature_map.name
-                }
-                add_vars_prefix(context_prog, name_prefix)
-                add_vars_prefix(startup_prog, name_prefix)
-                global_vars = context_prog.global_block().vars
-                inputs = {
-                    key: global_vars[value]
-                    for key, value in inputs.items()
-                }
-                outputs = {
-                    key: global_vars[value]
-                    for key, value in outputs.items()
-                }
-
-                place = fluid.CPUPlace()
-                exe = fluid.Executor(place)
-                # pretrained
-                if pretrained:
-
-                    def _if_exist(var):
-                        b = os.path.exists(
-                            os.path.join(self.default_pretrained_model_path,
-                                         var.name))
-                        return b
-
-                    fluid.io.load_vars(
-                        exe,
-                        self.default_pretrained_model_path,
-                        context_prog,
-                        predicate=_if_exist)
-                else:
-                    exe.run(startup_prog)
-                # trainable
-                for param in context_prog.global_block().iter_parameters():
-                    param.trainable = trainable
-        return inputs, outputs, context_prog
-
-    def classification(self,
-                       images=None,
-                       paths=None,
-                       batch_size=1,
-                       use_gpu=False,
-                       top_k=1):
-        """
-        API for image classification.
- - Args: - images (numpy.ndarray): data of images, shape of each is [H, W, C], color space must be BGR. - paths (list[str]): The paths of images. - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - top_k (int): Return top k results. - - Returns: - res (list[dict]): The classfication results. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." - ) - - all_data = list() - for yield_data in reader(images, paths): - all_data.append(yield_data) - - total_num = len(all_data) - loop_num = int(np.ceil(total_num / batch_size)) - - res = list() - for iter_id in range(loop_num): - batch_data = list() - handle_id = iter_id * batch_size - for image_id in range(batch_size): - try: - batch_data.append(all_data[handle_id + image_id]) - except: - pass - # feed batch image - batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.copy()) - predictor_output = self.gpu_predictor.run([ - batch_image - ]) if use_gpu else self.cpu_predictor.run([batch_image]) - out = postprocess( - data_out=predictor_output[0].as_ndarray(), - label_list=self.label_list, - top_k=top_k) - res += out - return res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.classification(images=images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. - """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.classification( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu) - return results - - def add_module_config_arg(self): - """ - Add the command config options. 
- """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not.") - self.arg_config_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - self.arg_config_group.add_argument( - '--top_k', - type=ast.literal_eval, - default=1, - help="Return top k results.") - - def add_module_input_arg(self): - """ - Add the command input options. - """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") diff --git a/hub_module/modules/image/classification/mobilenet_v2_dishes/data_feed.py b/hub_module/modules/image/classification/mobilenet_v2_dishes/data_feed.py deleted file mode 100644 index d5ffb5efe9fdfbd143b949892aa44d851e907b41..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/mobilenet_v2_dishes/data_feed.py +++ /dev/null @@ -1,84 +0,0 @@ -# coding=utf-8 -import os -import time -from collections import OrderedDict - -import cv2 -import numpy as np -from PIL import Image - -__all__ = ['reader'] - -DATA_DIM = 224 -img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) -img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) - - -def resize_short(img, target_size): - percent = float(target_size) / min(img.size[0], img.size[1]) - resized_width = int(round(img.size[0] * percent)) - resized_height = int(round(img.size[1] * percent)) - img = img.resize((resized_width, resized_height), Image.LANCZOS) - return img - - -def crop_image(img, target_size, center): - width, height = img.size - size = target_size - if center == True: - w_start = (width - size) / 2 - h_start = (height - size) / 2 - else: - w_start = np.random.randint(0, width - size + 1) - h_start = np.random.randint(0, height - size + 1) - w_end = w_start + size - h_end = h_start + size - img = img.crop((w_start, h_start, w_end, h_end)) - return img - - -def process_image(img): - img = resize_short(img, target_size=256) - img = crop_image(img, target_size=DATA_DIM, center=True) - if img.mode != 'RGB': - img = img.convert('RGB') - img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 - img -= img_mean - img /= img_std - return img - - -def reader(images=None, paths=None): - """ - Preprocess to yield image. - - Args: - images (list[numpy.ndarray]): images data, shape of each is [H, W, C]. - paths (list[str]): paths to images. - - Yield: - each (collections.OrderedDict): info of original image, preprocessed image. - """ - component = list() - if paths: - for im_path in paths: - each = OrderedDict() - assert os.path.isfile( - im_path), "The {} isn't a valid file path.".format(im_path) - each['org_im_path'] = im_path - each['org_im'] = Image.open(im_path) - each['org_im_width'], each['org_im_height'] = each['org_im'].size - component.append(each) - if images is not None: - assert type(images), "images is a list." 
- for im in images: - each = OrderedDict() - each['org_im'] = Image.fromarray(im[:, :, ::-1]) - each['org_im_path'] = 'ndarray_time={}'.format( - round(time.time(), 6) * 1e6) - each['org_im_width'], each['org_im_height'] = each['org_im'].size - component.append(each) - - for element in component: - element['image'] = process_image(element['org_im']) - yield element diff --git a/hub_module/modules/image/classification/mobilenet_v2_dishes/mobilenet_v2.py b/hub_module/modules/image/classification/mobilenet_v2_dishes/mobilenet_v2.py deleted file mode 100644 index 2c5654648e4fe16afec54cdd643ef2a481b6d3dc..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/mobilenet_v2_dishes/mobilenet_v2.py +++ /dev/null @@ -1,200 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle.fluid as fluid -from paddle.fluid.initializer import MSRA -from paddle.fluid.param_attr import ParamAttr - -__all__ = ['MobileNetV2'] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} - - -class MobileNetV2(): - def __init__(self): - self.params = train_parameters - - def net(self, input, class_dim=1000, scale=1.0): - - bottleneck_params_list = [ - (1, 16, 1, 1), - (6, 24, 2, 2), - (6, 32, 3, 2), - (6, 64, 4, 2), - (6, 96, 3, 1), - (6, 160, 3, 2), - (6, 320, 1, 1), - ] - - #conv1 - input = self.conv_bn_layer( - input, - num_filters=int(32 * scale), - filter_size=3, - stride=2, - padding=1, - if_act=True, - name='conv1_1') - - # bottleneck sequences - i = 1 - in_c = int(32 * scale) - for layer_setting in bottleneck_params_list: - t, c, n, s = layer_setting - i += 1 - input = self.invresi_blocks( - input=input, - in_c=in_c, - t=t, - c=int(c * scale), - n=n, - s=s, - name='conv' + str(i)) - in_c = int(c * scale) - #last_conv - input = self.conv_bn_layer( - input=input, - num_filters=int(1280 * scale) if scale > 1.0 else 1280, - filter_size=1, - stride=1, - padding=0, - if_act=True, - name='conv9') - - input = fluid.layers.pool2d( - input=input, - pool_size=7, - pool_stride=1, - pool_type='avg', - global_pooling=True) - - output = fluid.layers.fc( - input=input, - size=class_dim, - param_attr=ParamAttr(name='fc10_weights'), - bias_attr=ParamAttr(name='fc10_offset')) - return output, input - - def conv_bn_layer(self, - input, - filter_size, - num_filters, - stride, - padding, - channels=None, - num_groups=1, - if_act=True, - name=None, - use_cudnn=True): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - act=None, - use_cudnn=use_cudnn, - param_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - bn_name = name + '_bn' - bn = fluid.layers.batch_norm( - input=conv, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(name=bn_name + "_offset"), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - if if_act: - return fluid.layers.relu6(bn) - else: - return bn - - def shortcut(self, input, data_residual): - return fluid.layers.elementwise_add(input, data_residual) - - def inverted_residual_unit(self, - input, - num_in_filter, - num_filters, - ifshortcut, - stride, - filter_size, - padding, - expansion_factor, - name=None): - 
num_expfilter = int(round(num_in_filter * expansion_factor)) - - channel_expand = self.conv_bn_layer( - input=input, - num_filters=num_expfilter, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=True, - name=name + '_expand') - - bottleneck_conv = self.conv_bn_layer( - input=channel_expand, - num_filters=num_expfilter, - filter_size=filter_size, - stride=stride, - padding=padding, - num_groups=num_expfilter, - if_act=True, - name=name + '_dwise', - use_cudnn=False) - - linear_out = self.conv_bn_layer( - input=bottleneck_conv, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=False, - name=name + '_linear') - if ifshortcut: - out = self.shortcut(input=input, data_residual=linear_out) - return out - else: - return linear_out - - def invresi_blocks(self, input, in_c, t, c, n, s, name=None): - first_block = self.inverted_residual_unit( - input=input, - num_in_filter=in_c, - num_filters=c, - ifshortcut=False, - stride=s, - filter_size=3, - padding=1, - expansion_factor=t, - name=name + '_1') - - last_residual_block = first_block - last_c = c - - for i in range(1, n): - last_residual_block = self.inverted_residual_unit( - input=last_residual_block, - num_in_filter=last_c, - num_filters=c, - ifshortcut=True, - stride=1, - filter_size=3, - padding=1, - expansion_factor=t, - name=name + '_' + str(i + 1)) - return last_residual_block diff --git a/hub_module/modules/image/classification/mobilenet_v2_dishes/module.py b/hub_module/modules/image/classification/mobilenet_v2_dishes/module.py deleted file mode 100644 index f1be00a305e164b363bb9c8266833f5a986a52a5..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/mobilenet_v2_dishes/module.py +++ /dev/null @@ -1,278 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division - -import ast -import argparse -import os - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix - -from mobilenet_v2_dishes.processor import postprocess, base64_to_cv2 -from mobilenet_v2_dishes.data_feed import reader -from mobilenet_v2_dishes.mobilenet_v2 import MobileNetV2 - - -@moduleinfo( - name="mobilenet_v2_dishes", - type="CV/image_classification", - author="baidu-vis", - author_email="", - summary= - "Mobilenet_V2 is a image classfication model, this module is trained with Baidu's self-built dishes dataset.", - version="1.0.0") -class MobileNetV2Dishes(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "model") - label_file = os.path.join(self.directory, "label_list.txt") - with open(label_file, 'r', encoding='utf-8') as file: - self.label_list = file.read().split("\n")[:-1] - self._set_config() - - def get_expected_image_width(self): - return 224 - - def get_expected_image_height(self): - return 224 - - def get_pretrained_images_mean(self): - im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3) - return im_mean - - def get_pretrained_images_std(self): - im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) - return im_std - - def _set_config(self): - """ - predictor config setting - """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - self.cpu_predictor = 
-
-        try:
-            _places = os.environ["CUDA_VISIBLE_DEVICES"]
-            int(_places[0])
-            use_gpu = True
-        except:
-            use_gpu = False
-        if use_gpu:
-            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
-            gpu_config.disable_glog_info()
-            gpu_config.enable_use_gpu(
-                memory_pool_init_size_mb=1000, device_id=0)
-            self.gpu_predictor = create_paddle_predictor(gpu_config)
-
-    def context(self, trainable=True, pretrained=True):
-        """context for transfer learning.
-
-        Args:
-            trainable (bool): Set parameters in program to be trainable.
-            pretrained (bool): Whether to load the pretrained model.
-
-        Returns:
-            inputs (dict): key is 'image', corresponding value is the image tensor.
-            outputs (dict): keys are:
-                'classification', corresponding value is the result of classification.
-                'feature_map', corresponding value is the output of the layer before the fully connected layer.
-            context_prog (fluid.Program): program for transfer learning.
-        """
-        context_prog = fluid.Program()
-        startup_prog = fluid.Program()
-        with fluid.program_guard(context_prog, startup_prog):
-            with fluid.unique_name.guard():
-                image = fluid.layers.data(
-                    name="image", shape=[3, 224, 224], dtype="float32")
-                mobile_net = MobileNetV2()
-                output, feature_map = mobile_net.net(
-                    input=image, class_dim=len(self.label_list), scale=1.0)
-
-                name_prefix = '@HUB_{}@'.format(self.name)
-                inputs = {'image': name_prefix + image.name}
-                outputs = {
-                    'classification': name_prefix + output.name,
-                    'feature_map': name_prefix + feature_map.name
-                }
-                add_vars_prefix(context_prog, name_prefix)
-                add_vars_prefix(startup_prog, name_prefix)
-                global_vars = context_prog.global_block().vars
-                inputs = {
-                    key: global_vars[value]
-                    for key, value in inputs.items()
-                }
-                outputs = {
-                    key: global_vars[value]
-                    for key, value in outputs.items()
-                }
-
-            place = fluid.CPUPlace()
-            exe = fluid.Executor(place)
-            # pretrained
-            if pretrained:
-
-                def _if_exist(var):
-                    b = os.path.exists(
-                        os.path.join(self.default_pretrained_model_path,
-                                     var.name))
-                    return b
-
-                fluid.io.load_vars(
-                    exe,
-                    self.default_pretrained_model_path,
-                    context_prog,
-                    predicate=_if_exist)
-            else:
-                exe.run(startup_prog)
-            # trainable
-            for param in context_prog.global_block().iter_parameters():
-                param.trainable = trainable
-        return inputs, outputs, context_prog
-
-    def classification(self,
-                       images=None,
-                       paths=None,
-                       batch_size=1,
-                       use_gpu=False,
-                       top_k=1):
-        """
-        API for image classification.
-
-        Args:
-            images (numpy.ndarray): data of images, shape of each is [H, W, C], color space must be BGR.
-            paths (list[str]): The paths of images.
-            batch_size (int): batch size.
-            use_gpu (bool): Whether to use GPU.
-            top_k (int): Return top k results.
-
-        Returns:
-            res (list[dict]): The classification results.
-        """
-        if use_gpu:
-            try:
-                _places = os.environ["CUDA_VISIBLE_DEVICES"]
-                int(_places[0])
-            except:
-                raise RuntimeError(
-                    "Environment variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as the CUDA device id."
- ) - - all_data = list() - for yield_data in reader(images, paths): - all_data.append(yield_data) - - total_num = len(all_data) - loop_num = int(np.ceil(total_num / batch_size)) - - res = list() - for iter_id in range(loop_num): - batch_data = list() - handle_id = iter_id * batch_size - for image_id in range(batch_size): - try: - batch_data.append(all_data[handle_id + image_id]) - except: - pass - # feed batch image - batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.copy()) - predictor_output = self.gpu_predictor.run([ - batch_image - ]) if use_gpu else self.cpu_predictor.run([batch_image]) - out = postprocess( - data_out=predictor_output[0].as_ndarray(), - label_list=self.label_list, - top_k=top_k) - res += out - return res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.classification(images=images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. - """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.classification( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not.") - self.arg_config_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - self.arg_config_group.add_argument( - '--top_k', - type=ast.literal_eval, - default=1, - help="Return top k results.") - - def add_module_input_arg(self): - """ - Add the command input options. - """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") diff --git a/hub_module/modules/image/classification/mobilenet_v2_imagenet/module.py b/hub_module/modules/image/classification/mobilenet_v2_imagenet/module.py deleted file mode 100644 index 573c31a559d86993518f49faaa4569d2c20017ed..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/mobilenet_v2_imagenet/module.py +++ /dev/null @@ -1,209 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 
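For context, here is a minimal usage sketch of the `classification` API defined above. It assumes a PaddleHub 1.x environment in which this module is installed; the image filename is a hypothetical placeholder.

```python
import paddlehub as hub

# Load the module by name; PaddleHub resolves this to the class above.
classifier = hub.Module(name="mobilenet_v2_dishes")

# `paths` takes a list of image files; use_gpu=True additionally requires
# CUDA_VISIBLE_DEVICES to be set, as enforced in `classification` above.
results = classifier.classification(paths=["dish.jpg"], top_k=3)
print(results)  # one dict of label -> probability per input image
```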
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__(self, - num_channels: int, - filter_size: int, - num_filters: int, - stride: int, - padding: int, - num_groups: int = 1, - name: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - - self._batch_norm = BatchNorm(num_filters, - param_attr=ParamAttr(name=name + "_bn_scale"), - bias_attr=ParamAttr(name=name + "_bn_offset"), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance") - - def forward(self, inputs: paddle.Tensor, if_act: bool = True): - y = self._conv(inputs) - y = self._batch_norm(y) - if if_act: - y = F.relu6(y) - return y - - -class InvertedResidualUnit(nn.Layer): - """Inverted Residual unit.""" - def __init__(self, num_channels: int, num_in_filter: int, num_filters: int, stride: int, filter_size: int, - padding: int, expansion_factor: int, name: str): - super(InvertedResidualUnit, self).__init__() - - num_expfilter = int(round(num_in_filter * expansion_factor)) - self._expand_conv = ConvBNLayer(num_channels=num_channels, - num_filters=num_expfilter, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - name=name + "_expand") - - self._bottleneck_conv = ConvBNLayer(num_channels=num_expfilter, - num_filters=num_expfilter, - filter_size=filter_size, - stride=stride, - padding=padding, - num_groups=num_expfilter, - name=name + "_dwise") - - self._linear_conv = ConvBNLayer(num_channels=num_expfilter, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - name=name + "_linear") - - def forward(self, inputs: paddle.Tensor, ifshortcut: bool): - y = self._expand_conv(inputs, if_act=True) - y = self._bottleneck_conv(y, if_act=True) - y = self._linear_conv(y, if_act=False) - if ifshortcut: - y = paddle.elementwise_add(inputs, y) - return y - - -class InversiBlocks(nn.Layer): - """Inverted residual block composed by inverted residual unit.""" - def __init__(self, in_c: int, t: int, c: int, n: int, s: int, name: str): - super(InversiBlocks, self).__init__() - - self._first_block = InvertedResidualUnit(num_channels=in_c, - num_in_filter=in_c, - num_filters=c, - stride=s, - filter_size=3, - padding=1, - expansion_factor=t, - name=name + "_1") - - self._block_list = [] - for i in range(1, n): - block = self.add_sublayer(name + "_" + str(i + 1), - sublayer=InvertedResidualUnit(num_channels=c, - num_in_filter=c, - num_filters=c, - stride=1, - 
filter_size=3, - padding=1, - expansion_factor=t, - name=name + "_" + str(i + 1))) - self._block_list.append(block) - - def forward(self, inputs: paddle.Tensor): - y = self._first_block(inputs, ifshortcut=False) - for block in self._block_list: - y = block(y, ifshortcut=True) - return y - - -@moduleinfo(name="mobilenet_v2_imagenet", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="mobilenet_v2_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class MobileNet(nn.Layer): - """MobileNetV2""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(MobileNet, self).__init__() - - self.class_dim = class_dim - - bottleneck_params_list = [(1, 16, 1, 1), (6, 24, 2, 2), (6, 32, 3, 2), (6, 64, 4, 2), (6, 96, 3, 1), - (6, 160, 3, 2), (6, 320, 1, 1)] - - self.conv1 = ConvBNLayer(num_channels=3, - num_filters=int(32), - filter_size=3, - stride=2, - padding=1, - name="conv1_1") - - self.block_list = [] - i = 1 - in_c = int(32) - for layer_setting in bottleneck_params_list: - t, c, n, s = layer_setting - i += 1 - block = self.add_sublayer("conv" + str(i), - sublayer=InversiBlocks(in_c=in_c, t=t, c=int(c), n=n, s=s, name="conv" + str(i))) - self.block_list.append(block) - in_c = int(c) - - self.out_c = 1280 - self.conv9 = ConvBNLayer(num_channels=in_c, - num_filters=self.out_c, - filter_size=1, - stride=1, - padding=0, - name="conv9") - - self.pool2d_avg = AdaptiveAvgPool2d(1) - - self.out = Linear(self.out_c, - class_dim, - weight_attr=ParamAttr(name="fc10_weights"), - bias_attr=ParamAttr(name="fc10_offset")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'mobilenet_v2_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/mobilenet_v2_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv1(inputs, if_act=True) - for block in self.block_list: - y = block(y) - y = self.conv9(y, if_act=True) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.out_c]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/mobilenet_v2_imagenet_ssld/module.py b/hub_module/modules/image/classification/mobilenet_v2_imagenet_ssld/module.py deleted file mode 100644 index 510b9d14c35e565d4e3eab3d36220c805cca3b6d..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/mobilenet_v2_imagenet_ssld/module.py +++ /dev/null @@ -1,209 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
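Before the near-identical SSLD variant below, a minimal forward-pass sketch of the dygraph `MobileNet` defined above. It assumes the paddle 2.0-beta APIs imported in that module and that the model is loaded through PaddleHub so that `self.directory` is set for the checkpoint download; the random input merely stands in for a preprocessed image batch.

```python
import paddle
import paddlehub as hub

# Loading through PaddleHub triggers the pretrained-checkpoint download
# performed in __init__ above.
model = hub.Module(name="mobilenet_v2_imagenet")
model.eval()

# A dummy NCHW batch; real inputs should go through the usual
# resize/crop/mean-std preprocessing first.
x = paddle.randn([1, 3, 224, 224], dtype='float32')
logits = model(x)  # shape [1, 1000], one score per ImageNet class
```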
-import os - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__(self, - num_channels: int, - filter_size: int, - num_filters: int, - stride: int, - padding: int, - num_groups: int = 1, - name: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - - self._batch_norm = BatchNorm(num_filters, - param_attr=ParamAttr(name=name + "_bn_scale"), - bias_attr=ParamAttr(name=name + "_bn_offset"), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance") - - def forward(self, inputs: paddle.Tensor, if_act: bool = True): - y = self._conv(inputs) - y = self._batch_norm(y) - if if_act: - y = F.relu6(y) - return y - - -class InvertedResidualUnit(nn.Layer): - """Inverted Residual unit.""" - def __init__(self, num_channels: int, num_in_filter: int, num_filters: int, stride: int, filter_size: int, - padding: int, expansion_factor: int, name: str): - super(InvertedResidualUnit, self).__init__() - - num_expfilter = int(round(num_in_filter * expansion_factor)) - self._expand_conv = ConvBNLayer(num_channels=num_channels, - num_filters=num_expfilter, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - name=name + "_expand") - - self._bottleneck_conv = ConvBNLayer(num_channels=num_expfilter, - num_filters=num_expfilter, - filter_size=filter_size, - stride=stride, - padding=padding, - num_groups=num_expfilter, - name=name + "_dwise") - - self._linear_conv = ConvBNLayer(num_channels=num_expfilter, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - name=name + "_linear") - - def forward(self, inputs: paddle.Tensor, ifshortcut: bool): - y = self._expand_conv(inputs, if_act=True) - y = self._bottleneck_conv(y, if_act=True) - y = self._linear_conv(y, if_act=False) - if ifshortcut: - y = paddle.elementwise_add(inputs, y) - return y - - -class InversiBlocks(nn.Layer): - """Inverted residual block composed by inverted residual unit.""" - def __init__(self, in_c: int, t: int, c: int, n: int, s: int, name: str): - super(InversiBlocks, self).__init__() - - self._first_block = InvertedResidualUnit(num_channels=in_c, - num_in_filter=in_c, - num_filters=c, - stride=s, - filter_size=3, - padding=1, - expansion_factor=t, - name=name + "_1") - - self._block_list = [] - for i in range(1, n): - block = self.add_sublayer(name + "_" + str(i + 1), - sublayer=InvertedResidualUnit(num_channels=c, - num_in_filter=c, - num_filters=c, - stride=1, - filter_size=3, - padding=1, - expansion_factor=t, - name=name + "_" + str(i + 1))) - self._block_list.append(block) - - def forward(self, inputs: paddle.Tensor): - y = self._first_block(inputs, ifshortcut=False) - for block in self._block_list: - y = block(y, ifshortcut=True) - return y - - -@moduleinfo(name="mobilenet_v2_imagenet_ssld", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="mobilenet_v2_imagenet_ssld is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - 
            meta=ImageClassifierModule)
-class MobileNet(nn.Layer):
-    """MobileNetV2"""
-    def __init__(self, class_dim: int = 1000, load_checkpoint: str = None):
-        super(MobileNet, self).__init__()
-
-        self.class_dim = class_dim
-
-        bottleneck_params_list = [(1, 16, 1, 1), (6, 24, 2, 2), (6, 32, 3, 2), (6, 64, 4, 2), (6, 96, 3, 1),
-                                  (6, 160, 3, 2), (6, 320, 1, 1)]
-
-        self.conv1 = ConvBNLayer(num_channels=3,
-                                 num_filters=int(32),
-                                 filter_size=3,
-                                 stride=2,
-                                 padding=1,
-                                 name="conv1_1")
-
-        self.block_list = []
-        i = 1
-        in_c = int(32)
-        for layer_setting in bottleneck_params_list:
-            t, c, n, s = layer_setting
-            i += 1
-            block = self.add_sublayer("conv" + str(i),
-                                      sublayer=InversiBlocks(in_c=in_c, t=t, c=int(c), n=n, s=s, name="conv" + str(i)))
-            self.block_list.append(block)
-            in_c = int(c)
-
-        self.out_c = 1280
-        self.conv9 = ConvBNLayer(num_channels=in_c,
-                                 num_filters=self.out_c,
-                                 filter_size=1,
-                                 stride=1,
-                                 padding=0,
-                                 name="conv9")
-
-        self.pool2d_avg = AdaptiveAvgPool2d(1)
-
-        self.out = Linear(self.out_c,
-                          class_dim,
-                          weight_attr=ParamAttr(name="fc10_weights"),
-                          bias_attr=ParamAttr(name="fc10_offset"))
-
-        if load_checkpoint is not None:
-            model_dict = paddle.load(load_checkpoint)[0]
-            self.set_dict(model_dict)
-            print("load custom checkpoint success")
-
-        else:
-            checkpoint = os.path.join(self.directory, 'mobilenet_v2_ssld.pdparams')
-            if not os.path.exists(checkpoint):
-                os.system(
-                    'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/mobilenet_v2_ssld.pdparams -O ' +
-                    checkpoint)
-            model_dict = paddle.load(checkpoint)[0]
-            self.set_dict(model_dict)
-            print("load pretrained checkpoint success")
-
-    def forward(self, inputs: paddle.Tensor):
-        y = self.conv1(inputs, if_act=True)
-        for block in self.block_list:
-            y = block(y)
-        y = self.conv9(y, if_act=True)
-        y = self.pool2d_avg(y)
-        y = paddle.reshape(y, shape=[-1, self.out_c])
-        y = self.out(y)
-        return y
diff --git a/hub_module/modules/image/classification/mobilenet_v3_large_imagenet_ssld/module.py b/hub_module/modules/image/classification/mobilenet_v3_large_imagenet_ssld/module.py
deleted file mode 100644
index 7b247cb77837fc45871f2daa023e2fca6a9f5c48..0000000000000000000000000000000000000000
--- a/hub_module/modules/image/classification/mobilenet_v3_large_imagenet_ssld/module.py
+++ /dev/null
@@ -1,286 +0,0 @@
-# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import os
-
-import paddle
-from paddle import ParamAttr
-import paddle.nn as nn
-import paddle.nn.functional as F
-from paddle.nn import Conv2d, BatchNorm, Linear, Dropout
-from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d
-from paddle.regularizer import L2Decay
-from paddlehub.module.module import moduleinfo
-from paddlehub.module.cv_module import ImageClassifierModule
-
-
-def make_divisible(v: int, divisor: int = 8, min_value: int = None):
-    """
-    This function is taken from the original tf repo.
- It ensures that all layers have a channel number that is divisible by 8 - It can be seen here: - https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py - """ - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - if new_v < 0.9 * v: - new_v += divisor - return new_v - - -@moduleinfo(name="mobilenet_v3_large_imagenet_ssld", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="mobilenet_v3_large_imagenet_ssld is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class MobileNetV3Large(nn.Layer): - """MobileNetV3Large module.""" - def __init__(self, dropout_prob: float = 0.2, class_dim: int = 1000, load_checkpoint: str = None): - super(MobileNetV3Large, self).__init__() - - inplanes = 16 - self.cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, False, "relu", 1], - [3, 64, 24, False, "relu", 2], - [3, 72, 24, False, "relu", 1], - [5, 72, 40, True, "relu", 2], - [5, 120, 40, True, "relu", 1], - [5, 120, 40, True, "relu", 1], - [3, 240, 80, False, "hard_swish", 2], - [3, 200, 80, False, "hard_swish", 1], - [3, 184, 80, False, "hard_swish", 1], - [3, 184, 80, False, "hard_swish", 1], - [3, 480, 112, True, "hard_swish", 1], - [3, 672, 112, True, "hard_swish", 1], - [5, 672, 160, True, "hard_swish", 2], - [5, 960, 160, True, "hard_swish", 1], - [5, 960, 160, True, "hard_swish", 1] - ] - self.cls_ch_squeeze = 960 - self.cls_ch_expand = 1280 - - self.conv1 = ConvBNLayer(in_c=3, - out_c=make_divisible(inplanes), - filter_size=3, - stride=2, - padding=1, - num_groups=1, - if_act=True, - act="hard_swish", - name="conv1") - - self.block_list = [] - i = 0 - inplanes = make_divisible(inplanes) - for (k, exp, c, se, nl, s) in self.cfg: - self.block_list.append( - ResidualUnit(in_c=inplanes, - mid_c=make_divisible(exp), - out_c=make_divisible(c), - filter_size=k, - stride=s, - use_se=se, - act=nl, - name="conv" + str(i + 2))) - self.add_sublayer(sublayer=self.block_list[-1], name="conv" + str(i + 2)) - inplanes = make_divisible(c) - i += 1 - - self.last_second_conv = ConvBNLayer(in_c=inplanes, - out_c=make_divisible(self.cls_ch_squeeze), - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=True, - act="hard_swish", - name="conv_last") - - self.pool = AdaptiveAvgPool2d(1) - - self.last_conv = Conv2d(in_channels=make_divisible(self.cls_ch_squeeze), - out_channels=self.cls_ch_expand, - kernel_size=1, - stride=1, - padding=0, - weight_attr=ParamAttr(name="last_1x1_conv_weights"), - bias_attr=False) - - self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer") - - self.out = Linear(self.cls_ch_expand, - class_dim, - weight_attr=ParamAttr("fc_weights"), - bias_attr=ParamAttr(name="fc_offset")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'mobilenet_v3_large_ssld.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/mobilenet_v3_large_ssld.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - x = self.conv1(inputs) - for block in self.block_list: - x = block(x) - - x = self.last_second_conv(x) - x = self.pool(x) - - x = 
self.last_conv(x) - x = F.hard_swish(x) - x = self.dropout(x) - x = paddle.reshape(x, shape=[x.shape[0], x.shape[1]]) - x = self.out(x) - return x - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__(self, - in_c: int, - out_c: int, - filter_size: int, - stride: int, - padding: int, - num_groups: int = 1, - if_act: bool = True, - act: str = None, - name: str = ""): - super(ConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act - self.conv = Conv2d(in_channels=in_c, - out_channels=out_c, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - self.bn = BatchNorm(num_channels=out_c, - act=None, - param_attr=ParamAttr(name=name + "_bn_scale", regularizer=L2Decay(0.0)), - bias_attr=ParamAttr(name=name + "_bn_offset", regularizer=L2Decay(0.0)), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance") - - def forward(self, x: paddle.Tensor): - x = self.conv(x) - x = self.bn(x) - if self.if_act: - if self.act == "relu": - x = F.relu(x) - elif self.act == "hard_swish": - x = F.hard_swish(x) - else: - print("The activation function is selected incorrectly.") - exit() - return x - - -class ResidualUnit(nn.Layer): - """Residual unit for MobileNetV3.""" - def __init__(self, - in_c: int, - mid_c: int, - out_c: int, - filter_size: int, - stride: int, - use_se: int, - act: str = None, - name: str = ''): - super(ResidualUnit, self).__init__() - self.if_shortcut = stride == 1 and in_c == out_c - self.if_se = use_se - - self.expand_conv = ConvBNLayer(in_c=in_c, - out_c=mid_c, - filter_size=1, - stride=1, - padding=0, - if_act=True, - act=act, - name=name + "_expand") - self.bottleneck_conv = ConvBNLayer(in_c=mid_c, - out_c=mid_c, - filter_size=filter_size, - stride=stride, - padding=int((filter_size - 1) // 2), - num_groups=mid_c, - if_act=True, - act=act, - name=name + "_depthwise") - if self.if_se: - self.mid_se = SEModule(mid_c, name=name + "_se") - self.linear_conv = ConvBNLayer(in_c=mid_c, - out_c=out_c, - filter_size=1, - stride=1, - padding=0, - if_act=False, - act=None, - name=name + "_linear") - - def forward(self, inputs: paddle.Tensor): - x = self.expand_conv(inputs) - x = self.bottleneck_conv(x) - if self.if_se: - x = self.mid_se(x) - x = self.linear_conv(x) - if self.if_shortcut: - x = paddle.elementwise_add(inputs, x) - return x - - -class SEModule(nn.Layer): - """Basic model for ResidualUnit.""" - def __init__(self, channel: int, reduction: int = 4, name: str = ""): - super(SEModule, self).__init__() - self.avg_pool = AdaptiveAvgPool2d(1) - self.conv1 = Conv2d(in_channels=channel, - out_channels=channel // reduction, - kernel_size=1, - stride=1, - padding=0, - weight_attr=ParamAttr(name=name + "_1_weights"), - bias_attr=ParamAttr(name=name + "_1_offset")) - self.conv2 = Conv2d(in_channels=channel // reduction, - out_channels=channel, - kernel_size=1, - stride=1, - padding=0, - weight_attr=ParamAttr(name + "_2_weights"), - bias_attr=ParamAttr(name=name + "_2_offset")) - - def forward(self, inputs: paddle.Tensor): - outputs = self.avg_pool(inputs) - outputs = self.conv1(outputs) - outputs = F.relu(outputs) - outputs = self.conv2(outputs) - outputs = F.hard_sigmoid(outputs) - return paddle.multiply(x=inputs, y=outputs, axis=0) diff --git a/hub_module/modules/image/classification/mobilenet_v3_small_imagenet_ssld/module.py b/hub_module/modules/image/classification/mobilenet_v3_small_imagenet_ssld/module.py deleted file mode 100644 
index 5924f00b0a90be4027f90e065cdb19d4d88835e3..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/mobilenet_v3_small_imagenet_ssld/module.py +++ /dev/null @@ -1,276 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.regularizer import L2Decay -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -def make_divisible(v, divisor=8, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - if new_v < 0.9 * v: - new_v += divisor - return new_v - - -@moduleinfo(name="mobilenet_v3_small_imagenet_ssld", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="mobilenet_v3_small_imagenet_ssld is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class MobileNetV3Small(nn.Layer): - """MobileNetV3Small module.""" - def __init__(self, dropout_prob: float = 0.2, class_dim: int = 1000, load_checkpoint: str = None): - super(MobileNetV3Small, self).__init__() - - inplanes = 16 - self.cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, True, "relu", 2], - [3, 72, 24, False, "relu", 2], - [3, 88, 24, False, "relu", 1], - [5, 96, 40, True, "hard_swish", 2], - [5, 240, 40, True, "hard_swish", 1], - [5, 240, 40, True, "hard_swish", 1], - [5, 120, 48, True, "hard_swish", 1], - [5, 144, 48, True, "hard_swish", 1], - [5, 288, 96, True, "hard_swish", 2], - [5, 576, 96, True, "hard_swish", 1], - [5, 576, 96, True, "hard_swish", 1], - ] - self.cls_ch_squeeze = 576 - self.cls_ch_expand = 1280 - - self.conv1 = ConvBNLayer(in_c=3, - out_c=make_divisible(inplanes), - filter_size=3, - stride=2, - padding=1, - num_groups=1, - if_act=True, - act="hard_swish", - name="conv1") - - self.block_list = [] - i = 0 - inplanes = make_divisible(inplanes) - for (k, exp, c, se, nl, s) in self.cfg: - self.block_list.append( - ResidualUnit(in_c=inplanes, - mid_c=make_divisible(exp), - out_c=make_divisible(c), - filter_size=k, - stride=s, - use_se=se, - act=nl, - name="conv" + str(i + 2))) - self.add_sublayer(sublayer=self.block_list[-1], name="conv" + str(i + 2)) - inplanes = make_divisible(c) - i += 1 - - self.last_second_conv = ConvBNLayer(in_c=inplanes, - out_c=make_divisible(self.cls_ch_squeeze), - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=True, - act="hard_swish", - name="conv_last") - - self.pool = AdaptiveAvgPool2d(1) - - self.last_conv = Conv2d(in_channels=make_divisible(self.cls_ch_squeeze), - out_channels=self.cls_ch_expand, - kernel_size=1, - stride=1, - padding=0, - weight_attr=ParamAttr(name="last_1x1_conv_weights"), - 
bias_attr=False) - - self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer") - - self.out = Linear(self.cls_ch_expand, - class_dim, - weight_attr=ParamAttr("fc_weights"), - bias_attr=ParamAttr(name="fc_offset")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'mobilenet_v3_small_ssld.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/mobilenet_v3_small_ssld.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - x = self.conv1(inputs) - for block in self.block_list: - x = block(x) - - x = self.last_second_conv(x) - x = self.pool(x) - - x = self.last_conv(x) - x = F.hard_swish(x) - x = self.dropout(x) - x = paddle.reshape(x, shape=[x.shape[0], x.shape[1]]) - x = self.out(x) - return x - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__(self, - in_c: int, - out_c: int, - filter_size: int, - stride: int, - padding: int, - num_groups: int = 1, - if_act: bool = True, - act: str = None, - name: str = ""): - super(ConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act - self.conv = Conv2d(in_channels=in_c, - out_channels=out_c, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - self.bn = BatchNorm(num_channels=out_c, - act=None, - param_attr=ParamAttr(name=name + "_bn_scale", regularizer=L2Decay(0.0)), - bias_attr=ParamAttr(name=name + "_bn_offset", regularizer=L2Decay(0.0)), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance") - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - if self.if_act: - if self.act == "relu": - x = F.relu(x) - elif self.act == "hard_swish": - x = F.hard_swish(x) - else: - print("The activation function is selected incorrectly.") - exit() - return x - - -class ResidualUnit(nn.Layer): - """Residual unit for MobileNetV3.""" - def __init__(self, - in_c: int, - mid_c: int, - out_c: int, - filter_size: int, - stride: int, - use_se: bool, - act: str = None, - name: str = ''): - super(ResidualUnit, self).__init__() - self.if_shortcut = stride == 1 and in_c == out_c - self.if_se = use_se - - self.expand_conv = ConvBNLayer(in_c=in_c, - out_c=mid_c, - filter_size=1, - stride=1, - padding=0, - if_act=True, - act=act, - name=name + "_expand") - self.bottleneck_conv = ConvBNLayer(in_c=mid_c, - out_c=mid_c, - filter_size=filter_size, - stride=stride, - padding=int((filter_size - 1) // 2), - num_groups=mid_c, - if_act=True, - act=act, - name=name + "_depthwise") - if self.if_se: - self.mid_se = SEModule(mid_c, name=name + "_se") - self.linear_conv = ConvBNLayer(in_c=mid_c, - out_c=out_c, - filter_size=1, - stride=1, - padding=0, - if_act=False, - act=None, - name=name + "_linear") - - def forward(self, inputs: paddle.Tensor): - x = self.expand_conv(inputs) - x = self.bottleneck_conv(x) - if self.if_se: - x = self.mid_se(x) - x = self.linear_conv(x) - if self.if_shortcut: - x = paddle.elementwise_add(inputs, x) - return x - - -class SEModule(nn.Layer): - """Basic model for ResidualUnit.""" - def __init__(self, channel: int, reduction: int = 4, name: str = ""): - super(SEModule, self).__init__() - self.avg_pool = 
AdaptiveAvgPool2d(1)
-        self.conv1 = Conv2d(in_channels=channel,
-                            out_channels=channel // reduction,
-                            kernel_size=1,
-                            stride=1,
-                            padding=0,
-                            weight_attr=ParamAttr(name=name + "_1_weights"),
-                            bias_attr=ParamAttr(name=name + "_1_offset"))
-        self.conv2 = Conv2d(in_channels=channel // reduction,
-                            out_channels=channel,
-                            kernel_size=1,
-                            stride=1,
-                            padding=0,
-                            weight_attr=ParamAttr(name + "_2_weights"),
-                            bias_attr=ParamAttr(name=name + "_2_offset"))
-
-    def forward(self, inputs: paddle.Tensor):
-        outputs = self.avg_pool(inputs)
-        outputs = self.conv1(outputs)
-        outputs = F.relu(outputs)
-        outputs = self.conv2(outputs)
-        outputs = F.hard_sigmoid(outputs)
-        return paddle.multiply(x=inputs, y=outputs, axis=0)
diff --git a/hub_module/modules/image/classification/res2net101_vd_26w_4s_imagenet/data_feed.py b/hub_module/modules/image/classification/res2net101_vd_26w_4s_imagenet/data_feed.py
deleted file mode 100644
index d5ffb5efe9fdfbd143b949892aa44d851e907b41..0000000000000000000000000000000000000000
--- a/hub_module/modules/image/classification/res2net101_vd_26w_4s_imagenet/data_feed.py
+++ /dev/null
@@ -1,84 +0,0 @@
-# coding=utf-8
-import os
-import time
-from collections import OrderedDict
-
-import cv2
-import numpy as np
-from PIL import Image
-
-__all__ = ['reader']
-
-DATA_DIM = 224
-img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
-img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
-
-
-def resize_short(img, target_size):
-    percent = float(target_size) / min(img.size[0], img.size[1])
-    resized_width = int(round(img.size[0] * percent))
-    resized_height = int(round(img.size[1] * percent))
-    img = img.resize((resized_width, resized_height), Image.LANCZOS)
-    return img
-
-
-def crop_image(img, target_size, center):
-    width, height = img.size
-    size = target_size
-    if center:
-        w_start = (width - size) // 2
-        h_start = (height - size) // 2
-    else:
-        w_start = np.random.randint(0, width - size + 1)
-        h_start = np.random.randint(0, height - size + 1)
-    w_end = w_start + size
-    h_end = h_start + size
-    img = img.crop((w_start, h_start, w_end, h_end))
-    return img
-
-
-def process_image(img):
-    img = resize_short(img, target_size=256)
-    img = crop_image(img, target_size=DATA_DIM, center=True)
-    if img.mode != 'RGB':
-        img = img.convert('RGB')
-    img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
-    img -= img_mean
-    img /= img_std
-    return img
-
-
-def reader(images=None, paths=None):
-    """
-    Preprocess to yield image.
-
-    Args:
-        images (list[numpy.ndarray]): images data, shape of each is [H, W, C].
-        paths (list[str]): paths to images.
-
-    Yield:
-        each (collections.OrderedDict): info of original image, preprocessed image.
-    """
-    component = list()
-    if paths:
-        for im_path in paths:
-            each = OrderedDict()
-            assert os.path.isfile(
-                im_path), "The {} isn't a valid file path.".format(im_path)
-            each['org_im_path'] = im_path
-            each['org_im'] = Image.open(im_path)
-            each['org_im_width'], each['org_im_height'] = each['org_im'].size
-            component.append(each)
-    if images is not None:
-        assert isinstance(images, list), "images should be a list of numpy.ndarray."
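        # For reference, one yielded element looks roughly like this
        # (illustrative values only):
        #   each['org_im']        -> PIL.Image, the original image
        #   each['org_im_path']   -> file path, or 'ndarray_time=...' for arrays
        #   each['org_im_width']  -> e.g. 640
        #   each['org_im_height'] -> e.g. 480
        #   each['image']         -> float32 ndarray of shape (3, 224, 224),
        #                            normalized as (pixel / 255 - mean) / std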
-        for im in images:
-            each = OrderedDict()
-            each['org_im'] = Image.fromarray(im[:, :, ::-1])
-            each['org_im_path'] = 'ndarray_time={}'.format(
-                round(time.time(), 6) * 1e6)
-            each['org_im_width'], each['org_im_height'] = each['org_im'].size
-            component.append(each)
-
-    for element in component:
-        element['image'] = process_image(element['org_im'])
-        yield element
diff --git a/hub_module/modules/image/classification/res2net101_vd_26w_4s_imagenet/module.py b/hub_module/modules/image/classification/res2net101_vd_26w_4s_imagenet/module.py
deleted file mode 100644
index 3fc92236f0d1e8a5361e160e6d84c05bd46bfd4f..0000000000000000000000000000000000000000
--- a/hub_module/modules/image/classification/res2net101_vd_26w_4s_imagenet/module.py
+++ /dev/null
@@ -1,273 +0,0 @@
-# coding=utf-8
-from __future__ import absolute_import
-from __future__ import division
-
-import ast
-import argparse
-import os
-
-import numpy as np
-import paddle.fluid as fluid
-import paddlehub as hub
-from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
-from paddlehub.module.module import moduleinfo, runnable, serving
-from paddlehub.common.paddle_helper import add_vars_prefix
-
-from res2net101_vd_26w_4s_imagenet.processor import postprocess, base64_to_cv2
-from res2net101_vd_26w_4s_imagenet.data_feed import reader
-from res2net101_vd_26w_4s_imagenet.res2net_vd import Res2Net101_vd_26w_4s
-
-
-@moduleinfo(
-    name="res2net101_vd_26w_4s_imagenet",
-    type="CV/image_classification",
-    author="paddlepaddle",
-    author_email="paddle-dev@baidu.com",
-    summary=
-    "res2net101_vd_26w_4s is an image classification model; this module is trained with the ImageNet dataset.",
-    version="1.0.0")
-class Res2Net101vd26w4sImagenet(hub.Module):
-    def _initialize(self):
-        self.default_pretrained_model_path = os.path.join(
-            self.directory, "res2net101_vd_26w_4s_imagenet_model")
-        label_file = os.path.join(self.directory, "label_list.txt")
-        with open(label_file, 'r', encoding='utf-8') as file:
-            self.label_list = file.read().split("\n")[:-1]
-        self.predictor_set = False
-
-    def get_expected_image_width(self):
-        return 224
-
-    def get_expected_image_height(self):
-        return 224
-
-    def get_pretrained_images_mean(self):
-        im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3)
-        return im_mean
-
-    def get_pretrained_images_std(self):
-        im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3)
-        return im_std
-
-    def _set_config(self):
-        """
-        predictor config setting
-        """
-        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
-        cpu_config.disable_glog_info()
-        cpu_config.disable_gpu()
-        self.cpu_predictor = create_paddle_predictor(cpu_config)
-
-        try:
-            _places = os.environ["CUDA_VISIBLE_DEVICES"]
-            int(_places[0])
-            use_gpu = True
-        except:
-            use_gpu = False
-        if use_gpu:
-            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
-            gpu_config.disable_glog_info()
-            gpu_config.enable_use_gpu(
-                memory_pool_init_size_mb=1000, device_id=0)
-            self.gpu_predictor = create_paddle_predictor(gpu_config)
-
-    def context(self, trainable=True, pretrained=True):
-        """context for transfer learning.
-
-        Args:
-            trainable (bool): Set parameters in program to be trainable.
-            pretrained (bool): Whether to load the pretrained model.
-
-        Returns:
-            inputs (dict): key is 'image', corresponding value is the image tensor.
-            outputs (dict): keys are:
-                'classification', corresponding value is the result of classification.
-                'feature_map', corresponding value is the output of the layer before the fully connected layer.
- context_prog (fluid.Program): program for transfer learning. - """ - context_prog = fluid.Program() - startup_prog = fluid.Program() - with fluid.program_guard(context_prog, startup_prog): - with fluid.unique_name.guard(): - image = fluid.layers.data( - name="image", shape=[3, 224, 224], dtype="float32") - resnet_vd = Res2Net101_vd_26w_4s() - output, feature_map = resnet_vd.net( - input=image, class_dim=len(self.label_list)) - - name_prefix = '@HUB_{}@'.format(self.name) - inputs = {'image': name_prefix + image.name} - outputs = { - 'classification': name_prefix + output.name, - 'feature_map': name_prefix + feature_map.name - } - add_vars_prefix(context_prog, name_prefix) - add_vars_prefix(startup_prog, name_prefix) - global_vars = context_prog.global_block().vars - inputs = { - key: global_vars[value] - for key, value in inputs.items() - } - outputs = { - key: global_vars[value] - for key, value in outputs.items() - } - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - # pretrained - if pretrained: - - def _if_exist(var): - b = os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - return b - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - context_prog, - predicate=_if_exist) - else: - exe.run(startup_prog) - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - return inputs, outputs, context_prog - - def classification(self, - images=None, - paths=None, - batch_size=1, - use_gpu=False, - top_k=1): - """ - API for image classification. - - Args: - images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. - paths (list[str]): The paths of images. - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - top_k (int): Return top k results. - - Returns: - res (list[dict]): The classfication results. - """ - if not self.predictor_set: - self._set_config() - self.predictor_set = True - - all_data = list() - for yield_data in reader(images, paths): - all_data.append(yield_data) - - total_num = len(all_data) - loop_num = int(np.ceil(total_num / batch_size)) - - res = list() - for iter_id in range(loop_num): - batch_data = list() - handle_id = iter_id * batch_size - for image_id in range(batch_size): - try: - batch_data.append(all_data[handle_id + image_id]) - except: - pass - # feed batch image - batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.copy()) - predictor_output = self.gpu_predictor.run([ - batch_image - ]) if use_gpu else self.cpu_predictor.run([batch_image]) - out = postprocess( - data_out=predictor_output[0].as_ndarray(), - label_list=self.label_list, - top_k=top_k) - res += out - return res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. 
- """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.classification(images=images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. - """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.classification( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not.") - self.arg_config_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - self.arg_config_group.add_argument( - '--top_k', - type=ast.literal_eval, - default=1, - help="Return top k results.") - - def add_module_input_arg(self): - """ - Add the command input options. - """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") diff --git a/hub_module/modules/image/classification/res2net101_vd_26w_4s_imagenet/res2net_vd.py b/hub_module/modules/image/classification/res2net101_vd_26w_4s_imagenet/res2net_vd.py deleted file mode 100644 index ab7c155148f2acee81dd1c7935847c2e60d12fcd..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/res2net101_vd_26w_4s_imagenet/res2net_vd.py +++ /dev/null @@ -1,292 +0,0 @@ -#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -import paddle.fluid as fluid -import math -from paddle.fluid.param_attr import ParamAttr -__all__ = [ - "Res2Net_vd", "Res2Net50_vd_48w_2s", "Res2Net50_vd_26w_4s", - "Res2Net50_vd_14w_8s", "Res2Net50_vd_26w_6s", "Res2Net50_vd_26w_8s", - "Res2Net101_vd_26w_4s", "Res2Net152_vd_26w_4s", "Res2Net200_vd_26w_4s" -] - - -class Res2Net_vd(): - def __init__(self, layers=50, scales=4, width=26): - self.layers = layers - self.scales = scales - self.width = width - - def net(self, input, class_dim=1000): - layers = self.layers - supported_layers = [50, 101, 152, 200] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - basic_width = self.width * self.scales - num_filters1 = [basic_width * t for t in [1, 2, 4, 8]] - num_filters2 = [256 * t for t in [1, 2, 4, 8]] - if layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - conv = self.conv_bn_layer( - input=input, - num_filters=32, - filter_size=3, - stride=2, - act='relu', - name='conv1_1') - conv = self.conv_bn_layer( - input=conv, - num_filters=32, - filter_size=3, - stride=1, - act='relu', - name='conv1_2') - conv = self.conv_bn_layer( - input=conv, - num_filters=64, - filter_size=3, - stride=1, - act='relu', - name='conv1_3') - - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.bottleneck_block( - input=conv, - num_filters1=num_filters1[block], - num_filters2=num_filters2[block], - stride=2 if i == 0 and block != 0 else 1, - if_first=block == i == 0, - name=conv_name) - pool = fluid.layers.pool2d( - input=conv, - pool_size=7, - pool_stride=1, - pool_type='avg', - global_pooling=True) - - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv), - name='fc_weights'), - bias_attr=fluid.param_attr.ParamAttr(name='fc_offset')) - return out, pool - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def conv_bn_layer_new(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - pool = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - pool_type='avg', - ceil_mode=True) - - conv = fluid.layers.conv2d( - input=pool, - num_filters=num_filters, - 
filter_size=filter_size, - stride=1, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def shortcut(self, input, ch_out, stride, name, if_first=False): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - if if_first: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return self.conv_bn_layer_new( - input, ch_out, 1, stride, name=name) - elif if_first: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck_block(self, input, num_filters1, num_filters2, stride, name, - if_first): - conv0 = self.conv_bn_layer( - input=input, - num_filters=num_filters1, - filter_size=1, - stride=1, - act='relu', - name=name + '_branch2a') - - xs = fluid.layers.split(conv0, self.scales, 1) - ys = [] - for s in range(self.scales - 1): - if s == 0 or stride == 2: - ys.append( - self.conv_bn_layer( - input=xs[s], - num_filters=num_filters1 // self.scales, - stride=stride, - filter_size=3, - act='relu', - name=name + '_branch2b_' + str(s + 1))) - else: - ys.append( - self.conv_bn_layer( - input=xs[s] + ys[-1], - num_filters=num_filters1 // self.scales, - stride=stride, - filter_size=3, - act='relu', - name=name + '_branch2b_' + str(s + 1))) - - if stride == 1: - ys.append(xs[-1]) - else: - ys.append( - fluid.layers.pool2d( - input=xs[-1], - pool_size=3, - pool_stride=stride, - pool_padding=1, - pool_type='avg')) - - conv1 = fluid.layers.concat(ys, axis=1) - conv2 = self.conv_bn_layer( - input=conv1, - num_filters=num_filters2, - filter_size=1, - act=None, - name=name + "_branch2c") - - short = self.shortcut( - input, - num_filters2, - stride, - if_first=if_first, - name=name + "_branch1") - - return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') - - -def Res2Net50_vd_48w_2s(): - model = Res2Net_vd(layers=50, scales=2, width=48) - return model - - -def Res2Net50_vd_26w_4s(): - model = Res2Net_vd(layers=50, scales=4, width=26) - return model - - -def Res2Net50_vd_14w_8s(): - model = Res2Net_vd(layers=50, scales=8, width=14) - return model - - -def Res2Net50_vd_26w_6s(): - model = Res2Net_vd(layers=50, scales=6, width=26) - return model - - -def Res2Net50_vd_26w_8s(): - model = Res2Net_vd(layers=50, scales=8, width=26) - return model - - -def Res2Net101_vd_26w_4s(): - model = Res2Net_vd(layers=101, scales=4, width=26) - return model - - -def Res2Net152_vd_26w_4s(): - model = Res2Net_vd(layers=152, scales=4, width=26) - return model - - -def Res2Net200_vd_26w_4s(): - model = Res2Net_vd(layers=200, scales=4, width=26) - return model diff --git a/hub_module/modules/image/classification/resnet101_imagenet/module.py b/hub_module/modules/image/classification/resnet101_imagenet/module.py deleted file mode 100644 index db5c32cb9d187777cbff8e9a537a5f621a2a7f0e..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet101_imagenet/module.py +++ /dev/null @@ -1,228 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
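To make the multi-scale data flow of `bottleneck_block` above easier to follow, here is a standalone NumPy sketch of the stride-1 case: the features are split along the channel axis, each split (except the first and last) is added to the previous branch's output before its 3x3 convolution, and the branches are concatenated again. The identity `conv3x3` is a stand-in for `conv_bn_layer`; shapes are illustrative only.

```python
import numpy as np

def res2net_split_sketch(x, scales=4):
    """Hierarchical residual flow of bottleneck_block (stride-1 case)."""
    conv3x3 = lambda t: t                 # stand-in for conv_bn_layer(filter_size=3)
    xs = np.split(x, scales, axis=1)      # fluid.layers.split(conv0, scales, 1)
    ys = [conv3x3(xs[0])]                 # first split: conv only
    for s in range(1, scales - 1):
        ys.append(conv3x3(xs[s] + ys[-1]))  # add previous branch, then conv
    ys.append(xs[-1])                     # last split passes through when stride == 1
    return np.concatenate(ys, axis=1)     # fluid.layers.concat(ys, axis=1)

out = res2net_split_sketch(np.ones((1, 8, 4, 4), dtype=np.float32), scales=4)
print(out.shape)  # (1, 8, 4, 4)
```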
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import math - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__(self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - act: str = None, - name: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(bn_name + "_offset"), - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance") - - def forward(self, inputs: paddle.Tensor): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNet101.""" - def __init__(self, num_channels: int, num_filters: int, stride: int, shortcut: bool = True, name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act="relu", - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act="relu", - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=stride, - name=name + "_branch1") - - self.shortcut = shortcut - - self._num_channels_out = num_filters * 4 - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = paddle.elementwise_add(x=short, y=conv2, act="relu") - return y - - -class BasicBlock(nn.Layer): - """Basic block for ResNet101.""" - def __init__(self, num_channels: int, num_filters: int, stride: int, shortcut: bool = True, name: str = None): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=3, - stride=stride, - act="relu", - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - 
filter_size=1, - stride=stride, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv1, act="relu") - return y - - -@moduleinfo(name="resnet101_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnet101_imagenet is a classification model, " - "this module is trained with Baidu open sourced dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNet101(nn.Layer): - """ResNet101 model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNet101, self).__init__() - - self.layers = 101 - depth = [3, 4, 23, 3] - num_channels = [64, 256, 512, 1024] - num_filters = [64, 128, 256, 512] - - self.conv = ConvBNLayer(num_channels=3, num_filters=64, filter_size=7, stride=2, act="relu", name="conv1") - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - if block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - conv_name, - BottleneckBlock(num_channels=num_channels[block] if i == 0 else num_filters[block] * 4, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - name=conv_name)) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - - self.pool2d_avg_channels = num_channels[-1] * 2 - - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), - bias_attr=ParamAttr(name="fc_0.b_0")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnet101_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet101_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv(inputs) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnet101_vd_imagenet/module.py b/hub_module/modules/image/classification/resnet101_vd_imagenet/module.py deleted file mode 100644 index cf93440d1dc82ab27d94efedad2e4b23ed7bf3fb..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet101_vd_imagenet/module.py +++ /dev/null @@ -1,250 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
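The stage/unit naming loop in `ResNet101.__init__` above is compact; spelled out, the rule it implements is the one below. Stage `block == 2` (the 23-unit "res4" stage) switches to `a`/`b<i>` names so letters never run past `z`; shorter stages use consecutive letters.

```python
# The parameter-name rule the block loop above implements.
def conv_name(block: int, i: int) -> str:
    if block == 2:
        return "res" + str(block + 2) + ("a" if i == 0 else "b" + str(i))
    return "res" + str(block + 2) + chr(ord("a") + i)

assert conv_name(2, 22) == "res4b22"
assert conv_name(3, 1) == "res5b"
```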
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import math - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__( - self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - is_vd_mode: bool = False, - act: str = None, - name: str = None, - ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNet101_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv2, act='relu') - return y - - -class BasicBlock(nn.Layer): - """Basic block for ResNet101_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', 
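What `is_vd_mode` buys in the shortcut above: instead of a strided 1x1 conv that simply skips activations, the vd variant average-pools 2x2/stride-2 and then applies a stride-1 1x1 conv. A stand-alone sketch using current `paddle.nn` names (`Conv2D`/`AvgPool2D`; the modules above use the pre-release `Conv2d`/`AvgPool2d` spelling):

```python
import paddle.nn as nn

def vd_shortcut(in_channels: int, out_channels: int) -> nn.Sequential:
    # avg-pool handles the downsampling, so the 1x1 conv can stay stride-1
    return nn.Sequential(
        nn.AvgPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True),
        nn.Conv2D(in_channels, out_channels, kernel_size=1, stride=1, bias_attr=False),
    )
```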
- name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv1, act='relu') - return y - - -@moduleinfo(name="resnet101_vd_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnet101_vd_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNet101_vd(nn.Layer): - """ResNet101_vd model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNet101_vd, self).__init__() - - self.layers = 101 - - depth = [3, 4, 23, 3] - num_channels = [64, 256, 512, 1024] - num_filters = [64, 128, 256, 512] - - self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") - self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") - self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - if block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock(num_channels=num_channels[block] if i == 0 else num_filters[block] * 4, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - self.pool2d_avg_channels = num_channels[-1] * 2 - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), - bias_attr=ParamAttr(name="fc_0.b_0")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnet101_vd_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet101_vd_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnet101_vd_imagenet_ssld/module.py 
b/hub_module/modules/image/classification/resnet101_vd_imagenet_ssld/module.py deleted file mode 100644 index ec74075b070282c7e4581a7c4cb4d2e1b813c54c..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet101_vd_imagenet_ssld/module.py +++ /dev/null @@ -1,250 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import math - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__( - self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - is_vd_mode: bool = False, - act: str = None, - name: str = None, - ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNet101_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - 
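Besides the pooled shortcut, every `ResNet*_vd` class in this diff replaces the single 7x7/stride-2 stem with three 3x3 convs (`conv1_1`/`conv1_2`/`conv1_3`, 3 to 32 to 32 to 64 channels). A sketch of that "deep stem"; BN and ReLU, which the originals fold into `ConvBNLayer`, are written out explicitly here:

```python
import paddle.nn as nn

def vd_stem() -> nn.Sequential:
    layers = []
    for in_c, out_c, stride in [(3, 32, 2), (32, 32, 1), (32, 64, 1)]:
        layers += [
            nn.Conv2D(in_c, out_c, kernel_size=3, stride=stride, padding=1, bias_attr=False),
            nn.BatchNorm2D(out_c),
            nn.ReLU(),
        ]
    return nn.Sequential(*layers)
```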
else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv2, act='relu') - return y - - -class BasicBlock(nn.Layer): - """Basic block for ResNet101_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv1, act='relu') - return y - - -@moduleinfo(name="resnet101_vd_imagenet_ssld", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnet101_vd_imagenet_ssld is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNet101_vd(nn.Layer): - """ResNet101_vd model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNet101_vd, self).__init__() - - self.layers = 101 - - depth = [3, 4, 23, 3] - num_channels = [64, 256, 512, 1024] - num_filters = [64, 128, 256, 512] - - self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") - self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") - self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - if block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock(num_channels=num_channels[block] if i == 0 else num_filters[block] * 4, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - self.pool2d_avg_channels = num_channels[-1] * 2 - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), - bias_attr=ParamAttr(name="fc_0.b_0")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnet101_vd_ssld_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet101_vd_ssld_imagenet.pdparams -O ' - + 
checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnet152_imagenet/module.py b/hub_module/modules/image/classification/resnet152_imagenet/module.py deleted file mode 100644 index fd375c9813765fcc7b778c8a0c4ff2843cb282de..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet152_imagenet/module.py +++ /dev/null @@ -1,229 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import math - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__(self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - act: str = None, - name: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(bn_name + "_offset"), - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance") - - def forward(self, inputs: paddle.Tensor): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNet152.""" - def __init__(self, num_channels: int, num_filters: int, stride: int, shortcut: bool = True, name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act="relu", - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act="relu", - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=stride, - 
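Every module in this diff repeats the same lazy checkpoint logic in `__init__`: download the `.pdparams` file once with `wget`, then load and set the state dict. Lifted out of the classes as a sketch; the `[0]` index mirrors the originals, which target a 2.0 pre-release where `paddle.load` evidently returned an indexable pair, whereas current releases return the state dict directly:

```python
import os
import paddle

def load_pretrained(model, directory: str, fname: str, url: str):
    path = os.path.join(directory, fname)
    if not os.path.exists(path):
        os.system('wget {} -O {}'.format(url, path))  # fetch once, then reuse
    model.set_dict(paddle.load(path)[0])
```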
name=name + "_branch1") - - self.shortcut = shortcut - - self._num_channels_out = num_filters * 4 - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = paddle.elementwise_add(x=short, y=conv2, act="relu") - return y - - -class BasicBlock(nn.Layer): - """Basic block for ResNet152.""" - def __init__(self, num_channels: int, num_filters: int, stride: int, shortcut: bool = True, name: str = None): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=3, - stride=stride, - act="relu", - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - stride=stride, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv1, act="relu") - return y - - -@moduleinfo(name="resnet152_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnet152_imagenet is a classification model, " - "this module is trained with Baidu open sourced dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNet152(nn.Layer): - """ResNet152 model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNet152, self).__init__() - - self.layers = 152 - depth = [3, 8, 36, 3] - num_channels = [64, 256, 512, 1024] - num_filters = [64, 128, 256, 512] - - self.conv = ConvBNLayer(num_channels=3, num_filters=64, filter_size=7, stride=2, act="relu", name="conv1") - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - if block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - - bottleneck_block = self.add_sublayer( - conv_name, - BottleneckBlock(num_channels=num_channels[block] if i == 0 else num_filters[block] * 4, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - name=conv_name)) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - - self.pool2d_avg_channels = num_channels[-1] * 2 - - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), - bias_attr=ParamAttr(name="fc_0.b_0")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnet152_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet152_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint 
success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv(inputs) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnet152_vd_imagenet/module.py b/hub_module/modules/image/classification/resnet152_vd_imagenet/module.py deleted file mode 100644 index 1b7dfd866f92d40137bb66d42b1286ff6293a892..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet152_vd_imagenet/module.py +++ /dev/null @@ -1,250 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import math - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__( - self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - is_vd_mode: bool = False, - act: str = None, - name: str = None, - ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNet152_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = 
ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv2, act='relu') - return y - - -class BasicBlock(nn.Layer): - """Basic block for ResNet152_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv1, act='relu') - return y - - -@moduleinfo(name="resnet152_vd_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnet152_vd_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNet152_vd(nn.Layer): - """ResNet152_vd model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNet152_vd, self).__init__() - - self.layers = 152 - - depth = [3, 8, 36, 3] - num_channels = [64, 256, 512, 1024] - num_filters = [64, 128, 256, 512] - - self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") - self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") - self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - if block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock(num_channels=num_channels[block] if i == 0 else num_filters[block] * 4, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - self.pool2d_avg_channels = num_channels[-1] * 2 - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), - bias_attr=ParamAttr(name="fc_0.b_0")) - - if load_checkpoint is not None: - model_dict = 
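Two one-line decisions govern every stage loop in these classes; spelled out as helpers:

```python
# Only the first unit of each stage after stage 0 downsamples, and only the
# network's very first unit keeps a plain (non-pooled) shortcut via if_first.
def unit_stride(block: int, i: int) -> int:
    return 2 if i == 0 and block != 0 else 1

def is_first_unit(block: int, i: int) -> bool:
    return block == 0 and i == 0   # same test as the modules' `block == i == 0`
```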
paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnet152_vd_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet152_vd_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnet18_imagenet/module.py b/hub_module/modules/image/classification/resnet18_imagenet/module.py deleted file mode 100644 index 41817738b2642e47df42871e42340e6e18363c52..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet18_imagenet/module.py +++ /dev/null @@ -1,222 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import math - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__(self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - act: str = None, - name: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(bn_name + "_offset"), - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance") - - def forward(self, inputs: paddle.Tensor): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNet18.""" - def __init__(self, num_channels: int, num_filters: int, stride: int, shortcut: bool = True, name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act="relu", - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - 
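The batch-norm naming rule `ConvBNLayer` applies in every module here, as a helper with checks: the stem conv `conv1` maps to `bn_conv1`, while residual convs drop their three-character `res` prefix.

```python
def bn_name(conv_name: str) -> str:
    return "bn_" + conv_name if conv_name == "conv1" else "bn" + conv_name[3:]

assert bn_name("conv1") == "bn_conv1"
assert bn_name("res2a_branch2a") == "bn2a_branch2a"
```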
filter_size=3, - stride=stride, - act="relu", - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=stride, - name=name + "_branch1") - - self.shortcut = shortcut - - self._num_channels_out = num_filters * 4 - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = paddle.elementwise_add(x=short, y=conv2, act="relu") - return y - - -class BasicBlock(nn.Layer): - """Basic block for ResNet18.""" - def __init__(self, num_channels: int, num_filters: int, stride: int, shortcut: bool = True, name: str = None): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=3, - stride=stride, - act="relu", - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - stride=stride, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv1, act="relu") - return y - - -@moduleinfo(name="resnet18_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnet18_imagenet is a classification model, " - "this module is trained with Baidu open sourced dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNet18(nn.Layer): - """ResNet18 model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNet18, self).__init__() - - self.layers = 18 - depth = [2, 2, 2, 2] - num_channels = [64, 64, 128, 256] - num_filters = [64, 128, 256, 512] - - self.conv = ConvBNLayer(num_channels=3, num_filters=64, filter_size=7, stride=2, act="relu", name="conv1") - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - basic_block = self.add_sublayer( - conv_name, - BasicBlock(num_channels=num_channels[block] if i == 0 else num_filters[block], - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - name=conv_name)) - self.block_list.append(basic_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - - self.pool2d_avg_channels = num_channels[-1] * 2 - - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), - bias_attr=ParamAttr(name="fc_0.b_0")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnet18_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget 
https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet18_imagenet.pdparams -O ' + - checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv(inputs) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnet18_vd_imagenet/module.py b/hub_module/modules/image/classification/resnet18_vd_imagenet/module.py deleted file mode 100644 index f534099662a9f1a867551568c42b4d25f8772edd..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet18_vd_imagenet/module.py +++ /dev/null @@ -1,242 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import math - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__( - self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - is_vd_mode: bool = False, - act: str = None, - name: str = None, - ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNet18_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name 
+ "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv2, act='relu') - return y - - -class BasicBlock(nn.Layer): - """Basic block for ResNet18_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv1, act='relu') - return y - - -@moduleinfo(name="resnet18_vd_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnet18_vd_imagenet is a classification model, " - "this module is trained with Baidu open sourced dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNet18_vd(nn.Layer): - """ResNet18_vd model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNet18_vd, self).__init__() - - self.layers = 18 - depth = [2, 2, 2, 2] - num_channels = [64, 64, 128, 256] - num_filters = [64, 128, 256, 512] - - self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") - self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") - self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - basic_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BasicBlock(num_channels=num_channels[block] if i == 0 else num_filters[block], - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - self.block_list.append(basic_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - self.pool2d_avg_channels = num_channels[-1] * 2 - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), - bias_attr=ParamAttr(name="fc_0.b_0")) - - if load_checkpoint is not None: - 
model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnet18_vd_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet18_vd_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnet200_vd_imagenet/module.py b/hub_module/modules/image/classification/resnet200_vd_imagenet/module.py deleted file mode 100644 index 249907e48831847ce7f793ce227ed31ec2f8fcf7..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet200_vd_imagenet/module.py +++ /dev/null @@ -1,250 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
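All of these files register with `@moduleinfo(..., meta=ImageClassifierModule)`, so they were meant to be loaded by name through PaddleHub rather than imported directly. A hypothetical usage sketch for the module this file defines (`resnet200_vd_imagenet`, per its decorator below); the exact `predict` signature depends on the installed paddlehub 2.x version, and the image path is a placeholder:

```python
import paddlehub as hub

model = hub.Module(name="resnet200_vd_imagenet")
results = model.predict(["test.jpg"])   # placeholder path
print(results)
```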
-import os -import math - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__( - self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - is_vd_mode: bool = False, - act: str = None, - name: str = None, - ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNet200_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv2, act='relu') - return y - - -class BasicBlock(nn.Layer): - """Basic block for ResNet200_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, 
inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv1, act='relu') - return y - - -@moduleinfo(name="resnet200_vd_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnet200_vd_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNet200_vd(nn.Layer): - """ResNet200_vd model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNet200_vd, self).__init__() - - self.layers = 200 - - depth = [3, 12, 48, 3] - num_channels = [64, 256, 512, 1024] - num_filters = [64, 128, 256, 512] - - self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") - self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") - self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - if block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock(num_channels=num_channels[block] if i == 0 else num_filters[block] * 4, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - self.pool2d_avg_channels = num_channels[-1] * 2 - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), - bias_attr=ParamAttr(name="fc_0.b_0")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnet200_vd_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet200_vd_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnet34_imagenet/module.py b/hub_module/modules/image/classification/resnet34_imagenet/module.py deleted file mode 100644 index 3af448aba8ed61fbae69781630ed5093a204ddae..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet34_imagenet/module.py +++ /dev/null @@ -1,222 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 
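The depth tables used by the bottleneck models in this diff all satisfy `layers == 3 * sum(depth) + 2` (three convs per bottleneck, plus the stem conv and the fc, again ignoring the extra convs a `_vd` stem adds):

```python
for layers, depth in [(101, [3, 4, 23, 3]),
                      (152, [3, 8, 36, 3]),
                      (200, [3, 12, 48, 3])]:
    assert layers == 3 * sum(depth) + 2
```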
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import math - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__(self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - act: str = None, - name: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(bn_name + "_offset"), - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance") - - def forward(self, inputs: paddle.Tensor): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNet34.""" - def __init__(self, num_channels: int, num_filters: int, stride: int, shortcut: bool = True, name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act="relu", - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act="relu", - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=stride, - name=name + "_branch1") - - self.shortcut = shortcut - - self._num_channels_out = num_filters * 4 - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = paddle.elementwise_add(x=short, y=conv2, act="relu") - return y - - -class BasicBlock(nn.Layer): - """Basic block for ResNet34.""" - def __init__(self, num_channels: int, num_filters: int, stride: int, shortcut: bool = True, name: str = None): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=3, - stride=stride, - act="relu", - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - act=None, 
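Every classifier head in this diff, including the `ResNet34` just below, initializes its `Linear` weights from `Uniform(-stdv, stdv)` with `stdv = 1 / sqrt(fan_in)`, i.e. a LeCun-style fan-in uniform initialization:

```python
import math

def fc_init_bound(fan_in: int) -> float:
    return 1.0 / math.sqrt(fan_in * 1.0)

assert abs(fc_init_bound(2048) - 0.0221) < 1e-3   # bottleneck models
assert abs(fc_init_bound(512) - 0.0442) < 1e-3    # ResNet18/34
```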
- name=name + "_branch2b") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - stride=stride, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv1, act="relu") - return y - - -@moduleinfo(name="resnet34_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnet34_imagenet is a classification model, " - "this module is trained with Baidu open sourced dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNet34(nn.Layer): - """ResNet34 model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNet34, self).__init__() - - self.layers = 34 - depth = [3, 4, 6, 3] - num_channels = [64, 64, 128, 256] - num_filters = [64, 128, 256, 512] - - self.conv = ConvBNLayer(num_channels=3, num_filters=64, filter_size=7, stride=2, act="relu", name="conv1") - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - basic_block = self.add_sublayer( - conv_name, - BasicBlock(num_channels=num_channels[block] if i == 0 else num_filters[block], - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - name=conv_name)) - self.block_list.append(basic_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - - self.pool2d_avg_channels = num_channels[-1] * 2 - - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), - bias_attr=ParamAttr(name="fc_0.b_0")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnet34_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet34_imagenet.pdparams -O ' + - checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv(inputs) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnet34_v2_imagenet/data_feed.py b/hub_module/modules/image/classification/resnet34_v2_imagenet/data_feed.py deleted file mode 100644 index ffeb258707b7c93eee3bab748e884a754f295f8f..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet34_v2_imagenet/data_feed.py +++ /dev/null @@ -1,74 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import os -from collections import OrderedDict - -import cv2 -import numpy as np -from PIL import Image, ImageEnhance -from paddle import fluid - -DATA_DIM = 224 -img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) -img_std = np.array([0.229, 0.224, 
0.225]).reshape((3, 1, 1)) - - -def resize_short(img, target_size): - percent = float(target_size) / min(img.size[0], img.size[1]) - resized_width = int(round(img.size[0] * percent)) - resized_height = int(round(img.size[1] * percent)) - img = img.resize((resized_width, resized_height), Image.LANCZOS) - return img - - -def crop_image(img, target_size, center): - width, height = img.size - size = target_size - if center == True: - w_start = (width - size) / 2 - h_start = (height - size) / 2 - else: - w_start = np.random.randint(0, width - size + 1) - h_start = np.random.randint(0, height - size + 1) - w_end = w_start + size - h_end = h_start + size - img = img.crop((w_start, h_start, w_end, h_end)) - return img - - -def process_image(img): - img = resize_short(img, target_size=256) - img = crop_image(img, target_size=DATA_DIM, center=True) - if img.mode != 'RGB': - img = img.convert('RGB') - #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) - img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 - img -= img_mean - img /= img_std - return img - - -def test_reader(paths=None, images=None): - """data generator - :param paths: path to images. - :type paths: list, each element is a str - :param images: data of images, [N, H, W, C] - :type images: numpy.ndarray - """ - img_list = [] - if paths: - for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file path.".format(img_path) - img = Image.open(img_path) - #img = cv2.imread(img_path) - img_list.append(img) - if images is not None: - for img in images: - img_list.append(Image.fromarray(np.uint8(img))) - for im in img_list: - im = process_image(im) - yield im diff --git a/hub_module/modules/image/classification/resnet34_v2_imagenet/module.py b/hub_module/modules/image/classification/resnet34_v2_imagenet/module.py deleted file mode 100644 index bdd6411337db811dd5c0b0226c9b9f0bf603aee7..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet34_v2_imagenet/module.py +++ /dev/null @@ -1,272 +0,0 @@ -import os -import ast -import argparse - -import numpy as np -import paddlehub as hub -import paddle.fluid as fluid -from paddlehub.module.module import moduleinfo, runnable -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.io.parser import txt_parser - -from resnet34_v2_imagenet.resnet import ResNet, ResNetC5 -from resnet34_v2_imagenet.processor import load_label_info -from resnet34_v2_imagenet.data_feed import test_reader - - -@moduleinfo( - name="resnet34_v2_imagenet", - version="1.1.0", - type="cv/classification", - summary= - "ResNet34 is a image classfication model trained with ImageNet-2012 dataset.", - author="paddlepaddle", - author_email="paddle-dev@baidu.com") -class ResNet34(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "resnet34_v2_model") - self.label_names = load_label_info( - os.path.join(self.directory, "label_file.txt")) - self.infer_prog = None - self.pred_out = None - self._set_config() - - def get_expected_image_width(self): - return 224 - - def get_expected_image_height(self): - return 224 - - def get_pretrained_images_mean(self): - im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3) - return im_mean - - def get_pretrained_images_std(self): - im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) - return im_std - - def _set_config(self): - """ - predictor config setting - """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - 
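-        # Two predictors are built: the CPU one here, and a GPU one below only
-        # when CUDA_VISIBLE_DEVICES names a usable device. A minimal usage
-        # sketch (the image path is a placeholder):
-        #
-        #   import paddlehub as hub
-        #   model = hub.Module(name="resnet34_v2_imagenet")
-        #   print(model.classification(paths=["cat.jpg"], top_k=3))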
-        cpu_config.disable_glog_info()
-        cpu_config.disable_gpu()
-        self.cpu_predictor = create_paddle_predictor(cpu_config)
-
-        try:
-            _places = os.environ["CUDA_VISIBLE_DEVICES"]
-            int(_places[0])
-            use_gpu = True
-        except:
-            use_gpu = False
-        if use_gpu:
-            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
-            gpu_config.disable_glog_info()
-            gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0)
-            self.gpu_predictor = create_paddle_predictor(gpu_config)
-
-    def context(self,
-                input_image=None,
-                trainable=True,
-                pretrained=True,
-                param_prefix='',
-                get_prediction=False,
-                variant='d',
-                norm_type='bn',
-                feature_maps=[3, 4, 5],
-                return_c5=False):
-        """Distill the Head Features, so as to perform transfer learning.
-
-        :param input_image: image tensor.
-        :type input_image: fluid.Variable
-        :param trainable: whether to set parameters trainable.
-        :type trainable: bool
-        :param pretrained: whether to load the default pretrained model.
-        :type pretrained: bool
-        :param param_prefix: the prefix of the backbone parameters.
-        :type param_prefix: str
-        :param get_prediction: whether to get prediction;
-            if True, outputs is {'pred_out': out},
-            if False, outputs is {'body_feats': out}.
-        :type get_prediction: bool
-        :param variant: ResNet variant, one of 'a', 'b', 'c', 'd'.
-        :type variant: str
-        :param norm_type: type of normalization.
-        :type norm_type: str
-        :param feature_maps: indices of the stages whose feature maps are returned.
-        :type feature_maps: list
-        """
-        context_prog = input_image.block.program if input_image else fluid.Program(
-        )
-        startup_program = fluid.Program()
-        with fluid.program_guard(context_prog, startup_program):
-            if return_c5:
-                return ResNetC5(
-                    depth=34,
-                    norm_type=norm_type,
-                    variant=variant,
-                    feature_maps=feature_maps)
-            image = input_image if input_image else fluid.data(
-                name='image',
-                shape=[-1, 3, 224, 224],
-                dtype='float32',
-                lod_level=0)
-            backbone = ResNet(depth=34, variant=variant, norm_type=norm_type,\
-                feature_maps=feature_maps, get_prediction=get_prediction)
-
-            out = backbone(image)
-            inputs = {'image': image}
-            if get_prediction:
-                outputs = {'pred_out': out}
-            else:
-                outputs = {'body_feats': out}
-
-            place = fluid.CPUPlace()
-            exe = fluid.Executor(place)
-            if pretrained:
-
-                def _if_exist(var):
-                    return os.path.exists(
-                        os.path.join(self.default_pretrained_model_path,
-                                     var.name))
-
-                if not param_prefix:
-                    fluid.io.load_vars(
-                        exe,
-                        self.default_pretrained_model_path,
-                        main_program=context_prog,
-                        predicate=_if_exist)
-            else:
-                exe.run(startup_program)
-            return inputs, outputs, context_prog
-
-    def classification(self,
-                       paths=None,
-                       images=None,
-                       use_gpu=False,
-                       batch_size=1,
-                       top_k=2):
-        """API of Classification.
-        :param paths: the paths of images.
-        :type paths: list, each element corresponds to the path of an image.
-        :param images: data of images, [N, H, W, C]
-        :type images: numpy.ndarray
-        :param use_gpu: whether to use gpu or not.
-        :type use_gpu: bool
-        :param batch_size: batch size.
-        :type batch_size: int
-        :param top_k: number of top-scoring classes to return for each image.
-        :type top_k: int
-        """
-        if self.infer_prog is None:
-            inputs, outputs, self.infer_prog = self.context(
-                trainable=False, pretrained=True, get_prediction=True)
-            self.infer_prog = self.infer_prog.clone(for_test=True)
-            self.pred_out = outputs['pred_out']
-        place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
-        exe = fluid.Executor(place)
-        all_images = []
-        paths = paths if paths else []
-        for yield_data in test_reader(paths, images):
-            all_images.append(yield_data)
-
-        images_num = len(all_images)
-        loop_num = int(np.ceil(images_num / batch_size))
-
-        res_list = []
-        top_k = max(min(top_k, 1000), 1)
-        for iter_id in range(loop_num):
-            batch_data = []
-            handle_id = iter_id * batch_size
-            for image_id in range(batch_size):
-                try:
-                    batch_data.append(all_images[handle_id + image_id])
-                except IndexError:
-                    # the last batch may be smaller than batch_size
-                    pass
-            batch_data = np.array(batch_data).astype('float32')
-            data_tensor = PaddleTensor(batch_data.copy())
-            if use_gpu:
-                result = self.gpu_predictor.run([data_tensor])
-            else:
-                result = self.cpu_predictor.run([data_tensor])
-            for i, res in enumerate(result[0].as_ndarray()):
-                res_dict = {}
-                pred_label = np.argsort(res)[::-1][:top_k]
-                for k in pred_label:
-                    class_name = self.label_names[int(k)].split(',')[0]
-                    max_prob = res[k]
-                    res_dict[class_name] = max_prob
-                res_list.append(res_dict)
-        return res_list
-
-    def add_module_config_arg(self):
-        """
-        Add the command config options
-        """
-        self.arg_config_group.add_argument(
-            '--use_gpu',
-            type=ast.literal_eval,
-            default=False,
-            help="whether use GPU or not")
-
-        self.arg_config_group.add_argument(
-            '--batch_size',
-            type=int,
-            default=1,
-            help="batch size for prediction")
-
-    def add_module_input_arg(self):
-        """
-        Add the command input options
-        """
-        self.arg_input_group.add_argument(
-            '--input_path', type=str, default=None, help="input data")
-        self.arg_input_group.add_argument(
-            '--input_file',
-            type=str,
-            default=None,
-            help="file contain input data")
-
-    def check_input_data(self, args):
-        input_data = []
-        if args.input_path:
-            input_data = [args.input_path]
-        elif args.input_file:
-            if not os.path.exists(args.input_file):
-                raise RuntimeError("File %s does not exist." % args.input_file)
-            else:
-                input_data = txt_parser.parse(args.input_file, use_strip=True)
-        return input_data
-
-    @runnable
-    def run_cmd(self, argvs):
-        self.parser = argparse.ArgumentParser(
-            description="Run the {}".format(self.name),
-            prog="hub run {}".format(self.name),
-            usage='%(prog)s',
-            add_help=True)
-        self.arg_input_group = self.parser.add_argument_group(
-            title="Input options", description="Input data. Required")
-        self.arg_config_group = self.parser.add_argument_group(
-            title="Config options",
-            description=
-            "Run configuration for controlling module behavior, not required.")
-        self.add_module_config_arg()
-
-        self.add_module_input_arg()
-        args = self.parser.parse_args(argvs)
-        input_data = self.check_input_data(args)
-        if len(input_data) == 0:
-            self.parser.print_help()
-            exit(1)
-        else:
-            for image_path in input_data:
-                if not os.path.exists(image_path):
-                    raise RuntimeError(
-                        "File %s does not exist."
% image_path) - return self.classification( - paths=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size) diff --git a/hub_module/modules/image/classification/resnet34_v2_imagenet/nonlocal_helper.py b/hub_module/modules/image/classification/resnet34_v2_imagenet/nonlocal_helper.py deleted file mode 100644 index 599b8dfa07b6372f25990649212b232c523e19de..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet34_v2_imagenet/nonlocal_helper.py +++ /dev/null @@ -1,154 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import paddle.fluid as fluid -from paddle.fluid import ParamAttr - -nonlocal_params = { - "use_zero_init_conv": False, - "conv_init_std": 0.01, - "no_bias": True, - "use_maxpool": False, - "use_softmax": True, - "use_bn": False, - "use_scale": True, # vital for the model prformance!!! - "use_affine": False, - "bn_momentum": 0.9, - "bn_epsilon": 1.0000001e-5, - "bn_init_gamma": 0.9, - "weight_decay_bn": 1.e-4, -} - - -def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner, - max_pool_stride=2): - cur = input - theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr=ParamAttr(name = prefix + '_theta' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if not nonlocal_params["no_bias"] else False, \ - name = prefix + '_theta') - theta_shape = theta.shape - theta_shape_op = fluid.layers.shape(theta) - theta_shape_op.stop_gradient = True - - if nonlocal_params["use_maxpool"]: - max_pool = fluid.layers.pool2d(input = cur, \ - pool_size = [max_pool_stride, max_pool_stride], \ - pool_type = 'max', \ - pool_stride = [max_pool_stride, max_pool_stride], \ - pool_padding = [0, 0], \ - name = prefix + '_pool') - else: - max_pool = cur - - phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_phi' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_phi') - phi_shape = phi.shape - - g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_g' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_g' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_g') - g_shape = g.shape - # we have to use explicit batch size (to support arbitrary spacetime size) - # e.g. 
(8, 1024, 4, 14, 14) => (8, 1024, 784)
-    theta = fluid.layers.reshape(theta, shape=(0, 0, -1))
-    theta = fluid.layers.transpose(theta, [0, 2, 1])
-    phi = fluid.layers.reshape(phi, [0, 0, -1])
-    theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity')
-    g = fluid.layers.reshape(g, [0, 0, -1])
-
-    if nonlocal_params["use_softmax"]:
-        if nonlocal_params["use_scale"]:
-            theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5)
-        else:
-            theta_phi_sc = theta_phi
-        p = fluid.layers.softmax(
-            theta_phi_sc, name=prefix + '_affinity' + '_prob')
-    else:
-        # the non-softmax normalization of the reference implementation
-        # was never ported
-        p = None  # not implemented
-        raise NotImplementedError(
-            "only the softmax normalization path is implemented")
-
-    # note g's axis[2] corresponds to p's axis[2]
-    # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1)
-    p = fluid.layers.transpose(p, [0, 2, 1])
-    t = fluid.layers.matmul(g, p, name=prefix + '_y')
-
-    # reshape back
-    # e.g. (8, 1024, 784) => (8, 1024, 4, 14, 14)
-    t_shape = t.shape
-    t_re = fluid.layers.reshape(
-        t, shape=list(theta_shape), actual_shape=theta_shape_op)
-    blob_out = t_re
-    blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \
-                   filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \
-                   param_attr = ParamAttr(name = prefix + '_out' + "_w", \
-                       initializer = fluid.initializer.Constant(value = 0.) \
-                           if nonlocal_params["use_zero_init_conv"] \
-                           else fluid.initializer.Normal(loc = 0.0,
-                               scale = nonlocal_params["conv_init_std"])), \
-                   bias_attr = ParamAttr(name = prefix + '_out' + "_b", \
-                       initializer = fluid.initializer.Constant(value = 0.)) \
-                       if (nonlocal_params["no_bias"] == 0) else False, \
-                   name = prefix + '_out')
-    blob_out_shape = blob_out.shape
-
-    if nonlocal_params["use_bn"]:
-        bn_name = prefix + "_bn"
-        blob_out = fluid.layers.batch_norm(blob_out, \
-                       # is_test = test_mode, \
-                       momentum = nonlocal_params["bn_momentum"], \
-                       epsilon = nonlocal_params["bn_epsilon"], \
-                       name = bn_name, \
-                       param_attr = ParamAttr(name = bn_name + "_s", \
-                           initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \
-                           regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \
-                       bias_attr = ParamAttr(name = bn_name + "_b", \
-                           regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \
-                       moving_mean_name = bn_name + "_rm", \
-                       moving_variance_name = bn_name + "_riv")  # add bn
-
-    if nonlocal_params["use_affine"]:
-        affine_scale = fluid.layers.create_parameter(\
-            shape=[blob_out_shape[1]], dtype = blob_out.dtype, \
-            attr=ParamAttr(name=prefix + '_affine' + '_s'), \
-            default_initializer = fluid.initializer.Constant(value = 1.))
-        affine_bias = fluid.layers.create_parameter(\
-            shape=[blob_out_shape[1]], dtype = blob_out.dtype, \
-            attr=ParamAttr(name=prefix + '_affine' + '_b'), \
-            default_initializer = fluid.initializer.Constant(value = 0.))
-        blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \
-                       bias = affine_bias, name = prefix + '_affine')  # add affine
-
-    return blob_out
-
-
-def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner):
-    '''
-    add_space_nonlocal:
-        Non-local Neural Networks: see https://arxiv.org/abs/1711.07971
-    '''
-    conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner)
-    output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum')
-    return output
diff --git a/hub_module/modules/image/classification/resnet34_v2_imagenet/resnet.py b/hub_module/modules/image/classification/resnet34_v2_imagenet/resnet.py deleted file mode
100644 index 126fb6fd9ffd23a2c9def048ec44cbe9704931fa..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet34_v2_imagenet/resnet.py +++ /dev/null @@ -1,447 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -from collections import OrderedDict -from numbers import Integral - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.framework import Variable -from paddle.fluid.regularizer import L2Decay -from paddle.fluid.initializer import Constant - -from .nonlocal_helper import add_space_nonlocal -from .name_adapter import NameAdapter - -__all__ = ['ResNet', 'ResNetC5'] - - -class ResNet(object): - """ - Residual Network, see https://arxiv.org/abs/1512.03385 - Args: - depth (int): ResNet depth, should be 34, 50. - freeze_at (int): freeze the backbone at which stage - norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel' - freeze_norm (bool): freeze normalization layers - norm_decay (float): weight decay for normalization layer weights - variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently - feature_maps (list): index of stages whose feature maps are returned - dcn_v2_stages (list): index of stages who select deformable conv v2 - nonlocal_stages (list): index of stages who select nonlocal networks - """ - __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name'] - - def __init__(self, - depth=34, - freeze_at=0, - norm_type='sync_bn', - freeze_norm=False, - norm_decay=0., - variant='b', - feature_maps=[3, 4, 5], - dcn_v2_stages=[], - weight_prefix_name='', - nonlocal_stages=[], - get_prediction=False, - class_dim=1000): - super(ResNet, self).__init__() - - if isinstance(feature_maps, Integral): - feature_maps = [feature_maps] - - assert depth in [34, 50], \ - "depth {} not in [34, 50]" - assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant" - assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" - assert len(feature_maps) > 0, "need one or more feature maps" - assert norm_type in ['bn', 'sync_bn', 'affine_channel'] - assert not (len(nonlocal_stages)>0 and depth<50), \ - "non-local is not supported for resnet18 or resnet34" - - self.depth = depth - self.freeze_at = freeze_at - self.norm_type = norm_type - self.norm_decay = norm_decay - self.freeze_norm = freeze_norm - self.variant = variant - self._model_type = 'ResNet' - self.feature_maps = feature_maps - self.dcn_v2_stages = dcn_v2_stages - self.depth_cfg = { - 34: ([3, 4, 6, 3], self.basicblock), - 50: ([3, 4, 6, 3], self.bottleneck), - } - self.stage_filters = [64, 128, 256, 512] - self._c1_out_chan_num = 64 - self.na = NameAdapter(self) - self.prefix_name = weight_prefix_name - - self.nonlocal_stages = nonlocal_stages - self.nonlocal_mod_cfg = { - 50: 2, - 101: 5, - 152: 8, - 200: 12, - } - self.get_prediction = get_prediction - self.class_dim = class_dim - - def _conv_offset(self, - input, - filter_size, - stride, - padding, - act=None, - name=None): - out_channel = filter_size * filter_size * 3 - out = fluid.layers.conv2d( - input, - num_filters=out_channel, - filter_size=filter_size, - stride=stride, - padding=padding, - param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"), - bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"), - act=act, - name=name) - return out - - def _conv_norm(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - 
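-                   # when dcn_v2 (below) is set, the conv is replaced by
-                   # deformable conv v2, with offsets and masks predicted by
-                   # the _conv_offset helper above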
name=None, - dcn_v2=False): - _name = self.prefix_name + name if self.prefix_name != '' else name - if not dcn_v2: - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + '.conv2d.output.1') - else: - # select deformable conv" - offset_mask = self._conv_offset( - input=input, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - act=None, - name=_name + "_conv_offset") - offset_channel = filter_size**2 * 2 - mask_channel = filter_size**2 - offset, mask = fluid.layers.split( - input=offset_mask, - num_or_sections=[offset_channel, mask_channel], - dim=1) - mask = fluid.layers.sigmoid(mask) - conv = fluid.layers.deformable_conv( - input=input, - offset=offset, - mask=mask, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - deformable_groups=1, - im2col_step=1, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + ".conv2d.output.1") - - bn_name = self.na.fix_conv_norm_name(name) - bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name - - norm_lr = 0. if self.freeze_norm else 1. - norm_decay = self.norm_decay - pattr = ParamAttr( - name=bn_name + '_scale', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - battr = ParamAttr( - name=bn_name + '_offset', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - - if self.norm_type in ['bn', 'sync_bn']: - global_stats = True if self.freeze_norm else False - out = fluid.layers.batch_norm( - input=conv, - act=act, - name=bn_name + '.output.1', - param_attr=pattr, - bias_attr=battr, - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', - use_global_stats=global_stats) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif self.norm_type == 'affine_channel': - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=pattr, - default_initializer=fluid.initializer.Constant(1.)) - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=battr, - default_initializer=fluid.initializer.Constant(0.)) - out = fluid.layers.affine_channel( - x=conv, scale=scale, bias=bias, act=act) - if self.freeze_norm: - scale.stop_gradient = True - bias.stop_gradient = True - return out - - def _shortcut(self, input, ch_out, stride, is_first, name): - max_pooling_in_short_cut = self.variant == 'd' - ch_in = input.shape[1] - # the naming rule is same as pretrained weight - name = self.na.fix_shortcut_name(name) - std_senet = getattr(self, 'std_senet', False) - if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first): - if std_senet: - if is_first: - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return self._conv_norm(input, ch_out, 3, stride, name=name) - if max_pooling_in_short_cut and not is_first: - input = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - ceil_mode=True, - pool_type='avg') - return self._conv_norm(input, ch_out, 1, 1, name=name) - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - if self.variant == 'a': - stride1, stride2 = stride, 1 - else: - 
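-            # variants 'b'/'c'/'d' put the stride on the 3x3 conv
-            # rather than on the first 1x1 conv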
stride1, stride2 = 1, stride - - # ResNeXt - groups = getattr(self, 'groups', 1) - group_width = getattr(self, 'group_width', -1) - if groups == 1: - expand = 4 - elif (groups * group_width) == 256: - expand = 1 - else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d - num_filters = num_filters // 2 - expand = 2 - - conv_name1, conv_name2, conv_name3, \ - shortcut_name = self.na.fix_bottleneck_name(name) - std_senet = getattr(self, 'std_senet', False) - if std_senet: - conv_def = [[ - int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1 - ], [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - else: - conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], - [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - - residual = input - for i, (c, k, s, act, g, _name) in enumerate(conv_def): - residual = self._conv_norm( - input=residual, - num_filters=c, - filter_size=k, - stride=s, - act=act, - groups=g, - name=_name, - dcn_v2=(i == 1 and dcn_v2)) - short = self._shortcut( - input, - num_filters * expand, - stride, - is_first=is_first, - name=shortcut_name) - # Squeeze-and-Excitation - if callable(getattr(self, '_squeeze_excitation', None)): - residual = self._squeeze_excitation( - input=residual, num_channels=num_filters, name='fc' + name) - return fluid.layers.elementwise_add( - x=short, y=residual, act='relu', name=name + ".add.output.5") - - def basicblock(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - assert dcn_v2 is False, "Not implemented yet." - conv0 = self._conv_norm( - input=input, - num_filters=num_filters, - filter_size=3, - act='relu', - stride=stride, - name=name + "_branch2a") - conv1 = self._conv_norm( - input=conv0, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - short = self._shortcut( - input, num_filters, stride, is_first, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - - def layer_warp(self, input, stage_num): - """ - Args: - input (Variable): input variable. - stage_num (int): the stage number, should be 2, 3, 4, 5 - - Returns: - The last variable in endpoint-th stage. 
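-            (e.g. stage_num=3 returns the res3 output, at 1/8 of the input resolution)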
- """ - assert stage_num in [2, 3, 4, 5] - - stages, block_func = self.depth_cfg[self.depth] - count = stages[stage_num - 2] - - ch_out = self.stage_filters[stage_num - 2] - is_first = False if stage_num != 2 else True - dcn_v2 = True if stage_num in self.dcn_v2_stages else False - - nonlocal_mod = 1000 - if stage_num in self.nonlocal_stages: - nonlocal_mod = self.nonlocal_mod_cfg[ - self.depth] if stage_num == 4 else 2 - - # Make the layer name and parameter name consistent - # with ImageNet pre-trained model - conv = input - for i in range(count): - conv_name = self.na.fix_layer_warp_name(stage_num, count, i) - if self.depth < 50: - is_first = True if i == 0 and stage_num == 2 else False - conv = block_func( - input=conv, - num_filters=ch_out, - stride=2 if i == 0 and stage_num != 2 else 1, - is_first=is_first, - name=conv_name, - dcn_v2=dcn_v2) - - # add non local model - dim_in = conv.shape[1] - nonlocal_name = "nonlocal_conv{}".format(stage_num) - if i % nonlocal_mod == nonlocal_mod - 1: - conv = add_space_nonlocal(conv, dim_in, dim_in, - nonlocal_name + '_{}'.format(i), - int(dim_in / 2)) - return conv - - def c1_stage(self, input): - out_chan = self._c1_out_chan_num - - conv1_name = self.na.fix_c1_stage_name() - - if self.variant in ['c', 'd']: - conv_def = [ - [out_chan // 2, 3, 2, "conv1_1"], - [out_chan // 2, 3, 1, "conv1_2"], - [out_chan, 3, 1, "conv1_3"], - ] - else: - conv_def = [[out_chan, 7, 2, conv1_name]] - - for (c, k, s, _name) in conv_def: - input = self._conv_norm( - input=input, - num_filters=c, - filter_size=k, - stride=s, - act='relu', - name=_name) - - output = fluid.layers.pool2d( - input=input, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - return output - - def __call__(self, input): - assert isinstance(input, Variable) - assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ - "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) - - res_endpoints = [] - - res = input - feature_maps = self.feature_maps - severed_head = getattr(self, 'severed_head', False) - if not severed_head: - res = self.c1_stage(res) - feature_maps = range(2, max(self.feature_maps) + 1) - - for i in feature_maps: - res = self.layer_warp(res, i) - if i in self.feature_maps: - res_endpoints.append(res) - if self.freeze_at >= i: - res.stop_gradient = True - if self.get_prediction: - pool = fluid.layers.pool2d( - input=res, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - - out = fluid.layers.fc( - input=pool, - size=self.class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - out = fluid.layers.softmax(out) - return out - return OrderedDict([('res{}_sum'.format(self.feature_maps[idx]), feat) - for idx, feat in enumerate(res_endpoints)]) - - -class ResNetC5(ResNet): - def __init__(self, - depth=50, - freeze_at=2, - norm_type='affine_channel', - freeze_norm=True, - norm_decay=0., - variant='b', - feature_maps=[5], - weight_prefix_name=''): - super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm, - norm_decay, variant, feature_maps) - self.severed_head = True diff --git a/hub_module/modules/image/classification/resnet34_vd_imagenet/module.py b/hub_module/modules/image/classification/resnet34_vd_imagenet/module.py deleted file mode 100644 index 4c0a32e73a92e46f54e87fabf3d1f62b1643d95b..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet34_vd_imagenet/module.py +++ /dev/null @@ -1,242 +0,0 @@ -# copyright (c) 2020 
PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import math - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__( - self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - is_vd_mode: bool = False, - act: str = None, - name: str = None, - ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNet34_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv2, act='relu') - return y - - -class BasicBlock(nn.Layer): - """Basic block for ResNet34_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - 
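-        # vd variant: a downsampling shortcut avg-pools 2x2 first (is_vd_mode)
-        # and then applies a stride-1 1x1 conv, instead of a strided 1x1 conv
-        # that would discard 3 of every 4 positions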
super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv1, act='relu') - return y - - -@moduleinfo(name="resnet34_vd_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnet34_vd_imagenet is a classification model, " - "this module is trained with Baidu open sourced dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNet34_vd(nn.Layer): - """ResNet34_vd model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNet34_vd, self).__init__() - - self.layers = 34 - depth = [3, 4, 6, 3] - num_channels = [64, 64, 128, 256] - num_filters = [64, 128, 256, 512] - - self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") - self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") - self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - basic_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BasicBlock(num_channels=num_channels[block] if i == 0 else num_filters[block], - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - self.block_list.append(basic_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - self.pool2d_avg_channels = num_channels[-1] * 2 - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), - bias_attr=ParamAttr(name="fc_0.b_0")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnet34_vd_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet34_vd_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git 
a/hub_module/modules/image/classification/resnet34_vd_imagenet_ssld/module.py b/hub_module/modules/image/classification/resnet34_vd_imagenet_ssld/module.py deleted file mode 100644 index c4d2677a9c9539a2edfed5439035504dfa233486..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet34_vd_imagenet_ssld/module.py +++ /dev/null @@ -1,242 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import math - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__( - self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - is_vd_mode: bool = False, - act: str = None, - name: str = None, - ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNet34_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = 
self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv2, act='relu') - return y - - -class BasicBlock(nn.Layer): - """Basic block for ResNet34_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv1, act='relu') - return y - - -@moduleinfo(name="resnet34_vd_imagenet_ssld", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnet34_vd_imagenet_ssld is a classification model, " - "this module is trained with Baidu open sourced dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNet34_vd(nn.Layer): - """ResNet34_vd model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNet34_vd, self).__init__() - - self.layers = 34 - depth = [3, 4, 6, 3] - num_channels = [64, 64, 128, 256] - num_filters = [64, 128, 256, 512] - - self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") - self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") - self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - basic_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BasicBlock(num_channels=num_channels[block] if i == 0 else num_filters[block], - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - self.block_list.append(basic_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - self.pool2d_avg_channels = num_channels[-1] * 2 - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), - bias_attr=ParamAttr(name="fc_0.b_0")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnet34_vd_ssld_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet34_vd_ssld_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - 
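-            # paddle.load appears to return a (param_dict, extra) pair for these
-            # checkpoints, hence the [0] above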
print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnet50_imagenet/module.py b/hub_module/modules/image/classification/resnet50_imagenet/module.py deleted file mode 100644 index 55587c47a823f4526f9b85c256d73dad28795c1f..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet50_imagenet/module.py +++ /dev/null @@ -1,222 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import math - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__(self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - act: str = None, - name: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(bn_name + "_offset"), - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance") - - def forward(self, inputs: paddle.Tensor): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNet50.""" - def __init__(self, num_channels: int, num_filters: int, stride: int, shortcut: bool = True, name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act="relu", - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act="relu", - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=stride, - name=name + "_branch1") - - self.shortcut = shortcut - - self._num_channels_out = 
num_filters * 4 - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = paddle.elementwise_add(x=short, y=conv2, act="relu") - return y - - -class BasicBlock(nn.Layer): - """Basic block for ResNet50.""" - def __init__(self, num_channels: int, num_filters: int, stride: int, shortcut: bool = True, name: str = None): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=3, - stride=stride, - act="relu", - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - stride=stride, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv1, act="relu") - return y - - -@moduleinfo(name="resnet50_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnet50_imagenet is a classification model, " - "this module is trained with Baidu open sourced dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNet50(nn.Layer): - """ResNet50 model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNet50, self).__init__() - - self.layers = 50 - depth = [3, 4, 6, 3] - num_channels = [64, 256, 512, 1024] - num_filters = [64, 128, 256, 512] - - self.conv = ConvBNLayer(num_channels=3, num_filters=64, filter_size=7, stride=2, act="relu", name="conv1") - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - conv_name, - BottleneckBlock(num_channels=num_channels[block] if i == 0 else num_filters[block] * 4, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - name=conv_name)) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - - self.pool2d_avg_channels = num_channels[-1] * 2 - - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), - bias_attr=ParamAttr(name="fc_0.b_0")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnet50_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet50_imagenet.pdparams -O ' + - checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv(inputs) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) 
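-        # adaptive average pooling leaves [N, 2048, 1, 1]; the reshape above
-        # flattens it to [N, 2048] for the final FC layer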
- y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnet50_v2_imagenet/data_feed.py b/hub_module/modules/image/classification/resnet50_v2_imagenet/data_feed.py deleted file mode 100644 index ffeb258707b7c93eee3bab748e884a754f295f8f..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet50_v2_imagenet/data_feed.py +++ /dev/null @@ -1,74 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import os -from collections import OrderedDict - -import cv2 -import numpy as np -from PIL import Image, ImageEnhance -from paddle import fluid - -DATA_DIM = 224 -img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) -img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) - - -def resize_short(img, target_size): - percent = float(target_size) / min(img.size[0], img.size[1]) - resized_width = int(round(img.size[0] * percent)) - resized_height = int(round(img.size[1] * percent)) - img = img.resize((resized_width, resized_height), Image.LANCZOS) - return img - - -def crop_image(img, target_size, center): - width, height = img.size - size = target_size - if center == True: - w_start = (width - size) / 2 - h_start = (height - size) / 2 - else: - w_start = np.random.randint(0, width - size + 1) - h_start = np.random.randint(0, height - size + 1) - w_end = w_start + size - h_end = h_start + size - img = img.crop((w_start, h_start, w_end, h_end)) - return img - - -def process_image(img): - img = resize_short(img, target_size=256) - img = crop_image(img, target_size=DATA_DIM, center=True) - if img.mode != 'RGB': - img = img.convert('RGB') - #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) - img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 - img -= img_mean - img /= img_std - return img - - -def test_reader(paths=None, images=None): - """data generator - :param paths: path to images. 
- :type paths: list, each element is a str - :param images: data of images, [N, H, W, C] - :type images: numpy.ndarray - """ - img_list = [] - if paths: - for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file path.".format(img_path) - img = Image.open(img_path) - #img = cv2.imread(img_path) - img_list.append(img) - if images is not None: - for img in images: - img_list.append(Image.fromarray(np.uint8(img))) - for im in img_list: - im = process_image(im) - yield im diff --git a/hub_module/modules/image/classification/resnet50_v2_imagenet/module.py b/hub_module/modules/image/classification/resnet50_v2_imagenet/module.py deleted file mode 100644 index 48a15492c97bdaab0f69599411cfdc3903d49185..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet50_v2_imagenet/module.py +++ /dev/null @@ -1,272 +0,0 @@ -import os -import ast -import argparse - -import numpy as np -import paddlehub as hub -import paddle.fluid as fluid -from paddlehub.module.module import moduleinfo, runnable -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.io.parser import txt_parser - -from resnet50_v2_imagenet.resnet import ResNet, ResNetC5 -from resnet50_v2_imagenet.processor import load_label_info -from resnet50_v2_imagenet.data_feed import test_reader - - -@moduleinfo( - name="resnet50_v2_imagenet", - version="1.1.0", - type="cv/classification", - summary= - "ResNet50 is a image classfication model trained with ImageNet-2012 dataset.", - author="paddlepaddle", - author_email="paddle-dev@baidu.com") -class ResNet50(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "resnet50_v2_model") - self.label_names = load_label_info( - os.path.join(self.directory, "label_file.txt")) - self.infer_prog = None - self.pred_out = None - self._set_config() - - def get_expected_image_width(self): - return 224 - - def get_expected_image_height(self): - return 224 - - def get_pretrained_images_mean(self): - im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3) - return im_mean - - def get_pretrained_images_std(self): - im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) - return im_std - - def _set_config(self): - """ - predictor config setting - """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, - input_image=None, - trainable=True, - pretrained=True, - param_prefix='', - get_prediction=False, - variant='d', - norm_type='bn', - feature_maps=[3, 4, 5], - return_c5=False): - """Distill the Head Features, so as to perform transfer learning. - - :param input_image: image tensor. - :type input_image: - :param trainable: whether to set parameters trainable. - :type trainable: bool - :param pretrained: whether to load default pretrained model. 
-    def context(self,
-                input_image=None,
-                trainable=True,
-                pretrained=True,
-                param_prefix='',
-                get_prediction=False,
-                variant='d',
-                norm_type='bn',
-                feature_maps=[3, 4, 5],
-                return_c5=False):
-        """Distill the head features so as to perform transfer learning.
-
-        :param input_image: image tensor.
-        :type input_image: Variable
-        :param trainable: whether to set parameters trainable.
-        :type trainable: bool
-        :param pretrained: whether to load the default pretrained model.
-        :type pretrained: bool
-        :param param_prefix: the prefix of backbone parameters.
-        :type param_prefix: str
-        :param get_prediction: whether to get prediction;
-            if True, outputs is {'pred_out': pred_out},
-            if False, outputs is {'body_feats': body_feats}.
-        :type get_prediction: bool
-        :param variant: type of ResNet.
-        :type variant: str
-        :param norm_type: type of normalization.
-        :type norm_type: str
-        :param feature_maps: stages whose outputs are returned.
-        :type feature_maps: list
-        """
-        context_prog = input_image.block.program if input_image else fluid.Program(
-        )
-        startup_program = fluid.Program()
-        with fluid.program_guard(context_prog, startup_program):
-            if return_c5:
-                return ResNetC5(
-                    depth=50,
-                    norm_type=norm_type,
-                    variant=variant,
-                    feature_maps=feature_maps)
-            image = input_image if input_image else fluid.data(
-                name='image',
-                shape=[-1, 3, 224, 224],
-                dtype='float32',
-                lod_level=0)
-            backbone = ResNet(depth=50, variant=variant, norm_type=norm_type,\
-                feature_maps=feature_maps, get_prediction=get_prediction)
-
-            out = backbone(image)
-            inputs = {'image': image}
-            if get_prediction:
-                outputs = {'pred_out': out}
-            else:
-                outputs = {'body_feats': out}
-
-            place = fluid.CPUPlace()
-            exe = fluid.Executor(place)
-            if pretrained:
-
-                def _if_exist(var):
-                    return os.path.exists(
-                        os.path.join(self.default_pretrained_model_path,
-                                     var.name))
-
-                if not param_prefix:
-                    fluid.io.load_vars(
-                        exe,
-                        self.default_pretrained_model_path,
-                        main_program=context_prog,
-                        predicate=_if_exist)
-            else:
-                exe.run(startup_program)
-            return inputs, outputs, context_prog
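`context()` is the transfer-learning hook: it rebuilds the backbone in a fresh program and optionally loads the pretrained weights. A hedged usage sketch, assuming paddlehub 1.x with this module installed; the 10-way head is illustrative only:

```python
# Hedged sketch: obtain backbone features from context() and attach a new head.
import paddle.fluid as fluid
import paddlehub as hub

module = hub.Module(name="resnet50_v2_imagenet")
inputs, outputs, program = module.context(trainable=True, pretrained=True)

with fluid.program_guard(program):
    feature_maps = outputs['body_feats']        # OrderedDict of stage outputs
    res5 = list(feature_maps.values())[-1]      # deepest returned feature map
    pooled = fluid.layers.pool2d(res5, pool_type='avg', global_pooling=True)
    logits = fluid.layers.fc(input=pooled, size=10, act='softmax')
```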
-    def classification(self,
-                       paths=None,
-                       images=None,
-                       use_gpu=False,
-                       batch_size=1,
-                       top_k=2):
-        """API of classification.
-
-        :param paths: the paths of images.
-        :type paths: list, each element is the path of an image.
-        :param images: data of images, [N, H, W, C]
-        :type images: numpy.ndarray
-        :param use_gpu: whether to use gpu or not.
-        :type use_gpu: bool
-        :param batch_size: batch size.
-        :type batch_size: int
-        :param top_k: return the top k results.
-        :type top_k: int
-        """
-        if self.infer_prog is None:
-            inputs, outputs, self.infer_prog = self.context(
-                trainable=False, pretrained=True, get_prediction=True)
-            self.infer_prog = self.infer_prog.clone(for_test=True)
-            self.pred_out = outputs['pred_out']
-        place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
-        exe = fluid.Executor(place)
-        all_images = []
-        paths = paths if paths else []
-        for yield_data in test_reader(paths, images):
-            all_images.append(yield_data)
-
-        images_num = len(all_images)
-        loop_num = int(np.ceil(images_num / batch_size))
-
-        res_list = []
-        top_k = max(min(top_k, 1000), 1)
-        for iter_id in range(loop_num):
-            # take up to batch_size preprocessed images; the last batch may be smaller
-            handle_id = iter_id * batch_size
-            batch_data = all_images[handle_id:handle_id + batch_size]
-            batch_data = np.array(batch_data).astype('float32')
-            data_tensor = PaddleTensor(batch_data.copy())
-            if use_gpu:
-                result = self.gpu_predictor.run([data_tensor])
-            else:
-                result = self.cpu_predictor.run([data_tensor])
-            for i, res in enumerate(result[0].as_ndarray()):
-                res_dict = {}
-                pred_label = np.argsort(res)[::-1][:top_k]
-                for k in pred_label:
-                    class_name = self.label_names[int(k)].split(',')[0]
-                    max_prob = res[k]
-                    res_dict[class_name] = max_prob
-                res_list.append(res_dict)
-        return res_list
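The tail of the loop above turns each probability vector into a name-to-probability dict via a descending argsort. The selection logic in isolation, with toy labels in the "name, synonym" format the label file appears to use:

```python
# Standalone illustration of the top-k postprocessing above.
import numpy as np

probs = np.array([0.05, 0.7, 0.05, 0.2])
label_names = ['tabby, tabby cat', 'Persian cat', 'Siamese cat', 'tiger cat']

top_k = 2
pred_label = np.argsort(probs)[::-1][:top_k]     # indices of the 2 largest probs
result = {label_names[int(k)].split(',')[0]: float(probs[k]) for k in pred_label}
print(result)                                    # {'Persian cat': 0.7, 'tiger cat': 0.2}
```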
-    def add_module_config_arg(self):
-        """
-        Add the command config options.
-        """
-        self.arg_config_group.add_argument(
-            '--use_gpu',
-            type=ast.literal_eval,
-            default=False,
-            help="whether to use GPU or not")
-
-        self.arg_config_group.add_argument(
-            '--batch_size',
-            type=int,
-            default=1,
-            help="batch size for prediction")
-
-    def add_module_input_arg(self):
-        """
-        Add the command input options.
-        """
-        self.arg_input_group.add_argument(
-            '--input_path', type=str, default=None, help="input data")
-        self.arg_input_group.add_argument(
-            '--input_file',
-            type=str,
-            default=None,
-            help="file containing input data")
-
-    def check_input_data(self, args):
-        input_data = []
-        if args.input_path:
-            input_data = [args.input_path]
-        elif args.input_file:
-            if not os.path.exists(args.input_file):
-                raise RuntimeError("File %s does not exist." % args.input_file)
-            else:
-                input_data = txt_parser.parse(args.input_file, use_strip=True)
-        return input_data
-
-    @runnable
-    def run_cmd(self, argvs):
-        self.parser = argparse.ArgumentParser(
-            description="Run the {} module.".format(self.name),
-            prog="hub run {}".format(self.name),
-            usage='%(prog)s',
-            add_help=True)
-        self.arg_input_group = self.parser.add_argument_group(
-            title="Input options", description="Input data. Required")
-        self.arg_config_group = self.parser.add_argument_group(
-            title="Config options",
-            description=
-            "Run configuration for controlling module behavior, not required.")
-        self.add_module_config_arg()
-
-        self.add_module_input_arg()
-        args = self.parser.parse_args(argvs)
-        input_data = self.check_input_data(args)
-        if len(input_data) == 0:
-            self.parser.print_help()
-            exit(1)
-        else:
-            for image_path in input_data:
-                if not os.path.exists(image_path):
-                    raise RuntimeError(
-                        "File %s does not exist." % image_path)
-        return self.classification(
-            paths=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size)
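`run_cmd` wires the same `classification` call to the `hub run resnet50_v2_imagenet --input_path <image>` command line. The Python-API equivalent would have looked roughly like this (hedged sketch, paddlehub 1.x assumed, placeholder image paths):

```python
# Hedged usage sketch of the Python entry point this module exposed.
import paddlehub as hub

classifier = hub.Module(name="resnet50_v2_imagenet")
results = classifier.classification(paths=["cat.jpg", "dog.jpg"], top_k=3)
for res in results:
    print(res)   # dict mapping class name -> probability, largest first
```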
diff --git a/hub_module/modules/image/classification/resnet50_v2_imagenet/nonlocal_helper.py b/hub_module/modules/image/classification/resnet50_v2_imagenet/nonlocal_helper.py
deleted file mode 100644
index 599b8dfa07b6372f25990649212b232c523e19de..0000000000000000000000000000000000000000
--- a/hub_module/modules/image/classification/resnet50_v2_imagenet/nonlocal_helper.py
+++ /dev/null
@@ -1,154 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-
-import paddle.fluid as fluid
-from paddle.fluid import ParamAttr
-
-nonlocal_params = {
-    "use_zero_init_conv": False,
-    "conv_init_std": 0.01,
-    "no_bias": True,
-    "use_maxpool": False,
-    "use_softmax": True,
-    "use_bn": False,
-    "use_scale": True,  # vital for model performance
-    "use_affine": False,
-    "bn_momentum": 0.9,
-    "bn_epsilon": 1.0000001e-5,
-    "bn_init_gamma": 0.9,
-    "weight_decay_bn": 1.e-4,
-}
-
-
-def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner,
-                   max_pool_stride=2):
-    cur = input
-    theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \
-        filter_size = [1, 1], stride = [1, 1], \
-        padding = [0, 0], \
-        param_attr=ParamAttr(name = prefix + '_theta' + "_w", \
-            initializer = fluid.initializer.Normal(loc = 0.0,
-                scale = nonlocal_params["conv_init_std"])), \
-        bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \
-            initializer = fluid.initializer.Constant(value = 0.)) \
-            if not nonlocal_params["no_bias"] else False, \
-        name = prefix + '_theta')
-    theta_shape = theta.shape
-    theta_shape_op = fluid.layers.shape(theta)
-    theta_shape_op.stop_gradient = True
-
-    if nonlocal_params["use_maxpool"]:
-        max_pool = fluid.layers.pool2d(input = cur, \
-            pool_size = [max_pool_stride, max_pool_stride], \
-            pool_type = 'max', \
-            pool_stride = [max_pool_stride, max_pool_stride], \
-            pool_padding = [0, 0], \
-            name = prefix + '_pool')
-    else:
-        max_pool = cur
-
-    phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \
-        filter_size = [1, 1], stride = [1, 1], \
-        padding = [0, 0], \
-        param_attr = ParamAttr(name = prefix + '_phi' + "_w", \
-            initializer = fluid.initializer.Normal(loc = 0.0,
-                scale = nonlocal_params["conv_init_std"])), \
-        bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \
-            initializer = fluid.initializer.Constant(value = 0.)) \
-            if not nonlocal_params["no_bias"] else False, \
-        name = prefix + '_phi')
-    phi_shape = phi.shape
-
-    g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \
-        filter_size = [1, 1], stride = [1, 1], \
-        padding = [0, 0], \
-        param_attr = ParamAttr(name = prefix + '_g' + "_w", \
-            initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \
-        bias_attr = ParamAttr(name = prefix + '_g' + "_b", \
-            initializer = fluid.initializer.Constant(value = 0.)) \
-            if not nonlocal_params["no_bias"] else False, \
-        name = prefix + '_g')
-    g_shape = g.shape
-    # we have to use explicit batch size (to support arbitrary spacetime size)
-    # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784)
-    theta = fluid.layers.reshape(theta, shape=(0, 0, -1))
-    theta = fluid.layers.transpose(theta, [0, 2, 1])
-    phi = fluid.layers.reshape(phi, [0, 0, -1])
-    theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity')
-    g = fluid.layers.reshape(g, [0, 0, -1])
-
-    if nonlocal_params["use_softmax"]:
-        if nonlocal_params["use_scale"]:
-            theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5)
-        else:
-            theta_phi_sc = theta_phi
-        p = fluid.layers.softmax(
-            theta_phi_sc, name=prefix + '_affinity' + '_prob')
-    else:
-        # only the softmax affinity is implemented
-        raise NotImplementedError("non-softmax affinity is not implemented")
-
-    # note g's axis[2] corresponds to p's axis[2]
-    # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1)
-    p = fluid.layers.transpose(p, [0, 2, 1])
-    t = fluid.layers.matmul(g, p, name=prefix + '_y')
-
-    # reshape back
-    # e.g. (8, 1024, 784) => (8, 1024, 4, 14, 14)
-    t_shape = t.shape
-    t_re = fluid.layers.reshape(
-        t, shape=list(theta_shape), actual_shape=theta_shape_op)
-    blob_out = t_re
-    blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \
-        filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \
-        param_attr = ParamAttr(name = prefix + '_out' + "_w", \
-            initializer = fluid.initializer.Constant(value = 0.) \
-                if nonlocal_params["use_zero_init_conv"] \
-                else fluid.initializer.Normal(loc = 0.0,
-                    scale = nonlocal_params["conv_init_std"])), \
-        bias_attr = ParamAttr(name = prefix + '_out' + "_b", \
-            initializer = fluid.initializer.Constant(value = 0.)) \
-            if not nonlocal_params["no_bias"] else False, \
-        name = prefix + '_out')
-    blob_out_shape = blob_out.shape
-
-    if nonlocal_params["use_bn"]:
-        bn_name = prefix + "_bn"
-        blob_out = fluid.layers.batch_norm(blob_out, \
-            momentum = nonlocal_params["bn_momentum"], \
-            epsilon = nonlocal_params["bn_epsilon"], \
-            name = bn_name, \
-            param_attr = ParamAttr(name = bn_name + "_s", \
-                initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \
-                regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \
-            bias_attr = ParamAttr(name = bn_name + "_b", \
-                regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \
-            moving_mean_name = bn_name + "_rm", \
-            moving_variance_name = bn_name + "_riv")  # add bn
-
-    if nonlocal_params["use_affine"]:
-        affine_scale = fluid.layers.create_parameter(\
-            shape=[blob_out_shape[1]], dtype = blob_out.dtype, \
-            attr=ParamAttr(name=prefix + '_affine' + '_s'), \
-            default_initializer = fluid.initializer.Constant(value = 1.))
-        affine_bias = fluid.layers.create_parameter(\
-            shape=[blob_out_shape[1]], dtype = blob_out.dtype, \
-            attr=ParamAttr(name=prefix + '_affine' + '_b'), \
-            default_initializer = fluid.initializer.Constant(value = 0.))
-        blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \
-            bias = affine_bias, name = prefix + '_affine')  # add affine
-
-    return blob_out
-
-
-def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner):
-    '''
-    add_space_nonlocal:
-    Non-local Neural Networks, see https://arxiv.org/abs/1711.07971
-    '''
-    conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner)
-    output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum')
-    return output
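`space_nonlocal` implements the embedded-Gaussian form of the non-local block (Wang et al., arXiv:1711.07971): 1x1 convs produce theta, phi and g, and the softmax of the scaled affinity theta^T phi reweights g. The core tensor algebra, sketched with numpy stand-ins for the conv outputs:

```python
# Numpy sketch of the non-local op above; shapes follow the code's comments.
import numpy as np

n, c_inner, hw = 2, 16, 49                        # batch, dim_inner, flattened H*W
theta = np.random.randn(n, c_inner, hw)           # stand-ins for the 1x1 conv outputs
phi = np.random.randn(n, c_inner, hw)
g = np.random.randn(n, c_inner, hw)

affinity = theta.transpose(0, 2, 1) @ phi         # (n, hw, hw) query-key similarities
affinity *= c_inner ** -0.5                       # the use_scale=True scaling
affinity -= affinity.max(axis=-1, keepdims=True)  # numerically stable softmax
p = np.exp(affinity)
p /= p.sum(axis=-1, keepdims=True)                # softmax over keys

y = g @ p.transpose(0, 2, 1)                      # (n, c_inner, hw) weighted sum of g
print(y.shape)                                    # (2, 16, 49)
```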
diff --git a/hub_module/modules/image/classification/resnet50_v2_imagenet/resnet.py b/hub_module/modules/image/classification/resnet50_v2_imagenet/resnet.py
deleted file mode 100644
index 5965b920022386983c6bf0df288c8e6b5262fc1a..0000000000000000000000000000000000000000
--- a/hub_module/modules/image/classification/resnet50_v2_imagenet/resnet.py
+++ /dev/null
@@ -1,447 +0,0 @@
-# coding=utf-8
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-from collections import OrderedDict
-from numbers import Integral
-
-from paddle import fluid
-from paddle.fluid.param_attr import ParamAttr
-from paddle.fluid.framework import Variable
-from paddle.fluid.regularizer import L2Decay
-from paddle.fluid.initializer import Constant
-
-from .nonlocal_helper import add_space_nonlocal
-from .name_adapter import NameAdapter
-
-__all__ = ['ResNet', 'ResNetC5']
-
-
-class ResNet(object):
-    """
-    Residual Network, see https://arxiv.org/abs/1512.03385
-    Args:
-        depth (int): ResNet depth, should be 34 or 50.
-        freeze_at (int): stage at which to freeze the backbone.
-        norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel'
-        freeze_norm (bool): freeze normalization layers
-        norm_decay (float): weight decay for normalization layer weights
-        variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
-        feature_maps (list): index of stages whose feature maps are returned
-        dcn_v2_stages (list): index of stages that use deformable conv v2
-        nonlocal_stages (list): index of stages that use non-local networks
-    """
-    __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name']
-
-    def __init__(self,
-                 depth=50,
-                 freeze_at=0,
-                 norm_type='sync_bn',
-                 freeze_norm=False,
-                 norm_decay=0.,
-                 variant='d',
-                 feature_maps=[3, 4, 5],
-                 dcn_v2_stages=[],
-                 weight_prefix_name='',
-                 nonlocal_stages=[],
-                 get_prediction=False,
-                 class_dim=1000):
-        super(ResNet, self).__init__()
-
-        if isinstance(feature_maps, Integral):
-            feature_maps = [feature_maps]
-
-        assert depth in [34, 50], \
-            "depth {} not in [34, 50]".format(depth)
-        assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant"
-        assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4"
-        assert len(feature_maps) > 0, "need one or more feature maps"
-        assert norm_type in ['bn', 'sync_bn', 'affine_channel']
-        assert not (len(nonlocal_stages) > 0 and depth < 50), \
-            "non-local is not supported for resnet18 or resnet34"
-
-        self.depth = depth
-        self.freeze_at = freeze_at
-        self.norm_type = norm_type
-        self.norm_decay = norm_decay
-        self.freeze_norm = freeze_norm
-        self.variant = variant
-        self._model_type = 'ResNet'
-        self.feature_maps = feature_maps
-        self.dcn_v2_stages = dcn_v2_stages
-        self.depth_cfg = {
-            34: ([3, 4, 6, 3], self.basicblock),
-            50: ([3, 4, 6, 3], self.bottleneck),
-        }
-        self.stage_filters = [64, 128, 256, 512]
-        self._c1_out_chan_num = 64
-        self.na = NameAdapter(self)
-        self.prefix_name = weight_prefix_name
-
-        self.nonlocal_stages = nonlocal_stages
-        self.nonlocal_mod_cfg = {
-            50: 2,
-            101: 5,
-            152: 8,
-            200: 12,
-        }
-        self.get_prediction = get_prediction
-        self.class_dim = class_dim
-
-    def _conv_offset(self,
-                     input,
-                     filter_size,
-                     stride,
-                     padding,
-                     act=None,
-                     name=None):
-        out_channel = filter_size * filter_size * 3
-        out = fluid.layers.conv2d(
-            input,
-            num_filters=out_channel,
-            filter_size=filter_size,
-            stride=stride,
-            padding=padding,
-            param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"),
-            bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"),
-            act=act,
-            name=name)
-        return out
-
-    def _conv_norm(self,
-                   input,
-                   num_filters,
-                   filter_size,
-                   stride=1,
-                   groups=1,
-                   act=None,
name=None, - dcn_v2=False): - _name = self.prefix_name + name if self.prefix_name != '' else name - if not dcn_v2: - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + '.conv2d.output.1') - else: - # select deformable conv" - offset_mask = self._conv_offset( - input=input, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - act=None, - name=_name + "_conv_offset") - offset_channel = filter_size**2 * 2 - mask_channel = filter_size**2 - offset, mask = fluid.layers.split( - input=offset_mask, - num_or_sections=[offset_channel, mask_channel], - dim=1) - mask = fluid.layers.sigmoid(mask) - conv = fluid.layers.deformable_conv( - input=input, - offset=offset, - mask=mask, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - deformable_groups=1, - im2col_step=1, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + ".conv2d.output.1") - - bn_name = self.na.fix_conv_norm_name(name) - bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name - - norm_lr = 0. if self.freeze_norm else 1. - norm_decay = self.norm_decay - pattr = ParamAttr( - name=bn_name + '_scale', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - battr = ParamAttr( - name=bn_name + '_offset', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - - if self.norm_type in ['bn', 'sync_bn']: - global_stats = True if self.freeze_norm else False - out = fluid.layers.batch_norm( - input=conv, - act=act, - name=bn_name + '.output.1', - param_attr=pattr, - bias_attr=battr, - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', - use_global_stats=global_stats) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif self.norm_type == 'affine_channel': - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=pattr, - default_initializer=fluid.initializer.Constant(1.)) - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=battr, - default_initializer=fluid.initializer.Constant(0.)) - out = fluid.layers.affine_channel( - x=conv, scale=scale, bias=bias, act=act) - if self.freeze_norm: - scale.stop_gradient = True - bias.stop_gradient = True - return out - - def _shortcut(self, input, ch_out, stride, is_first, name): - max_pooling_in_short_cut = self.variant == 'd' - ch_in = input.shape[1] - # the naming rule is same as pretrained weight - name = self.na.fix_shortcut_name(name) - std_senet = getattr(self, 'std_senet', False) - if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first): - if std_senet: - if is_first: - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return self._conv_norm(input, ch_out, 3, stride, name=name) - if max_pooling_in_short_cut and not is_first: - input = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - ceil_mode=True, - pool_type='avg') - return self._conv_norm(input, ch_out, 1, 1, name=name) - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - if self.variant == 'a': - stride1, stride2 = stride, 1 - else: - 
stride1, stride2 = 1, stride - - # ResNeXt - groups = getattr(self, 'groups', 1) - group_width = getattr(self, 'group_width', -1) - if groups == 1: - expand = 4 - elif (groups * group_width) == 256: - expand = 1 - else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d - num_filters = num_filters // 2 - expand = 2 - - conv_name1, conv_name2, conv_name3, \ - shortcut_name = self.na.fix_bottleneck_name(name) - std_senet = getattr(self, 'std_senet', False) - if std_senet: - conv_def = [[ - int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1 - ], [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - else: - conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], - [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - - residual = input - for i, (c, k, s, act, g, _name) in enumerate(conv_def): - residual = self._conv_norm( - input=residual, - num_filters=c, - filter_size=k, - stride=s, - act=act, - groups=g, - name=_name, - dcn_v2=(i == 1 and dcn_v2)) - short = self._shortcut( - input, - num_filters * expand, - stride, - is_first=is_first, - name=shortcut_name) - # Squeeze-and-Excitation - if callable(getattr(self, '_squeeze_excitation', None)): - residual = self._squeeze_excitation( - input=residual, num_channels=num_filters, name='fc' + name) - return fluid.layers.elementwise_add( - x=short, y=residual, act='relu', name=name + ".add.output.5") - - def basicblock(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - assert dcn_v2 is False, "Not implemented yet." - conv0 = self._conv_norm( - input=input, - num_filters=num_filters, - filter_size=3, - act='relu', - stride=stride, - name=name + "_branch2a") - conv1 = self._conv_norm( - input=conv0, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - short = self._shortcut( - input, num_filters, stride, is_first, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - - def layer_warp(self, input, stage_num): - """ - Args: - input (Variable): input variable. - stage_num (int): the stage number, should be 2, 3, 4, 5 - - Returns: - The last variable in endpoint-th stage. 
- """ - assert stage_num in [2, 3, 4, 5] - - stages, block_func = self.depth_cfg[self.depth] - count = stages[stage_num - 2] - - ch_out = self.stage_filters[stage_num - 2] - is_first = False if stage_num != 2 else True - dcn_v2 = True if stage_num in self.dcn_v2_stages else False - - nonlocal_mod = 1000 - if stage_num in self.nonlocal_stages: - nonlocal_mod = self.nonlocal_mod_cfg[ - self.depth] if stage_num == 4 else 2 - - # Make the layer name and parameter name consistent - # with ImageNet pre-trained model - conv = input - for i in range(count): - conv_name = self.na.fix_layer_warp_name(stage_num, count, i) - if self.depth < 50: - is_first = True if i == 0 and stage_num == 2 else False - conv = block_func( - input=conv, - num_filters=ch_out, - stride=2 if i == 0 and stage_num != 2 else 1, - is_first=is_first, - name=conv_name, - dcn_v2=dcn_v2) - - # add non local model - dim_in = conv.shape[1] - nonlocal_name = "nonlocal_conv{}".format(stage_num) - if i % nonlocal_mod == nonlocal_mod - 1: - conv = add_space_nonlocal(conv, dim_in, dim_in, - nonlocal_name + '_{}'.format(i), - int(dim_in / 2)) - return conv - - def c1_stage(self, input): - out_chan = self._c1_out_chan_num - - conv1_name = self.na.fix_c1_stage_name() - - if self.variant in ['c', 'd']: - conv_def = [ - [out_chan // 2, 3, 2, "conv1_1"], - [out_chan // 2, 3, 1, "conv1_2"], - [out_chan, 3, 1, "conv1_3"], - ] - else: - conv_def = [[out_chan, 7, 2, conv1_name]] - - for (c, k, s, _name) in conv_def: - input = self._conv_norm( - input=input, - num_filters=c, - filter_size=k, - stride=s, - act='relu', - name=_name) - - output = fluid.layers.pool2d( - input=input, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - return output - - def __call__(self, input): - assert isinstance(input, Variable) - assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ - "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) - - res_endpoints = [] - - res = input - feature_maps = self.feature_maps - severed_head = getattr(self, 'severed_head', False) - if not severed_head: - res = self.c1_stage(res) - feature_maps = range(2, max(self.feature_maps) + 1) - - for i in feature_maps: - res = self.layer_warp(res, i) - if i in self.feature_maps: - res_endpoints.append(res) - if self.freeze_at >= i: - res.stop_gradient = True - if self.get_prediction: - pool = fluid.layers.pool2d( - input=res, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - - out = fluid.layers.fc( - input=pool, - size=self.class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - out = fluid.layers.softmax(out) - return out - return OrderedDict([('res{}_sum'.format(self.feature_maps[idx]), feat) - for idx, feat in enumerate(res_endpoints)]) - - -class ResNetC5(ResNet): - def __init__(self, - depth=50, - freeze_at=2, - norm_type='affine_channel', - freeze_norm=True, - norm_decay=0., - variant='b', - feature_maps=[5], - weight_prefix_name=''): - super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm, - norm_decay, variant, feature_maps) - self.severed_head = True diff --git a/hub_module/modules/image/classification/resnet50_vd_10w/module.py b/hub_module/modules/image/classification/resnet50_vd_10w/module.py deleted file mode 100644 index d0ca0e0cba2e3c4fff4d6160c354920a3b9c25ab..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet50_vd_10w/module.py +++ /dev/null @@ -1,242 +0,0 @@ -# copyright (c) 2020 PaddlePaddle 
Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import math - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__( - self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - is_vd_mode: bool = False, - act: str = None, - name: str = None, - ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNet50_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv2, act='relu') - return y - - -class BasicBlock(nn.Layer): - """Basic block for ResNet50_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BasicBlock, 
self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv1, act='relu') - return y - - -@moduleinfo(name="resnet50_vd_10w", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnet50_vd_imagenet_ssld is a classification model, " - "this module is trained with Baidu open sourced dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNet50_vd(nn.Layer): - """ResNet50_vd model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNet50_vd, self).__init__() - - self.layers = 50 - depth = [3, 4, 6, 3] - num_channels = [64, 256, 512, 1024] - num_filters = [64, 128, 256, 512] - - self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") - self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") - self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock(num_channels=num_channels[block] if i == 0 else num_filters[block] * 4, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - self.pool2d_avg_channels = num_channels[-1] * 2 - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), - bias_attr=ParamAttr(name="fc_0.b_0")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnet50_vd_10w.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet50_vd_10w.pdparams -O ' + - checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnet50_vd_animals/data_feed.py 
b/hub_module/modules/image/classification/resnet50_vd_animals/data_feed.py
deleted file mode 100644
index d5ffb5efe9fdfbd143b949892aa44d851e907b41..0000000000000000000000000000000000000000
--- a/hub_module/modules/image/classification/resnet50_vd_animals/data_feed.py
+++ /dev/null
@@ -1,84 +0,0 @@
-# coding=utf-8
-import os
-import time
-from collections import OrderedDict
-
-import numpy as np
-from PIL import Image
-
-__all__ = ['reader']
-
-DATA_DIM = 224
-img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
-img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
-
-
-def resize_short(img, target_size):
-    percent = float(target_size) / min(img.size[0], img.size[1])
-    resized_width = int(round(img.size[0] * percent))
-    resized_height = int(round(img.size[1] * percent))
-    img = img.resize((resized_width, resized_height), Image.LANCZOS)
-    return img
-
-
-def crop_image(img, target_size, center):
-    width, height = img.size
-    size = target_size
-    if center:
-        w_start = (width - size) // 2
-        h_start = (height - size) // 2
-    else:
-        w_start = np.random.randint(0, width - size + 1)
-        h_start = np.random.randint(0, height - size + 1)
-    w_end = w_start + size
-    h_end = h_start + size
-    img = img.crop((w_start, h_start, w_end, h_end))
-    return img
-
-
-def process_image(img):
-    img = resize_short(img, target_size=256)
-    img = crop_image(img, target_size=DATA_DIM, center=True)
-    if img.mode != 'RGB':
-        img = img.convert('RGB')
-    img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
-    img -= img_mean
-    img /= img_std
-    return img
-
-
-def reader(images=None, paths=None):
-    """
-    Preprocess to yield images.
-
-    Args:
-        images (list[numpy.ndarray]): images data, shape of each is [H, W, C], color space must be BGR.
-        paths (list[str]): paths to images.
-
-    Yield:
-        each (collections.OrderedDict): info of original image, preprocessed image.
-    """
-    component = list()
-    if paths:
-        for im_path in paths:
-            each = OrderedDict()
-            assert os.path.isfile(
-                im_path), "The {} isn't a valid file path.".format(im_path)
-            each['org_im_path'] = im_path
-            each['org_im'] = Image.open(im_path)
-            each['org_im_width'], each['org_im_height'] = each['org_im'].size
-            component.append(each)
-    if images is not None:
-        assert isinstance(images, list), "images must be a list."
-        for im in images:
-            each = OrderedDict()
-            each['org_im'] = Image.fromarray(im[:, :, ::-1])
-            each['org_im_path'] = 'ndarray_time={}'.format(
-                round(time.time(), 6) * 1e6)
-            each['org_im_width'], each['org_im_height'] = each['org_im'].size
-            component.append(each)
-
-    for element in component:
-        element['image'] = process_image(element['org_im'])
-        yield element
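`reader` accepts decoded BGR arrays; the `[:, :, ::-1]` flip above converts them to RGB before preprocessing. A hedged usage sketch, assuming the deleted `data_feed.py` were importable and `animal.jpg` stands in for a real file:

```python
# Hedged usage sketch for reader(): feed one OpenCV-decoded BGR ndarray.
import cv2
from data_feed import reader  # hypothetical import path for the file above

bgr = cv2.imread('animal.jpg')          # HWC, BGR, uint8
for each in reader(images=[bgr]):
    print(each['org_im_width'], each['org_im_height'], each['image'].shape)
    # e.g. 640 480 (3, 224, 224)
```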
diff --git a/hub_module/modules/image/classification/resnet50_vd_animals/module.py b/hub_module/modules/image/classification/resnet50_vd_animals/module.py
deleted file mode 100644
index ed6abe6a873ad1df687792e854a9b5a7c405fe45..0000000000000000000000000000000000000000
--- a/hub_module/modules/image/classification/resnet50_vd_animals/module.py
+++ /dev/null
@@ -1,278 +0,0 @@
-# coding=utf-8
-from __future__ import absolute_import
-from __future__ import division
-
-import ast
-import argparse
-import os
-
-import numpy as np
-import paddle.fluid as fluid
-import paddlehub as hub
-from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
-from paddlehub.module.module import moduleinfo, runnable, serving
-from paddlehub.common.paddle_helper import add_vars_prefix
-
-from resnet50_vd_animals.processor import postprocess, base64_to_cv2
-from resnet50_vd_animals.data_feed import reader
-from resnet50_vd_animals.resnet_vd import ResNet50_vd
-
-
-@moduleinfo(
-    name="resnet50_vd_animals",
-    type="CV/image_classification",
-    author="baidu-vis",
-    author_email="",
-    summary=
-    "ResNet50vd is an image classification model; this module is trained with Baidu's self-built animals dataset.",
-    version="1.0.0")
-class ResNet50vdAnimals(hub.Module):
-    def _initialize(self):
-        self.default_pretrained_model_path = os.path.join(
-            self.directory, "model")
-        label_file = os.path.join(self.directory, "label_list.txt")
-        with open(label_file, 'r', encoding='utf-8') as file:
-            self.label_list = file.read().split("\n")[:-1]
-        self._set_config()
-
-    def get_expected_image_width(self):
-        return 224
-
-    def get_expected_image_height(self):
-        return 224
-
-    def get_pretrained_images_mean(self):
-        im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3)
-        return im_mean
-
-    def get_pretrained_images_std(self):
-        im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3)
-        return im_std
-
-    def _set_config(self):
-        """
-        Predictor config setting.
-        """
-        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
-        cpu_config.disable_glog_info()
-        cpu_config.disable_gpu()
-        self.cpu_predictor = create_paddle_predictor(cpu_config)
-
-        try:
-            _places = os.environ["CUDA_VISIBLE_DEVICES"]
-            int(_places[0])
-            use_gpu = True
-        except Exception:
-            use_gpu = False
-        if use_gpu:
-            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
-            gpu_config.disable_glog_info()
-            gpu_config.enable_use_gpu(
-                memory_pool_init_size_mb=1000, device_id=0)
-            self.gpu_predictor = create_paddle_predictor(gpu_config)
-
-    def context(self, trainable=True, pretrained=True):
-        """Context for transfer learning.
-
-        Args:
-            trainable (bool): Set parameters in program to be trainable.
-            pretrained (bool) : Whether to load pretrained model.
-
-        Returns:
-            inputs (dict): key is 'image', corresponding value is the image tensor.
-            outputs (dict): keys are:
-                'classification', corresponding value is the result of classification.
-                'feature_map', corresponding value is the output of the layer before the fully connected layer.
-            context_prog (fluid.Program): program for transfer learning.
-        """
-        context_prog = fluid.Program()
-        startup_prog = fluid.Program()
-        with fluid.program_guard(context_prog, startup_prog):
-            with fluid.unique_name.guard():
-                image = fluid.layers.data(
-                    name="image", shape=[3, 224, 224], dtype="float32")
-                resnet_vd = ResNet50_vd()
-                output, feature_map = resnet_vd.net(
-                    input=image, class_dim=len(self.label_list))
-
-                name_prefix = '@HUB_{}@'.format(self.name)
-                inputs = {'image': name_prefix + image.name}
-                outputs = {
-                    'classification': name_prefix + output.name,
-                    'feature_map': name_prefix + feature_map.name
-                }
-                add_vars_prefix(context_prog, name_prefix)
-                add_vars_prefix(startup_prog, name_prefix)
-                global_vars = context_prog.global_block().vars
-                inputs = {
-                    key: global_vars[value]
-                    for key, value in inputs.items()
-                }
-                outputs = {
-                    key: global_vars[value]
-                    for key, value in outputs.items()
-                }
-
-                place = fluid.CPUPlace()
-                exe = fluid.Executor(place)
-                # load the pretrained weights when requested
-                if pretrained:
-
-                    def _if_exist(var):
-                        b = os.path.exists(
-                            os.path.join(self.default_pretrained_model_path,
-                                         var.name))
-                        return b
-
-                    fluid.io.load_vars(
-                        exe,
-                        self.default_pretrained_model_path,
-                        context_prog,
-                        predicate=_if_exist)
-                else:
-                    exe.run(startup_prog)
-                # toggle gradient updates for every parameter
-                for param in context_prog.global_block().iter_parameters():
-                    param.trainable = trainable
-        return inputs, outputs, context_prog
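As with the ImageNet module earlier in this diff, `context()` is the transfer-learning entry point; here it also exposes the pre-FC `feature_map`. A hedged sketch against that API (paddlehub 1.x assumed; the 5-way head is illustrative only):

```python
# Hedged transfer-learning sketch: reuse the animals backbone for a new task.
import paddle.fluid as fluid
import paddlehub as hub

animals = hub.Module(name="resnet50_vd_animals")
inputs, outputs, program = animals.context(trainable=True, pretrained=True)

with fluid.program_guard(program):
    feature = outputs['feature_map']   # pooled features before the original FC
    logits = fluid.layers.fc(input=feature, size=5, act='softmax')  # new head
```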
- """ - context_prog = fluid.Program() - startup_prog = fluid.Program() - with fluid.program_guard(context_prog, startup_prog): - with fluid.unique_name.guard(): - image = fluid.layers.data( - name="image", shape=[3, 224, 224], dtype="float32") - resnet_vd = ResNet50_vd() - output, feature_map = resnet_vd.net( - input=image, class_dim=len(self.label_list)) - - name_prefix = '@HUB_{}@'.format(self.name) - inputs = {'image': name_prefix + image.name} - outputs = { - 'classification': name_prefix + output.name, - 'feature_map': name_prefix + feature_map.name - } - add_vars_prefix(context_prog, name_prefix) - add_vars_prefix(startup_prog, name_prefix) - global_vars = context_prog.global_block().vars - inputs = { - key: global_vars[value] - for key, value in inputs.items() - } - outputs = { - key: global_vars[value] - for key, value in outputs.items() - } - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - # pretrained - if pretrained: - - def _if_exist(var): - b = os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - return b - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - context_prog, - predicate=_if_exist) - else: - exe.run(startup_prog) - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - return inputs, outputs, context_prog - - def classification(self, - images=None, - paths=None, - batch_size=1, - use_gpu=False, - top_k=1): - """ - API for image classification. - - Args: - images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. - paths (list[str]): The paths of images. - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - top_k (int): Return top k results. - - Returns: - res (list[dict]): The classfication results. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
- ) - - all_data = list() - for yield_data in reader(images, paths): - all_data.append(yield_data) - - total_num = len(all_data) - loop_num = int(np.ceil(total_num / batch_size)) - - res = list() - for iter_id in range(loop_num): - batch_data = list() - handle_id = iter_id * batch_size - for image_id in range(batch_size): - try: - batch_data.append(all_data[handle_id + image_id]) - except: - pass - # feed batch image - batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.copy()) - predictor_output = self.gpu_predictor.run([ - batch_image - ]) if use_gpu else self.cpu_predictor.run([batch_image]) - out = postprocess( - data_out=predictor_output[0].as_ndarray(), - label_list=self.label_list, - top_k=top_k) - res += out - return res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.classification(images=images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. - """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.classification( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not.") - self.arg_config_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - self.arg_config_group.add_argument( - '--top_k', - type=ast.literal_eval, - default=1, - help="Return top k results.") - - def add_module_input_arg(self): - """ - Add the command input options. - """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") diff --git a/hub_module/modules/image/classification/resnet50_vd_animals/resnet_vd.py b/hub_module/modules/image/classification/resnet50_vd_animals/resnet_vd.py deleted file mode 100755 index e3f765e01c22eded6e7e6d9475d2147727b6ef3b..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet50_vd_animals/resnet_vd.py +++ /dev/null @@ -1,255 +0,0 @@ -#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. 
-# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math - -import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr - -__all__ = [ - "ResNet", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd", "ResNet200_vd" -] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} - - -class ResNet(): - def __init__(self, layers=50, is_3x3=False): - self.params = train_parameters - self.layers = layers - self.is_3x3 = is_3x3 - - def net(self, input, class_dim=1000): - is_3x3 = self.is_3x3 - layers = self.layers - supported_layers = [50, 101, 152, 200] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - - if layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - num_filters = [64, 128, 256, 512] - if is_3x3 == False: - conv = self.conv_bn_layer( - input=input, - num_filters=64, - filter_size=7, - stride=2, - act='relu') - else: - conv = self.conv_bn_layer( - input=input, - num_filters=32, - filter_size=3, - stride=2, - act='relu', - name='conv1_1') - conv = self.conv_bn_layer( - input=conv, - num_filters=32, - filter_size=3, - stride=1, - act='relu', - name='conv1_2') - conv = self.conv_bn_layer( - input=conv, - num_filters=64, - filter_size=3, - stride=1, - act='relu', - name='conv1_3') - - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152, 200] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - if_first=block == 0, - name=conv_name) - - pool = fluid.layers.pool2d( - input=conv, pool_size=7, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - - out = fluid.layers.fc( - input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - - return out, pool - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + 
name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def conv_bn_layer_new(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - pool = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - pool_type='avg') - - conv = fluid.layers.conv2d( - input=pool, - num_filters=num_filters, - filter_size=filter_size, - stride=1, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def shortcut(self, input, ch_out, stride, name, if_first=False): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - if if_first: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return self.conv_bn_layer_new( - input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck_block(self, input, num_filters, stride, name, if_first): - conv0 = self.conv_bn_layer( - input=input, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - conv2 = self.conv_bn_layer( - input=conv1, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c") - - short = self.shortcut( - input, - num_filters * 4, - stride, - if_first=if_first, - name=name + "_branch1") - - return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') - - -def ResNet50_vd(): - model = ResNet(layers=50, is_3x3=True) - return model - - -def ResNet101_vd(): - model = ResNet(layers=101, is_3x3=True) - return model - - -def ResNet152_vd(): - model = ResNet(layers=152, is_3x3=True) - return model - - -def ResNet200_vd(): - model = ResNet(layers=200, is_3x3=True) - return model diff --git a/hub_module/modules/image/classification/resnet50_vd_dishes/data_feed.py b/hub_module/modules/image/classification/resnet50_vd_dishes/data_feed.py deleted file mode 100644 index d5ffb5efe9fdfbd143b949892aa44d851e907b41..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet50_vd_dishes/data_feed.py +++ /dev/null @@ -1,84 +0,0 @@ -# coding=utf-8 -import os -import time -from collections import OrderedDict - -import cv2 -import numpy as np -from PIL import Image - -__all__ = ['reader'] - -DATA_DIM = 224 -img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) -img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) - - -def resize_short(img, target_size): - percent = float(target_size) / min(img.size[0], img.size[1]) - resized_width = int(round(img.size[0] * percent)) - resized_height = int(round(img.size[1] * percent)) - img = img.resize((resized_width, resized_height), Image.LANCZOS) - return img - - -def crop_image(img, target_size, center): - width, height = img.size - size = target_size - if center == True: - w_start = (width - size) / 2 - h_start = (height - size) / 2 - else: - w_start = 
np.random.randint(0, width - size + 1) - h_start = np.random.randint(0, height - size + 1) - w_end = w_start + size - h_end = h_start + size - img = img.crop((w_start, h_start, w_end, h_end)) - return img - - -def process_image(img): - img = resize_short(img, target_size=256) - img = crop_image(img, target_size=DATA_DIM, center=True) - if img.mode != 'RGB': - img = img.convert('RGB') - img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 - img -= img_mean - img /= img_std - return img - - -def reader(images=None, paths=None): - """ - Preprocess to yield image. - - Args: - images (list[numpy.ndarray]): images data, shape of each is [H, W, C]. - paths (list[str]): paths to images. - - Yield: - each (collections.OrderedDict): info of original image, preprocessed image. - """ - component = list() - if paths: - for im_path in paths: - each = OrderedDict() - assert os.path.isfile( - im_path), "The {} isn't a valid file path.".format(im_path) - each['org_im_path'] = im_path - each['org_im'] = Image.open(im_path) - each['org_im_width'], each['org_im_height'] = each['org_im'].size - component.append(each) - if images is not None: - assert type(images), "images is a list." - for im in images: - each = OrderedDict() - each['org_im'] = Image.fromarray(im[:, :, ::-1]) - each['org_im_path'] = 'ndarray_time={}'.format( - round(time.time(), 6) * 1e6) - each['org_im_width'], each['org_im_height'] = each['org_im'].size - component.append(each) - - for element in component: - element['image'] = process_image(element['org_im']) - yield element diff --git a/hub_module/modules/image/classification/resnet50_vd_dishes/module.py b/hub_module/modules/image/classification/resnet50_vd_dishes/module.py deleted file mode 100644 index b554a8fc63d98f7e79edc2d634f6dd91a18e915d..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet50_vd_dishes/module.py +++ /dev/null @@ -1,278 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division - -import ast -import argparse -import os - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix - -from resnet50_vd_dishes.processor import postprocess, base64_to_cv2 -from resnet50_vd_dishes.data_feed import reader -from resnet50_vd_dishes.resnet_vd import ResNet50_vd - - -@moduleinfo( - name="resnet50_vd_dishes", - type="CV/image_classification", - author="baidu-vis", - author_email="", - summary= - "ResNet50vd is a image classfication model, this module is trained with Baidu's self-built dishes dataset.", - version="1.0.0") -class ResNet50vdDishes(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "model") - label_file = os.path.join(self.directory, "label_list.txt") - with open(label_file, 'r', encoding='utf-8') as file: - self.label_list = file.read().split("\n")[:-1] - self._set_config() - - def get_expected_image_width(self): - return 224 - - def get_expected_image_height(self): - return 224 - - def get_pretrained_images_mean(self): - im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3) - return im_mean - - def get_pretrained_images_std(self): - im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) - return im_std - - def _set_config(self): - """ - predictor config setting - """ - cpu_config = 
AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu( - memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, trainable=True, pretrained=True): - """context for transfer learning. - - Args: - trainable (bool): Set parameters in program to be trainable. - pretrained (bool) : Whether to load pretrained model. - - Returns: - inputs (dict): key is 'image', corresponding vaule is image tensor. - outputs (dict): key is : - 'classification', corresponding value is the result of classification. - 'feature_map', corresponding value is the result of the layer before the fully connected layer. - context_prog (fluid.Program): program for transfer learning. - """ - context_prog = fluid.Program() - startup_prog = fluid.Program() - with fluid.program_guard(context_prog, startup_prog): - with fluid.unique_name.guard(): - image = fluid.layers.data( - name="image", shape=[3, 224, 224], dtype="float32") - resnet_vd = ResNet50_vd() - output, feature_map = resnet_vd.net( - input=image, class_dim=len(self.label_list)) - - name_prefix = '@HUB_{}@'.format(self.name) - inputs = {'image': name_prefix + image.name} - outputs = { - 'classification': name_prefix + output.name, - 'feature_map': name_prefix + feature_map.name - } - add_vars_prefix(context_prog, name_prefix) - add_vars_prefix(startup_prog, name_prefix) - global_vars = context_prog.global_block().vars - inputs = { - key: global_vars[value] - for key, value in inputs.items() - } - outputs = { - key: global_vars[value] - for key, value in outputs.items() - } - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - # pretrained - if pretrained: - - def _if_exist(var): - b = os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - return b - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - context_prog, - predicate=_if_exist) - else: - exe.run(startup_prog) - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - return inputs, outputs, context_prog - - def classification(self, - images=None, - paths=None, - batch_size=1, - use_gpu=False, - top_k=1): - """ - API for image classification. - - Args: - images (numpy.ndarray): data of images, shape of each is [H, W, C], color space must be BGR. - paths (list[str]): The paths of images. - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - top_k (int): Return top k results. - - Returns: - res (list[dict]): The classfication results. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
- ) - - all_data = list() - for yield_data in reader(images, paths): - all_data.append(yield_data) - - total_num = len(all_data) - loop_num = int(np.ceil(total_num / batch_size)) - - res = list() - for iter_id in range(loop_num): - batch_data = list() - handle_id = iter_id * batch_size - for image_id in range(batch_size): - try: - batch_data.append(all_data[handle_id + image_id]) - except: - pass - # feed batch image - batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.copy()) - predictor_output = self.gpu_predictor.run([ - batch_image - ]) if use_gpu else self.cpu_predictor.run([batch_image]) - out = postprocess( - data_out=predictor_output[0].as_ndarray(), - label_list=self.label_list, - top_k=top_k) - res += out - return res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.classification(images=images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. - """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.classification( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not.") - self.arg_config_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - self.arg_config_group.add_argument( - '--top_k', - type=ast.literal_eval, - default=1, - help="Return top k results.") - - def add_module_input_arg(self): - """ - Add the command input options. - """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") diff --git a/hub_module/modules/image/classification/resnet50_vd_dishes/resnet_vd.py b/hub_module/modules/image/classification/resnet50_vd_dishes/resnet_vd.py deleted file mode 100755 index e3f765e01c22eded6e7e6d9475d2147727b6ef3b..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet50_vd_dishes/resnet_vd.py +++ /dev/null @@ -1,255 +0,0 @@ -#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. 
-# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math - -import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr - -__all__ = [ - "ResNet", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd", "ResNet200_vd" -] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} - - -class ResNet(): - def __init__(self, layers=50, is_3x3=False): - self.params = train_parameters - self.layers = layers - self.is_3x3 = is_3x3 - - def net(self, input, class_dim=1000): - is_3x3 = self.is_3x3 - layers = self.layers - supported_layers = [50, 101, 152, 200] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - - if layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - num_filters = [64, 128, 256, 512] - if is_3x3 == False: - conv = self.conv_bn_layer( - input=input, - num_filters=64, - filter_size=7, - stride=2, - act='relu') - else: - conv = self.conv_bn_layer( - input=input, - num_filters=32, - filter_size=3, - stride=2, - act='relu', - name='conv1_1') - conv = self.conv_bn_layer( - input=conv, - num_filters=32, - filter_size=3, - stride=1, - act='relu', - name='conv1_2') - conv = self.conv_bn_layer( - input=conv, - num_filters=64, - filter_size=3, - stride=1, - act='relu', - name='conv1_3') - - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152, 200] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - if_first=block == 0, - name=conv_name) - - pool = fluid.layers.pool2d( - input=conv, pool_size=7, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - - out = fluid.layers.fc( - input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - - return out, pool - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + 
name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def conv_bn_layer_new(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - pool = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - pool_type='avg') - - conv = fluid.layers.conv2d( - input=pool, - num_filters=num_filters, - filter_size=filter_size, - stride=1, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def shortcut(self, input, ch_out, stride, name, if_first=False): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - if if_first: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return self.conv_bn_layer_new( - input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck_block(self, input, num_filters, stride, name, if_first): - conv0 = self.conv_bn_layer( - input=input, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - conv2 = self.conv_bn_layer( - input=conv1, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c") - - short = self.shortcut( - input, - num_filters * 4, - stride, - if_first=if_first, - name=name + "_branch1") - - return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') - - -def ResNet50_vd(): - model = ResNet(layers=50, is_3x3=True) - return model - - -def ResNet101_vd(): - model = ResNet(layers=101, is_3x3=True) - return model - - -def ResNet152_vd(): - model = ResNet(layers=152, is_3x3=True) - return model - - -def ResNet200_vd(): - model = ResNet(layers=200, is_3x3=True) - return model diff --git a/hub_module/modules/image/classification/resnet50_vd_imagenet/module.py b/hub_module/modules/image/classification/resnet50_vd_imagenet/module.py deleted file mode 100644 index ca1ccea6186a5d9355cf75381f011a43cd4d0256..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet50_vd_imagenet/module.py +++ /dev/null @@ -1,243 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
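The `resnet_vd.py` above implements the "vd" downsampling trick: on a downsampling shortcut, `conv_bn_layer_new` applies a 2x2 average pool followed by a stride-1 1x1 convolution, whereas the plain `conv_bn_layer` shortcut would use a stride-2 1x1 convolution that reads only one input position in four (the dygraph modules below express the same idea with `is_vd_mode`). A minimal NumPy sketch of the difference; illustrative only, not repository code:

```python
# Contrast the two shortcut-downsampling strategies on a toy feature map.
import numpy as np

x = np.arange(16, dtype=np.float32).reshape(1, 1, 4, 4)  # NCHW layout

# A stride-2 1x1 conv with an identity kernel is plain subsampling:
# three quarters of the activations never influence the shortcut.
strided = x[:, :, ::2, ::2]

# "vd" shortcut: 2x2 average pool (stride 2) first, so every input
# position contributes; the 1x1 conv then runs at stride 1.
pooled = x.reshape(1, 1, 2, 2, 2, 2).mean(axis=(3, 5))

print(strided.squeeze())  # [[ 0.  2.] [ 8. 10.]]
print(pooled.squeeze())   # [[ 2.5  4.5] [10.5 12.5]]
```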
-import os -import math - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__( - self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - is_vd_mode: bool = False, - act: str = None, - name: str = None, - ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNet50_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv2, act='relu') - return y - - -class BasicBlock(nn.Layer): - """Basic block for ResNet50_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, 
inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv1, act='relu') - return y - - -@moduleinfo(name="resnet50_vd_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnet50_vd_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNet50_vd(nn.Layer): - """ResNet50_vd model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNet50_vd, self).__init__() - - self.layers = 50 - - depth = [3, 4, 6, 3] - num_channels = [64, 256, 512, 1024] - num_filters = [64, 128, 256, 512] - - self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") - self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") - self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock(num_channels=num_channels[block] if i == 0 else num_filters[block] * 4, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - self.pool2d_avg_channels = num_channels[-1] * 2 - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), - bias_attr=ParamAttr(name="fc_0.b_0")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnet50_vd_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet50_vd_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnet50_vd_imagenet_ssld/module.py b/hub_module/modules/image/classification/resnet50_vd_imagenet_ssld/module.py deleted file mode 100644 index 64957fa3aae40088e29f3b5a5dd9dbf67cc387da..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet50_vd_imagenet_ssld/module.py +++ /dev/null @@ -1,244 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import math - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__( - self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - is_vd_mode: bool = False, - act: str = None, - name: str = None, - ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNet50_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv2, act='relu') - return y - - -class BasicBlock(nn.Layer): - """Basic block for ResNet50_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - 
name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.elementwise_add(x=short, y=conv1, act='relu') - return y - - -@moduleinfo(name="resnet50_vd_imagenet_ssld", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnet50_vd_imagenet_ssld is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNet50_vd(nn.Layer): - """ResNet50_vd model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNet50_vd, self).__init__() - - self.layers = 50 - - depth = [3, 4, 6, 3] - num_channels = [64, 256, 512, 1024] - num_filters = [64, 128, 256, 512] - - self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") - self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") - self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock(num_channels=num_channels[block] if i == 0 else num_filters[block] * 4, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - self.pool2d_avg_channels = num_channels[-1] * 2 - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), - bias_attr=ParamAttr(name="fc_0.b_0")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnet50_vd_ssld.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet50_vd_ssld.pdparams -O ' + - checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnet50_vd_wildanimals/data_feed.py b/hub_module/modules/image/classification/resnet50_vd_wildanimals/data_feed.py deleted file mode 100644 index 
d5ffb5efe9fdfbd143b949892aa44d851e907b41..0000000000000000000000000000000000000000
--- a/hub_module/modules/image/classification/resnet50_vd_wildanimals/data_feed.py
+++ /dev/null
@@ -1,84 +0,0 @@
-# coding=utf-8
-import os
-import time
-from collections import OrderedDict
-
-import cv2
-import numpy as np
-from PIL import Image
-
-__all__ = ['reader']
-
-DATA_DIM = 224
-img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
-img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
-
-
-def resize_short(img, target_size):
-    percent = float(target_size) / min(img.size[0], img.size[1])
-    resized_width = int(round(img.size[0] * percent))
-    resized_height = int(round(img.size[1] * percent))
-    img = img.resize((resized_width, resized_height), Image.LANCZOS)
-    return img
-
-
-def crop_image(img, target_size, center):
-    width, height = img.size
-    size = target_size
-    if center:
-        # Integer division keeps the crop box on whole-pixel boundaries.
-        w_start = (width - size) // 2
-        h_start = (height - size) // 2
-    else:
-        w_start = np.random.randint(0, width - size + 1)
-        h_start = np.random.randint(0, height - size + 1)
-    w_end = w_start + size
-    h_end = h_start + size
-    img = img.crop((w_start, h_start, w_end, h_end))
-    return img
-
-
-def process_image(img):
-    img = resize_short(img, target_size=256)
-    img = crop_image(img, target_size=DATA_DIM, center=True)
-    if img.mode != 'RGB':
-        img = img.convert('RGB')
-    img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
-    img -= img_mean
-    img /= img_std
-    return img
-
-
-def reader(images=None, paths=None):
-    """
-    Preprocess to yield image.
-
-    Args:
-        images (list[numpy.ndarray]): images data, shape of each is [H, W, C].
-        paths (list[str]): paths to images.
-
-    Yield:
-        each (collections.OrderedDict): info of original image, preprocessed image.
-    """
-    component = list()
-    if paths:
-        for im_path in paths:
-            each = OrderedDict()
-            assert os.path.isfile(
-                im_path), "The {} isn't a valid file path.".format(im_path)
-            each['org_im_path'] = im_path
-            each['org_im'] = Image.open(im_path)
-            each['org_im_width'], each['org_im_height'] = each['org_im'].size
-            component.append(each)
-    if images is not None:
-        # The original `assert type(images)` was always true; check the type.
-        assert isinstance(images, list), "images should be a list."
-        for im in images:
-            each = OrderedDict()
-            each['org_im'] = Image.fromarray(im[:, :, ::-1])
-            each['org_im_path'] = 'ndarray_time={}'.format(
-                round(time.time(), 6) * 1e6)
-            each['org_im_width'], each['org_im_height'] = each['org_im'].size
-            component.append(each)
-
-    for element in component:
-        element['image'] = process_image(element['org_im'])
-        yield element
diff --git a/hub_module/modules/image/classification/resnet50_vd_wildanimals/module.py b/hub_module/modules/image/classification/resnet50_vd_wildanimals/module.py
deleted file mode 100644
index 3a8d811adac5ebbd6a6f3c729e82accab4272736..0000000000000000000000000000000000000000
--- a/hub_module/modules/image/classification/resnet50_vd_wildanimals/module.py
+++ /dev/null
@@ -1,278 +0,0 @@
-# coding=utf-8
-from __future__ import absolute_import
-from __future__ import division
-
-import ast
-import argparse
-import os
-
-import numpy as np
-import paddle.fluid as fluid
-import paddlehub as hub
-from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
-from paddlehub.module.module import moduleinfo, runnable, serving
-from paddlehub.common.paddle_helper import add_vars_prefix
-
-from resnet50_vd_wildanimals.processor import postprocess, base64_to_cv2
-from resnet50_vd_wildanimals.data_feed import reader
-from resnet50_vd_wildanimals.resnet_vd import ResNet50_vd
-
-
-@moduleinfo(
-    name="resnet50_vd_wildanimals",
-    type="CV/image_classification",
-    author="baidu-vis",
-    author_email="",
-    summary=
-    "ResNet50vd is an image classification model; this module is trained with IFAW's self-built wild-animals dataset.",
-    version="1.0.0")
-class ResNet50vdWildAnimals(hub.Module):
-    def _initialize(self):
-        self.default_pretrained_model_path = os.path.join(
-            self.directory, "model")
-        label_file = os.path.join(self.directory, "label_list.txt")
-        with open(label_file, 'r', encoding='utf-8') as file:
-            self.label_list = file.read().split("\n")[:-1]
-        self._set_config()
-
-    def get_expected_image_width(self):
-        return 224
-
-    def get_expected_image_height(self):
-        return 224
-
-    def get_pretrained_images_mean(self):
-        im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3)
-        return im_mean
-
-    def get_pretrained_images_std(self):
-        im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3)
-        return im_std
-
-    def _set_config(self):
-        """
-        Predictor config setting.
-        """
-        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
-        cpu_config.disable_glog_info()
-        cpu_config.disable_gpu()
-        self.cpu_predictor = create_paddle_predictor(cpu_config)
-
-        try:
-            _places = os.environ["CUDA_VISIBLE_DEVICES"]
-            int(_places[0])
-            use_gpu = True
-        except (KeyError, IndexError, ValueError):
-            # No usable CUDA_VISIBLE_DEVICES setting; fall back to CPU only.
-            use_gpu = False
-        if use_gpu:
-            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
-            gpu_config.disable_glog_info()
-            gpu_config.enable_use_gpu(
-                memory_pool_init_size_mb=1000, device_id=0)
-            self.gpu_predictor = create_paddle_predictor(gpu_config)
-
-    def context(self, trainable=True, pretrained=True):
-        """Context for transfer learning.
-
-        Args:
-            trainable (bool): Set parameters in program to be trainable.
-            pretrained (bool): Whether to load the pretrained model.
-
-        Returns:
-            inputs (dict): key is 'image', corresponding value is the image tensor.
-            outputs (dict): keys are:
-                'classification', corresponding value is the result of classification.
-                'feature_map', corresponding value is the output of the layer before the fully connected layer.
-            context_prog (fluid.Program): program for transfer learning.
- """ - context_prog = fluid.Program() - startup_prog = fluid.Program() - with fluid.program_guard(context_prog, startup_prog): - with fluid.unique_name.guard(): - image = fluid.layers.data( - name="image", shape=[3, 224, 224], dtype="float32") - resnet_vd = ResNet50_vd() - output, feature_map = resnet_vd.net( - input=image, class_dim=len(self.label_list)) - - name_prefix = '@HUB_{}@'.format(self.name) - inputs = {'image': name_prefix + image.name} - outputs = { - 'classification': name_prefix + output.name, - 'feature_map': name_prefix + feature_map.name - } - add_vars_prefix(context_prog, name_prefix) - add_vars_prefix(startup_prog, name_prefix) - global_vars = context_prog.global_block().vars - inputs = { - key: global_vars[value] - for key, value in inputs.items() - } - outputs = { - key: global_vars[value] - for key, value in outputs.items() - } - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - # pretrained - if pretrained: - - def _if_exist(var): - b = os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - return b - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - context_prog, - predicate=_if_exist) - else: - exe.run(startup_prog) - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - return inputs, outputs, context_prog - - def classification(self, - images=None, - paths=None, - batch_size=1, - use_gpu=False, - top_k=1): - """ - API for image classification. - - Args: - images (numpy.ndarray): data of images, shape of each is [H, W, C]. - paths (list[str]): The paths of images. - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - top_k (int): Return top k results. - - Returns: - res (list[dict]): The classfication results. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
- ) - - all_data = list() - for yield_data in reader(images, paths): - all_data.append(yield_data) - - total_num = len(all_data) - loop_num = int(np.ceil(total_num / batch_size)) - - res = list() - for iter_id in range(loop_num): - batch_data = list() - handle_id = iter_id * batch_size - for image_id in range(batch_size): - try: - batch_data.append(all_data[handle_id + image_id]) - except: - pass - # feed batch image - batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.copy()) - predictor_output = self.gpu_predictor.run([ - batch_image - ]) if use_gpu else self.cpu_predictor.run([batch_image]) - out = postprocess( - data_out=predictor_output[0].as_ndarray(), - label_list=self.label_list, - top_k=top_k) - res += out - return res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.classification(images=images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. - """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.classification( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not.") - self.arg_config_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - self.arg_config_group.add_argument( - '--top_k', - type=ast.literal_eval, - default=1, - help="Return top k results.") - - def add_module_input_arg(self): - """ - Add the command input options. - """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") diff --git a/hub_module/modules/image/classification/resnet50_vd_wildanimals/resnet_vd.py b/hub_module/modules/image/classification/resnet50_vd_wildanimals/resnet_vd.py deleted file mode 100755 index e3f765e01c22eded6e7e6d9475d2147727b6ef3b..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnet50_vd_wildanimals/resnet_vd.py +++ /dev/null @@ -1,255 +0,0 @@ -#copyright (c) 2019 PaddlePaddle Authors. 
All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math - -import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr - -__all__ = [ - "ResNet", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd", "ResNet200_vd" -] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} - - -class ResNet(): - def __init__(self, layers=50, is_3x3=False): - self.params = train_parameters - self.layers = layers - self.is_3x3 = is_3x3 - - def net(self, input, class_dim=1000): - is_3x3 = self.is_3x3 - layers = self.layers - supported_layers = [50, 101, 152, 200] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - - if layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - num_filters = [64, 128, 256, 512] - if is_3x3 == False: - conv = self.conv_bn_layer( - input=input, - num_filters=64, - filter_size=7, - stride=2, - act='relu') - else: - conv = self.conv_bn_layer( - input=input, - num_filters=32, - filter_size=3, - stride=2, - act='relu', - name='conv1_1') - conv = self.conv_bn_layer( - input=conv, - num_filters=32, - filter_size=3, - stride=1, - act='relu', - name='conv1_2') - conv = self.conv_bn_layer( - input=conv, - num_filters=64, - filter_size=3, - stride=1, - act='relu', - name='conv1_3') - - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152, 200] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - if_first=block == 0, - name=conv_name) - - pool = fluid.layers.pool2d( - input=conv, pool_size=7, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - - out = fluid.layers.fc( - input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - - return out, pool - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - 
bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def conv_bn_layer_new(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - pool = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - pool_type='avg') - - conv = fluid.layers.conv2d( - input=pool, - num_filters=num_filters, - filter_size=filter_size, - stride=1, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def shortcut(self, input, ch_out, stride, name, if_first=False): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - if if_first: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return self.conv_bn_layer_new( - input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck_block(self, input, num_filters, stride, name, if_first): - conv0 = self.conv_bn_layer( - input=input, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - conv2 = self.conv_bn_layer( - input=conv1, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c") - - short = self.shortcut( - input, - num_filters * 4, - stride, - if_first=if_first, - name=name + "_branch1") - - return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') - - -def ResNet50_vd(): - model = ResNet(layers=50, is_3x3=True) - return model - - -def ResNet101_vd(): - model = ResNet(layers=101, is_3x3=True) - return model - - -def ResNet152_vd(): - model = ResNet(layers=152, is_3x3=True) - return model - - -def ResNet200_vd(): - model = ResNet(layers=200, is_3x3=True) - return model diff --git a/hub_module/modules/image/classification/resnext101_32x4d_imagenet/module.py b/hub_module/modules/image/classification/resnext101_32x4d_imagenet/module.py deleted file mode 100644 index 1a8585062b99c465ec489306bbb84965c1f3ecf2..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnext101_32x4d_imagenet/module.py +++ /dev/null @@ -1,193 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
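Both copies of `resnet_vd.py` in this patch load pretrained weights by name: `fluid.io.load_vars` keeps a variable only when a file with the same name exists in the pretrained directory, so the conv/BN naming rules are load-bearing. A small self-contained sketch of those rules (plain Python; the helper names are mine, not the repository's):

```python
def bn_name_for(conv_name):
    # Same rule as conv_bn_layer above: "conv1" -> "bn_conv1",
    # "res2a_branch2a" -> "bn2a_branch2a".
    return "bn_" + conv_name if conv_name == "conv1" else "bn" + conv_name[3:]

def block_names(layers=101, depth=(3, 4, 23, 3)):
    # ResNet-101/152/200 name stage-3 blocks "res4a", "res4b1", "res4b2", ...
    # while every other stage uses consecutive letters ("res2a", "res2b", ...).
    names = []
    for block, reps in enumerate(depth):
        for i in range(reps):
            if layers in (101, 152, 200) and block == 2:
                names.append("res%d%s" % (block + 2, "a" if i == 0 else "b%d" % i))
            else:
                names.append("res%d%s" % (block + 2, chr(97 + i)))
    return names

assert bn_name_for("conv1") == "bn_conv1"
assert bn_name_for("res2a_branch2a") == "bn2a_branch2a"
assert block_names()[7:10] == ["res4a", "res4b1", "res4b2"]
```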
-import math -import os - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__(self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - act: str = None, - name: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNeXt101.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - cardinality: int, - shortcut: bool = True, - name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - groups=cardinality, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - stride=stride, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = paddle.elementwise_add(x=short, y=conv2, act='relu') - return y - - -@moduleinfo(name="resnext101_32x4d_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnext101_32x4d_imagenet is a classification model, " - "this module is trained with Baidu open sourced dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNeXt101_32x4d(nn.Layer): - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNeXt101_32x4d, self).__init__() - - self.layers = 101 - self.cardinality = 32 - depth = [3, 4, 23, 3] - num_channels = [64, 256, 512, 1024] - num_filters = [128, 256, 512, 1024] - - self.conv = ConvBNLayer(num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu', name="res_conv1") - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - if block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - 
conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock(num_channels=num_channels[block] if i == 0 else num_filters[block] * - int(64 // self.cardinality), - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=self.cardinality, - shortcut=shortcut, - name=conv_name)) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - self.pool2d_avg_channels = num_channels[-1] * 2 - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), - bias_attr=ParamAttr(name="fc_offset")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnext101_32x4d_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext101_32x4d_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv(inputs) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnext101_64x4d_imagenet/module.py b/hub_module/modules/image/classification/resnext101_64x4d_imagenet/module.py deleted file mode 100644 index 3c5703d30b679e27a5cb4993d16b92abf0555084..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnext101_64x4d_imagenet/module.py +++ /dev/null @@ -1,193 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
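The channel arithmetic in these ResNeXt modules deserves a note: the 32x4d variant above uses `num_filters = [128, 256, 512, 1024]` and doubles the block output (`* 2 if cardinality == 32`), while the 64x4d variant below bakes the expansion into wider `num_filters = [256, 512, 1024, 2048]`; both produce the same per-stage output widths, just split differently between group count and group width. A quick sketch checking that (plain Python; `resnext_widths` is my own helper, not repository code):

```python
def resnext_widths(cardinality, base_width=4, stages=4):
    # Per-stage grouped-conv width and block output channels.
    widths, outs = [], []
    for s in range(stages):
        d = base_width * (2 ** s)            # group width: 4, 8, 16, 32
        width = cardinality * d              # channels of the grouped 3x3 conv
        out = width * 2 if cardinality == 32 else width  # same rule as above
        widths.append(width)
        outs.append(out)
    return widths, outs

assert resnext_widths(32) == ([128, 256, 512, 1024], [256, 512, 1024, 2048])
assert resnext_widths(64) == ([256, 512, 1024, 2048], [256, 512, 1024, 2048])
```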
-import math -import os - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__(self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - act: str = None, - name: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNeXt101.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - cardinality: int, - shortcut: bool = True, - name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - groups=cardinality, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - stride=stride, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = paddle.elementwise_add(x=short, y=conv2, act='relu') - return y - - -@moduleinfo(name="resnext101_64x4d_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnext101_64x4d_imagenet is a classification model, " - "this module is trained with Baidu open sourced dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNeXt101_64x4d(nn.Layer): - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNeXt101_64x4d, self).__init__() - - self.layers = 101 - self.cardinality = 64 - depth = [3, 4, 23, 3] - num_channels = [64, 256, 512, 1024] - num_filters = [256, 512, 1024, 2048] - - self.conv = ConvBNLayer(num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu', name="res_conv1") - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - if block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - 
conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock(num_channels=num_channels[block] if i == 0 else num_filters[block] * - int(64 // self.cardinality), - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=self.cardinality, - shortcut=shortcut, - name=conv_name)) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - self.pool2d_avg_channels = num_channels[-1] * 2 - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), - bias_attr=ParamAttr(name="fc_offset")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnext101_64x4d_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext101_64x4d_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv(inputs) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnext101_vd_32x4d_imagenet/module.py b/hub_module/modules/image/classification/resnext101_vd_32x4d_imagenet/module.py deleted file mode 100644 index ba128d83ba6b65cab6994e145ccd0dfee8d6686d..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnext101_vd_32x4d_imagenet/module.py +++ /dev/null @@ -1,212 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
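The dygraph modules above fetch a missing checkpoint with `os.system('wget ... -O ' + checkpoint)`, which fails silently when the `wget` binary is absent and can leave a truncated file behind. A hedged alternative sketch using only the standard library; the URL and filename are copied from the 64x4d module above, and `ensure_checkpoint` is a name of my own, not the repository's:

```python
import os
import urllib.request

def ensure_checkpoint(directory,
                      filename='resnext101_64x4d_imagenet.pdparams',
                      base_url='https://paddlehub.bj.bcebos.com/dygraph/image_classification/'):
    """Download the checkpoint if it is not already present; return its path."""
    path = os.path.join(directory, filename)
    if not os.path.exists(path):
        # Download to a temporary name first so an interrupted transfer
        # does not leave a truncated checkpoint at the final path.
        tmp = path + '.part'
        urllib.request.urlretrieve(base_url + filename, tmp)
        os.replace(tmp, path)
    return path
```

Whether to verify a checksum after download is left open here; the original modules do not.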
- -import math -import os - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__( - self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - is_vd_mode: bool = False, - act: str = None, - name: str = None, - ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNeXt50_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - cardinality: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - groups=cardinality, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = paddle.elementwise_add(x=short, y=conv2, act='relu') - return y - - -@moduleinfo(name="resnext101_vd_32x4d_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnext101_vd_32x4d_imagenet is a classification model, " - "this module is trained with Baidu open sourced dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNeXt101_vd(nn.Layer): - """ResNeXt101_vd model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNeXt101_vd, self).__init__() - - self.layers = 101 - self.cardinality = 32 - depth = [3, 4, 23, 3] - num_channels = [64, 256, 512, 1024] - num_filters = [128, 256, 512, 1024] - - self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, 
filter_size=3, stride=2, act='relu', name="conv1_1") - self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") - self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") - - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - if block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock(num_channels=num_channels[block] if i == 0 else num_filters[block] * - int(64 // self.cardinality), - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=self.cardinality, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - - self.pool2d_avg_channels = num_channels[-1] * 2 - - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), - bias_attr=ParamAttr(name="fc_offset")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnext101_vd_32x4d_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext101_vd_32x4d_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnext101_vd_64x4d_imagenet/module.py b/hub_module/modules/image/classification/resnext101_vd_64x4d_imagenet/module.py deleted file mode 100644 index dd26a774a3fa895bdc22a58dbe383b278bbd48f3..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnext101_vd_64x4d_imagenet/module.py +++ /dev/null @@ -1,212 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
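The 64x4d module below keeps the same per-stage output widths as the 32x4d module above; only the width of the grouped 3x3 conv changes, and the `* 2 if cardinality == 32 else * 1` branches in `BottleneckBlock` absorb the difference. A quick check of that bookkeeping:

```python
# Channel bookkeeping shared by the 32x4d and 64x4d modules: both end each
# stage at the same width; only the grouped 3x3 width differs.
for cardinality, num_filters in [(32, [128, 256, 512, 1024]),
                                 (64, [256, 512, 1024, 2048])]:
    for stage, nf in enumerate(num_filters, start=2):
        mid = nf                                   # grouped 3x3 conv width
        out = nf * 2 if cardinality == 32 else nf  # conv2 output width
        print(f"res{stage}: {cardinality}x{mid // cardinality}d, "
              f"mid={mid}, out={out}")
# Every stage ends at 256/512/1024/2048 channels in both variants, and the
# group width mid // cardinality is always 4, hence "32x4d" and "64x4d".
# int(64 // cardinality) in the input-channel expression is the same
# *2-or-*1 factor, recovering the previous block's output width.
```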
- -import math -import os - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__( - self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - is_vd_mode: bool = False, - act: str = None, - name: str = None, - ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNeXt50_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - cardinality: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - groups=cardinality, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = paddle.elementwise_add(x=short, y=conv2, act='relu') - return y - - -@moduleinfo(name="resnext101_vd_64x4d_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnext101_vd_64x4d_imagenet is a classification model, " - "this module is trained with Baidu open sourced dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNeXt101_vd(nn.Layer): - """ResNeXt101_vd model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNeXt101_vd, self).__init__() - - self.layers = 101 - self.cardinality = 64 - depth = [3, 4, 23, 3] - num_channels = [64, 256, 512, 1024] - num_filters = [256, 512, 1024, 2048] - - self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, 
filter_size=3, stride=2, act='relu', name="conv1_1") - self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") - self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") - - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - if block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock(num_channels=num_channels[block] if i == 0 else num_filters[block] * - int(64 // self.cardinality), - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=self.cardinality, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - - self.pool2d_avg_channels = num_channels[-1] * 2 - - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), - bias_attr=ParamAttr(name="fc_offset")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnext101_vd_64x4d_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext101_vd_64x4d_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnext152_32x4d_imagenet/module.py b/hub_module/modules/image/classification/resnext152_32x4d_imagenet/module.py deleted file mode 100644 index e9f6a57328e9d87587c1f8d220c39bc9c9599a41..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnext152_32x4d_imagenet/module.py +++ /dev/null @@ -1,193 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
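The 152-layer modules starting here are why the name-building loop special-cases `block == 2`: stage res4 holds 36 bottlenecks, and `chr(97 + i)` would run past `'z'` at `i >= 26`, so that stage switches to `"b" + str(i)` names, keeping the `ParamAttr` names aligned with the pretrained checkpoint's parameters. The scheme, spelled out:

```python
# Why stage res4 gets special-cased names in the 152-layer modules:
# depth[2] == 36 bottlenecks, and chr(97 + i) runs out of letters at i == 26.
depth = [3, 8, 36, 3]
for block in range(len(depth)):
    names = []
    for i in range(depth[block]):
        if block == 2:
            names.append("res4a" if i == 0 else f"res4b{i}")
        else:
            names.append(f"res{block + 2}{chr(97 + i)}")
    print(names[:3], "...", names[-1])
# res4 runs res4a, res4b1, ..., res4b35.
```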
-import math -import os - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__(self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - act: str = None, - name: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNeXt152.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - cardinality: int, - shortcut: bool = True, - name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - groups=cardinality, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - stride=stride, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = paddle.elementwise_add(x=short, y=conv2, act='relu') - return y - - -@moduleinfo(name="resnext152_32x4d_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnext152_32x4d_imagenet is a classification model, " - "this module is trained with Baidu open sourced dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNeXt152_32x4d(nn.Layer): - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNeXt152_32x4d, self).__init__() - - self.layers = 152 - self.cardinality = 32 - depth = [3, 8, 36, 3] - num_channels = [64, 256, 512, 1024] - num_filters = [128, 256, 512, 1024] - - self.conv = ConvBNLayer(num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu', name="res_conv1") - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - if block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - 
conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock(num_channels=num_channels[block] if i == 0 else num_filters[block] * - int(64 // self.cardinality), - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=self.cardinality, - shortcut=shortcut, - name=conv_name)) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - self.pool2d_avg_channels = num_channels[-1] * 2 - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), - bias_attr=ParamAttr(name="fc_offset")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnext152_32x4d_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext152_32x4d_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv(inputs) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnext152_64x4d_imagenet/module.py b/hub_module/modules/image/classification/resnext152_64x4d_imagenet/module.py deleted file mode 100644 index dbcf878cfa2f4994a1522f57269304ff00df8b61..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnext152_64x4d_imagenet/module.py +++ /dev/null @@ -1,193 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import math -import os - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__(self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - act: str = None, - name: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNeXt152.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - cardinality: int, - shortcut: bool = True, - name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - groups=cardinality, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - stride=stride, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = paddle.elementwise_add(x=short, y=conv2, act='relu') - return y - - -@moduleinfo(name="resnext152_64x4d_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnext152_64x4d_imagenet is a classification model, " - "this module is trained with Baidu open sourced dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNeXt152_64x4d(nn.Layer): - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNeXt152_64x4d, self).__init__() - - self.layers = 152 - self.cardinality = 64 - depth = [3, 8, 36, 3] - num_channels = [64, 256, 512, 1024] - num_filters = [256, 512, 1024, 2048] - - self.conv = ConvBNLayer(num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu', name="res_conv1") - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - if block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - 
conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock(num_channels=num_channels[block] if i == 0 else num_filters[block] * - int(64 // self.cardinality), - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=self.cardinality, - shortcut=shortcut, - name=conv_name)) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - self.pool2d_avg_channels = num_channels[-1] * 2 - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), - bias_attr=ParamAttr(name="fc_offset")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnext152_64x4d_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext152_64x4d_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv(inputs) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnext152_vd_32x4d_imagenet/module.py b/hub_module/modules/image/classification/resnext152_vd_32x4d_imagenet/module.py deleted file mode 100644 index a1f4579dd17ec1f9b055bc5a5244170bad489288..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnext152_vd_32x4d_imagenet/module.py +++ /dev/null @@ -1,212 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import math -import os - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__( - self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - is_vd_mode: bool = False, - act: str = None, - name: str = None, - ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNeXt152_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - cardinality: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - groups=cardinality, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = paddle.elementwise_add(x=short, y=conv2, act='relu') - return y - - -@moduleinfo(name="resnext152_vd_32x4d_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnext152_vd_32x4d_imagenet is a classification model, " - "this module is trained with Baidu open sourced dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNeXt152_vd(nn.Layer): - """ResNeXt152_vd model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNeXt152_vd, self).__init__() - - self.layers = 152 - self.cardinality = 32 - depth = [3, 8, 36, 3] - num_channels = [64, 256, 512, 1024] - num_filters = [128, 256, 512, 1024] - - self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, 
filter_size=3, stride=2, act='relu', name="conv1_1") - self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") - self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") - - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - if block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock(num_channels=num_channels[block] if i == 0 else num_filters[block] * - int(64 // self.cardinality), - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=self.cardinality, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - - self.pool2d_avg_channels = num_channels[-1] * 2 - - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), - bias_attr=ParamAttr(name="fc_offset")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnext152_vd_32x4d_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext152_vd_32x4d_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnext152_vd_64x4d_imagenet/module.py b/hub_module/modules/image/classification/resnext152_vd_64x4d_imagenet/module.py deleted file mode 100644 index b9e8f79378f1c6db092693841168f96221bcf13a..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnext152_vd_64x4d_imagenet/module.py +++ /dev/null @@ -1,212 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
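The classifier head in each module draws its weights from `Uniform(-stdv, stdv)` with `stdv = 1/sqrt(fan_in)`, the classic fan-in uniform rule (each weight has variance `stdv**2 / 3`, so the pre-activation variance stays roughly independent of width). For the 2048 pooled channels used here:

```python
# Head initialization used by every module in this diff: uniform fan-in rule.
import math

fan_in = 2048                  # pool2d_avg_channels = num_channels[-1] * 2
stdv = 1.0 / math.sqrt(fan_in)
print(f"fc weights ~ U(-{stdv:.4f}, {stdv:.4f})")  # ~U(-0.0221, 0.0221)
```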
- -import math -import os - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__( - self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - is_vd_mode: bool = False, - act: str = None, - name: str = None, - ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNeXt152_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - cardinality: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - groups=cardinality, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = paddle.elementwise_add(x=short, y=conv2, act='relu') - return y - - -@moduleinfo(name="resnext152_vd_64x4d_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnext152_vd_64x4d_imagenet is a classification model, " - "this module is trained with Baidu open sourced dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNeXt152_vd(nn.Layer): - """ResNeXt152_vd model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNeXt152_vd, self).__init__() - - self.layers = 152 - self.cardinality = 64 - depth = [3, 8, 36, 3] - num_channels = [64, 256, 512, 1024] - num_filters = [256, 512, 1024, 2048] - - self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, 
filter_size=3, stride=2, act='relu', name="conv1_1") - self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") - self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") - - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - if block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock(num_channels=num_channels[block] if i == 0 else num_filters[block] * - int(64 // self.cardinality), - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=self.cardinality, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - - self.pool2d_avg_channels = num_channels[-1] * 2 - - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), - bias_attr=ParamAttr(name="fc_offset")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnext152_vd_64x4d_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext152_vd_64x4d_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnext50_32x4d_imagenet/module.py b/hub_module/modules/image/classification/resnext50_32x4d_imagenet/module.py deleted file mode 100644 index 44eeea3a91b1d595e3969276d127a3b94ad8f82b..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnext50_32x4d_imagenet/module.py +++ /dev/null @@ -1,187 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
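The `shortcut` flag pattern repeated in every constructor gives the first bottleneck of each stage a 1x1 projection shortcut (the channel count or stride changes there) and an identity shortcut to every later block. Traced for the ResNeXt50 depths used by the module below:

```python
# Which bottlenecks receive a 1x1 projection shortcut, traced for the
# ResNeXt50 depths; the same flag pattern appears in every module here.
depth = [3, 4, 6, 3]
for block in range(len(depth)):
    shortcut = False
    for i in range(depth[block]):
        stride = 2 if i == 0 and block != 0 else 1
        kind = "identity" if shortcut else "1x1 projection"
        print(f"res{block + 2}{chr(97 + i)}: stride={stride}, {kind}")
        shortcut = True
```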
-import math -import os - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__(self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - act: str = None, - name: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNeXt50.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - cardinality: int, - shortcut: bool = True, - name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - groups=cardinality, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - stride=stride, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = paddle.elementwise_add(x=short, y=conv2, act='relu') - return y - - -@moduleinfo(name="resnext50_32x4d_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnext50_32x4d_imagenet is a classification model, " - "this module is trained with Baidu open sourced dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNeXt50_32x4d(nn.Layer): - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNeXt50_32x4d, self).__init__() - - self.layers = 50 - self.cardinality = 32 - depth = [3, 4, 6, 3] - num_channels = [64, 256, 512, 1024] - num_filters = [128, 256, 512, 1024] - - self.conv = ConvBNLayer(num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu', name="res_conv1") - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - 
'bb_%d_%d' % (block, i), - BottleneckBlock(num_channels=num_channels[block] if i == 0 else num_filters[block] * - int(64 // self.cardinality), - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=self.cardinality, - shortcut=shortcut, - name=conv_name)) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - self.pool2d_avg_channels = num_channels[-1] * 2 - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), - bias_attr=ParamAttr(name="fc_offset")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnext50_32x4d_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext50_32x4d_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv(inputs) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnext50_64x4d_imagenet/module.py b/hub_module/modules/image/classification/resnext50_64x4d_imagenet/module.py deleted file mode 100644 index 4f44ffb8dfa2ae2c5b00903d104fe577d1c98c66..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnext50_64x4d_imagenet/module.py +++ /dev/null @@ -1,187 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
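All of these `forward()` methods end the same way: `AdaptiveAvgPool2d(1)` plus a reshape is global average pooling feeding the `Linear` head. Isolated below, again with this snapshot's layer spellings (newer paddle: `AdaptiveAvgPool2D`):

```python
# The shared classifier head: global average pooling, flatten, linear.
import paddle
import paddle.nn as nn

feat = paddle.randn([8, 2048, 7, 7])   # res5 output for a 224x224 input
pool = nn.AdaptiveAvgPool2d(1)
fc = nn.Linear(2048, 1000)

y = paddle.reshape(pool(feat), shape=[-1, 2048])  # [8, 2048]
logits = fc(y)                                    # [8, 1000]
```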
-import math -import os - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__(self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - act: str = None, - name: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNeXt50.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - cardinality: int, - shortcut: bool = True, - name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - groups=cardinality, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - stride=stride, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = paddle.elementwise_add(x=short, y=conv2, act='relu') - return y - - -@moduleinfo(name="resnext50_64x4d_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnext50_64x4d_imagenet is a classification model, " - "this module is trained with Baidu open sourced dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNeXt50_64x4d(nn.Layer): - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNeXt50_64x4d, self).__init__() - - self.layers = 50 - self.cardinality = 64 - depth = [3, 4, 6, 3] - num_channels = [64, 256, 512, 1024] - num_filters = [256, 512, 1024, 2048] - - self.conv = ConvBNLayer(num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu', name="res_conv1") - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - 
'bb_%d_%d' % (block, i), - BottleneckBlock(num_channels=num_channels[block] if i == 0 else num_filters[block] * - int(64 // self.cardinality), - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=self.cardinality, - shortcut=shortcut, - name=conv_name)) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - self.pool2d_avg_channels = num_channels[-1] * 2 - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), - bias_attr=ParamAttr(name="fc_offset")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnext50_64x4d_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext50_64x4d_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv(inputs) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnext50_vd_32x4d_imagenet/module.py b/hub_module/modules/image/classification/resnext50_vd_32x4d_imagenet/module.py deleted file mode 100644 index b388ce5c34070fa821b346abffe9cccb2b3d9d2a..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnext50_vd_32x4d_imagenet/module.py +++ /dev/null @@ -1,206 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
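The second hallmark of the `_vd` modules, visible in the constructor below, is the deep stem: three 3x3 convolutions (stride 2, 1, 1; channels 3 -> 32 -> 32 -> 64) standing in for the single 7x7 stride-2 conv of the plain variants, with the same output shape but two extra nonlinearities. A sketch of the equivalence (snapshot layer spellings; shapes illustrative):

```python
# Deep stem of the _vd modules vs. the plain 7x7 stem: same output shape.
import paddle
import paddle.nn as nn

x = paddle.randn([1, 3, 224, 224])

plain_stem = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
deep_stem = nn.Sequential(
    nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1), nn.ReLU(),
    nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), nn.ReLU(),
    nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), nn.ReLU(),
)

print(plain_stem(x).shape)  # [1, 64, 112, 112]
print(deep_stem(x).shape)   # [1, 64, 112, 112]
```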
- -import math -import os - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__( - self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - is_vd_mode: bool = False, - act: str = None, - name: str = None, - ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNeXt50_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - cardinality: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - groups=cardinality, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = paddle.elementwise_add(x=short, y=conv2, act='relu') - return y - - -@moduleinfo(name="resnext50_vd_32x4d_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnext50_vd_32x4d_imagenet is a classification model, " - "this module is trained with Baidu open sourced dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNeXt50_vd(nn.Layer): - """ResNeXt50_vd model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNeXt50_vd, self).__init__() - - self.layers = 50 - self.cardinality = 32 - depth = [3, 4, 6, 3] - num_channels = [64, 256, 512, 1024] - num_filters = [128, 256, 512, 1024] - - self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, filter_size=3, 
stride=2, act='relu', name="conv1_1") - self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") - self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") - - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock(num_channels=num_channels[block] if i == 0 else num_filters[block] * - int(64 // self.cardinality), - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=self.cardinality, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - - self.pool2d_avg_channels = num_channels[-1] * 2 - - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), - bias_attr=ParamAttr(name="fc_offset")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnext50_vd_32x4d_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext50_vd_32x4d_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/resnext50_vd_64x4d_imagenet/module.py b/hub_module/modules/image/classification/resnext50_vd_64x4d_imagenet/module.py deleted file mode 100644 index b1eb63c03283e8eb3244410a093a3ffec585a018..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/resnext50_vd_64x4d_imagenet/module.py +++ /dev/null @@ -1,206 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
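For reference, a hand-computed shape walk through `forward()` with the usual 224x224 ImageNet input; it applies to every variant in this diff, since all of them pool down to 2048 channels before the fully connected layer:

```python
# Shape walk for forward() on a 224x224 input, common to all variants here.
shapes = {
    "stem + 3x3/2 max pool":      (64, 56, 56),
    "res2 (stride 1)":            (256, 56, 56),
    "res3 (enters at stride 2)":  (512, 28, 28),
    "res4 (enters at stride 2)":  (1024, 14, 14),
    "res5 (enters at stride 2)":  (2048, 7, 7),
    "global avg pool + reshape":  (2048,),
}
for stage, shape in shapes.items():
    print(f"{stage:>27}: {shape}")
```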
- -import math -import os - -import numpy as np -import paddle -from paddle import ParamAttr -import paddle.nn as nn -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__( - self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - is_vd_mode: bool = False, - act: str = None, - name: str = None, - ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - """Bottleneck Block for ResNeXt50_vd.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - cardinality: int, - shortcut: bool = True, - if_first: bool = False, - name: str = None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - groups=cardinality, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer(num_channels=num_filters, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer(num_channels=num_channels, - num_filters=num_filters * 2 if cardinality == 32 else num_filters, - filter_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs: paddle.Tensor): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - y = paddle.elementwise_add(x=short, y=conv2, act='relu') - return y - - -@moduleinfo(name="resnext50_vd_64x4d_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="resnext50_vd_64x4d_imagenet is a classification model, " - "this module is trained with Baidu open sourced dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ResNeXt50_vd(nn.Layer): - """ResNeXt50_vd model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ResNeXt50_vd, self).__init__() - - self.layers = 50 - self.cardinality = 64 - depth = [3, 4, 6, 3] - num_channels = [64, 256, 512, 1024] - num_filters = [256, 512, 1024, 2048] - - self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, filter_size=3, 
stride=2, act='relu', name="conv1_1") - self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") - self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") - - self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.block_list = [] - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock(num_channels=num_channels[block] if i == 0 else num_filters[block] * - int(64 // self.cardinality), - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=self.cardinality, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - self.block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = AdaptiveAvgPool2d(1) - - self.pool2d_avg_channels = num_channels[-1] * 2 - - stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) - - self.out = Linear(self.pool2d_avg_channels, - class_dim, - weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), - bias_attr=ParamAttr(name="fc_offset")) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'resnext50_vd_64x4d_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext50_vd_64x4d_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - for block in self.block_list: - y = block(y) - y = self.pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) - y = self.out(y) - return y diff --git a/hub_module/modules/image/classification/se_resnet18_vd_imagenet/data_feed.py b/hub_module/modules/image/classification/se_resnet18_vd_imagenet/data_feed.py deleted file mode 100644 index d5ffb5efe9fdfbd143b949892aa44d851e907b41..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/se_resnet18_vd_imagenet/data_feed.py +++ /dev/null @@ -1,84 +0,0 @@ -# coding=utf-8 -import os -import time -from collections import OrderedDict - -import cv2 -import numpy as np -from PIL import Image - -__all__ = ['reader'] - -DATA_DIM = 224 -img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) -img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) - - -def resize_short(img, target_size): - percent = float(target_size) / min(img.size[0], img.size[1]) - resized_width = int(round(img.size[0] * percent)) - resized_height = int(round(img.size[1] * percent)) - img = img.resize((resized_width, resized_height), Image.LANCZOS) - return img - - -def crop_image(img, target_size, center): - width, height = img.size - size = target_size - if center == True: - w_start = (width - size) / 2 - h_start = (height - size) / 2 - else: - w_start = np.random.randint(0, width - size + 1) - h_start = np.random.randint(0, height - size + 1) - w_end = w_start + size - h_end = h_start + size - img = img.crop((w_start, h_start, w_end, h_end)) - return img - - -def process_image(img): - img = resize_short(img, 
target_size=256) - img = crop_image(img, target_size=DATA_DIM, center=True) - if img.mode != 'RGB': - img = img.convert('RGB') - img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 - img -= img_mean - img /= img_std - return img - - -def reader(images=None, paths=None): - """ - Preprocess to yield image. - - Args: - images (list[numpy.ndarray]): images data, shape of each is [H, W, C]. - paths (list[str]): paths to images. - - Yield: - each (collections.OrderedDict): info of original image, preprocessed image. - """ - component = list() - if paths: - for im_path in paths: - each = OrderedDict() - assert os.path.isfile( - im_path), "The {} isn't a valid file path.".format(im_path) - each['org_im_path'] = im_path - each['org_im'] = Image.open(im_path) - each['org_im_width'], each['org_im_height'] = each['org_im'].size - component.append(each) - if images is not None: - assert isinstance(images, list), "images should be a list." - for im in images: - each = OrderedDict() - each['org_im'] = Image.fromarray(im[:, :, ::-1]) - each['org_im_path'] = 'ndarray_time={}'.format( - round(time.time(), 6) * 1e6) - each['org_im_width'], each['org_im_height'] = each['org_im'].size - component.append(each) - - for element in component: - element['image'] = process_image(element['org_im']) - yield element diff --git a/hub_module/modules/image/classification/se_resnet18_vd_imagenet/module.py b/hub_module/modules/image/classification/se_resnet18_vd_imagenet/module.py deleted file mode 100644 index 4e6d6db7fd3140ca659e7ffcc29de3fe35af37bd..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/se_resnet18_vd_imagenet/module.py +++ /dev/null @@ -1,282 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division - -import ast -import argparse -import os - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix - -from se_resnet18_vd_imagenet.processor import postprocess, base64_to_cv2 -from se_resnet18_vd_imagenet.data_feed import reader -from se_resnet18_vd_imagenet.se_resnet import SE_ResNet18_vd - - -@moduleinfo( - name="se_resnet18_vd_imagenet", - type="CV/image_classification", - author="paddlepaddle", - author_email="paddle-dev@baidu.com", - summary= - "SE_ResNet18_vd is an image classification model, this module is trained with the ImageNet dataset.", - version="1.0.0") -class SEResNet18vdImageNet(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "se_resnet18_vd_imagenet_model") - label_file = os.path.join(self.directory, "label_list.txt") - with open(label_file, 'r', encoding='utf-8') as file: - self.label_list = file.read().split("\n")[:-1] - self.predictor_set = False - - def get_expected_image_width(self): - return 224 - - def get_expected_image_height(self): - return 224 - - def get_pretrained_images_mean(self): - im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3) - return im_mean - - def get_pretrained_images_std(self): - im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) - return im_std - - def _set_config(self): - """ - predictor config setting - """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: -
_places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu( - memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, trainable=True, pretrained=True): - """context for transfer learning. - - Args: - trainable (bool): Set parameters in program to be trainable. - pretrained (bool): Whether to load pretrained model. - - Returns: - inputs (dict): key is 'image', corresponding value is the image tensor. - outputs (dict): keys are: - 'classification', corresponding value is the result of classification. - 'feature_map', corresponding value is the result of the layer before the fully connected layer. - context_prog (fluid.Program): program for transfer learning. - """ - context_prog = fluid.Program() - startup_prog = fluid.Program() - with fluid.program_guard(context_prog, startup_prog): - with fluid.unique_name.guard(): - image = fluid.layers.data( - name="image", shape=[3, 224, 224], dtype="float32") - resnet_vd = SE_ResNet18_vd() - output, feature_map = resnet_vd.net( - input=image, class_dim=len(self.label_list)) - - name_prefix = '@HUB_{}@'.format(self.name) - inputs = {'image': name_prefix + image.name} - outputs = { - 'classification': name_prefix + output.name, - 'feature_map': name_prefix + feature_map.name - } - add_vars_prefix(context_prog, name_prefix) - add_vars_prefix(startup_prog, name_prefix) - global_vars = context_prog.global_block().vars - inputs = { - key: global_vars[value] - for key, value in inputs.items() - } - outputs = { - key: global_vars[value] - for key, value in outputs.items() - } - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - # pretrained - if pretrained: - - def _if_exist(var): - b = os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - return b - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - context_prog, - predicate=_if_exist) - else: - exe.run(startup_prog) - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - return inputs, outputs, context_prog - - def classification(self, - images=None, - paths=None, - batch_size=1, - use_gpu=False, - top_k=1): - """ - API for image classification. - - Args: - images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. - paths (list[str]): The paths of images. - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - top_k (int): Return top k results. - - Returns: - res (list[dict]): The classification results. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
- ) - - if not self.predictor_set: - self._set_config() - self.predictor_set = True - - all_data = list() - for yield_data in reader(images, paths): - all_data.append(yield_data) - - total_num = len(all_data) - loop_num = int(np.ceil(total_num / batch_size)) - - res = list() - for iter_id in range(loop_num): - batch_data = list() - handle_id = iter_id * batch_size - for image_id in range(batch_size): - try: - batch_data.append(all_data[handle_id + image_id]) - except: - pass - # feed batch image - batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.copy()) - predictor_output = self.gpu_predictor.run([ - batch_image - ]) if use_gpu else self.cpu_predictor.run([batch_image]) - out = postprocess( - data_out=predictor_output[0].as_ndarray(), - label_list=self.label_list, - top_k=top_k) - res += out - return res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.classification(images=images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. - """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.classification( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether to use GPU or not.") - self.arg_config_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - self.arg_config_group.add_argument( - '--top_k', - type=ast.literal_eval, - default=1, - help="Return top k results.") - - def add_module_input_arg(self): - """ - Add the command input options.
- """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") diff --git a/hub_module/modules/image/classification/se_resnet18_vd_imagenet/resnet_vd.py b/hub_module/modules/image/classification/se_resnet18_vd_imagenet/resnet_vd.py deleted file mode 100755 index d8ad3260710b67f822bf34914c2f9c9c8a56bfaf..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/se_resnet18_vd_imagenet/resnet_vd.py +++ /dev/null @@ -1,291 +0,0 @@ -#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math - -import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr - -__all__ = [ - "ResNet", "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd", - "ResNet152_vd", "ResNet200_vd" -] - - -class ResNet(): - def __init__(self, layers=50, is_3x3=False): - self.layers = layers - self.is_3x3 = is_3x3 - - def net(self, input, class_dim=1000): - is_3x3 = self.is_3x3 - layers = self.layers - supported_layers = [18, 34, 50, 101, 152, 200] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - num_filters = [64, 128, 256, 512] - if is_3x3 == False: - conv = self.conv_bn_layer( - input=input, - num_filters=64, - filter_size=7, - stride=2, - act='relu') - else: - conv = self.conv_bn_layer( - input=input, - num_filters=32, - filter_size=3, - stride=2, - act='relu', - name='conv1_1') - conv = self.conv_bn_layer( - input=conv, - num_filters=32, - filter_size=3, - stride=1, - act='relu', - name='conv1_2') - conv = self.conv_bn_layer( - input=conv, - num_filters=64, - filter_size=3, - stride=1, - act='relu', - name='conv1_3') - - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - - if layers >= 50: - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152, 200] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - if_first=block == i == 0, - name=conv_name) - else: - for block in range(len(depth)): - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.basic_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - if_first=block == i == 0, - name=conv_name) - - pool = fluid.layers.pool2d( - input=conv, pool_type='avg', 
global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - - out = fluid.layers.fc( - input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - - return out, pool - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def conv_bn_layer_new(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - pool = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - pool_type='avg', - ceil_mode=True) - - conv = fluid.layers.conv2d( - input=pool, - num_filters=num_filters, - filter_size=filter_size, - stride=1, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def shortcut(self, input, ch_out, stride, name, if_first=False): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - if if_first: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return self.conv_bn_layer_new( - input, ch_out, 1, stride, name=name) - elif if_first: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck_block(self, input, num_filters, stride, name, if_first): - conv0 = self.conv_bn_layer( - input=input, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - conv2 = self.conv_bn_layer( - input=conv1, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c") - - short = self.shortcut( - input, - num_filters * 4, - stride, - if_first=if_first, - name=name + "_branch1") - - return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') - - def basic_block(self, input, num_filters, stride, name, if_first): - conv0 = self.conv_bn_layer( - input=input, - num_filters=num_filters, - filter_size=3, - act='relu', - stride=stride, - name=name + "_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - short = self.shortcut( - input, - num_filters, - stride, - if_first=if_first, - name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - - -def ResNet18_vd(): - model = ResNet(layers=18, is_3x3=True) - return model - - -def ResNet34_vd(): - model = ResNet(layers=34, is_3x3=True) - return model - - -def ResNet50_vd(): - model = ResNet(layers=50, is_3x3=True) - 
return model - - -def ResNet101_vd(): - model = ResNet(layers=101, is_3x3=True) - return model - - -def ResNet152_vd(): - model = ResNet(layers=152, is_3x3=True) - return model - - -def ResNet200_vd(): - model = ResNet(layers=200, is_3x3=True) - return model diff --git a/hub_module/modules/image/classification/se_resnet18_vd_imagenet/se_resnet.py b/hub_module/modules/image/classification/se_resnet18_vd_imagenet/se_resnet.py deleted file mode 100644 index deb54b79b35c0ce36663266583bf59d33ec86579..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/se_resnet18_vd_imagenet/se_resnet.py +++ /dev/null @@ -1,335 +0,0 @@ -#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr -import math - -__all__ = [ - "SE_ResNet_vd", "SE_ResNet18_vd", "SE_ResNet34_vd", "SE_ResNet50_vd", - "SE_ResNet101_vd", "SE_ResNet152_vd", "SE_ResNet200_vd" -] - - -class SE_ResNet_vd(): - def __init__(self, layers=50, is_3x3=False): - self.layers = layers - self.is_3x3 = is_3x3 - - def net(self, input, class_dim=1000): - is_3x3 = self.is_3x3 - layers = self.layers - supported_layers = [18, 34, 50, 101, 152, 200] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - num_filters = [64, 128, 256, 512] - reduction_ratio = 16 - if is_3x3 == False: - conv = self.conv_bn_layer( - input=input, - num_filters=64, - filter_size=7, - stride=2, - act='relu') - else: - conv = self.conv_bn_layer( - input=input, - num_filters=32, - filter_size=3, - stride=2, - act='relu', - name='conv1_1') - conv = self.conv_bn_layer( - input=conv, - num_filters=32, - filter_size=3, - stride=1, - act='relu', - name='conv1_2') - conv = self.conv_bn_layer( - input=conv, - num_filters=64, - filter_size=3, - stride=1, - act='relu', - name='conv1_3') - - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - if layers >= 50: - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152, 200] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - if_first=block == i == 0, - reduction_ratio=reduction_ratio, - name=conv_name) - - else: - for block in range(len(depth)): - for i in range(depth[block]): - conv_name = "res" + str(block + 
2) + chr(97 + i) - conv = self.basic_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - if_first=block == i == 0, - reduction_ratio=reduction_ratio, - name=conv_name) - - pool = fluid.layers.pool2d( - input=conv, pool_size=7, pool_type='avg', global_pooling=True) - - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv), - name='fc6_weights'), - bias_attr=ParamAttr(name='fc6_offset')) - - return out, pool - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def conv_bn_layer_new(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - pool = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - pool_type='avg', - ceil_mode=True) - - conv = fluid.layers.conv2d( - input=pool, - num_filters=num_filters, - filter_size=filter_size, - stride=1, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def shortcut(self, input, ch_out, stride, name, if_first=False): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - if if_first: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return self.conv_bn_layer_new( - input, ch_out, 1, stride, name=name) - elif if_first: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck_block(self, input, num_filters, stride, name, if_first, - reduction_ratio): - conv0 = self.conv_bn_layer( - input=input, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - conv2 = self.conv_bn_layer( - input=conv1, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c") - scale = self.squeeze_excitation( - input=conv2, - num_channels=num_filters * 4, - reduction_ratio=reduction_ratio, - name='fc_' + name) - - short = self.shortcut( - input, - num_filters * 4, - stride, - if_first=if_first, - name=name + "_branch1") - - return fluid.layers.elementwise_add(x=short, y=scale, act='relu') - - def basic_block(self, input, num_filters, stride, name, if_first, - reduction_ratio): - conv0 = self.conv_bn_layer( - input=input, - num_filters=num_filters, - filter_size=3, - act='relu', - stride=stride, - name=name + "_branch2a") - 
conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - scale = self.squeeze_excitation( - input=conv1, - num_channels=num_filters, - reduction_ratio=reduction_ratio, - name='fc_' + name) - short = self.shortcut( - input, - num_filters, - stride, - if_first=if_first, - name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=scale, act='relu') - - def squeeze_excitation(self, - input, - num_channels, - reduction_ratio, - name=None): - pool = fluid.layers.pool2d( - input=input, pool_size=0, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - squeeze = fluid.layers.fc( - input=pool, - size=num_channels // reduction_ratio, - act='relu', - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv), - name=name + '_sqz_weights'), - bias_attr=ParamAttr(name=name + '_sqz_offset')) - stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0) - excitation = fluid.layers.fc( - input=squeeze, - size=num_channels, - act='sigmoid', - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv), - name=name + '_exc_weights'), - bias_attr=ParamAttr(name=name + '_exc_offset')) - scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0) - return scale - - -def SE_ResNet18_vd(): - model = SE_ResNet_vd(layers=18, is_3x3=True) - return model - - -def SE_ResNet34_vd(): - model = SE_ResNet_vd(layers=34, is_3x3=True) - return model - - -def SE_ResNet50_vd(): - model = SE_ResNet_vd(layers=50, is_3x3=True) - return model - - -def SE_ResNet101_vd(): - model = SE_ResNet_vd(layers=101, is_3x3=True) - return model - - -def SE_ResNet152_vd(): - model = SE_ResNet_vd(layers=152, is_3x3=True) - return model - - -def SE_ResNet200_vd(): - model = SE_ResNet_vd(layers=200, is_3x3=True) - return model diff --git a/hub_module/modules/image/classification/shufflenet_v2_imagenet/module.py b/hub_module/modules/image/classification/shufflenet_v2_imagenet/module.py deleted file mode 100644 index 1049df3a1395b068d726762839d978dad0425c5b..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/shufflenet_v2_imagenet/module.py +++ /dev/null @@ -1,294 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
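# NOTE (illustrative, not part of the original file): channel_shuffle below
# interleaves channels across `groups` with a reshape -> transpose -> reshape
# round-trip so that information flows between the two branches of each unit.
# A minimal NumPy equivalent of the same permutation, for intuition:
#
#     import numpy as np
#
#     x = np.arange(2 * 6).reshape(2, 6, 1, 1)   # N=2, C=6, H=W=1
#     n, c, h, w = x.shape
#     g = 2                                      # number of groups
#     y = (x.reshape(n, g, c // g, h, w)
#           .transpose(0, 2, 1, 3, 4)
#           .reshape(n, c, h, w))
#     # for g=2 the channel order [0,1,2,3,4,5] becomes [0,3,1,4,2,5]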
- -import os - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import MSRA -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -def channel_shuffle(x: paddle.Tensor, groups: int): - """Shuffle input channels.""" - batchsize, num_channels, height, width = x.shape[0], x.shape[1], x.shape[2], x.shape[3] - channels_per_group = num_channels // groups - - # reshape - x = paddle.reshape(x=x, shape=[batchsize, groups, channels_per_group, height, width]) - - x = paddle.transpose(x=x, perm=[0, 2, 1, 3, 4]) - # flatten - x = paddle.reshape(x=x, shape=[batchsize, num_channels, height, width]) - return x - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__(self, - num_channels: int, - filter_size: int, - num_filters: int, - stride: int, - padding: int, - channels: int = None, - num_groups: int = 1, - if_act: bool = True, - act: str = 'relu', - name: str = None): - super(ConvBNLayer, self).__init__() - self._if_act = if_act - assert act in ['relu', 'swish'], \ - "supported act are {} but your act is {}".format( - ['relu', 'swish'], act) - self._act = act - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - weight_attr=ParamAttr(initializer=MSRA(), name=name + "_weights"), - bias_attr=False) - - self._batch_norm = BatchNorm(num_filters, - param_attr=ParamAttr(name=name + "_bn_scale"), - bias_attr=ParamAttr(name=name + "_bn_offset"), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance") - - def forward(self, inputs: paddle.Tensor, if_act: bool = True): - y = self._conv(inputs) - y = self._batch_norm(y) - if self._if_act: - y = F.relu(y) if self._act == 'relu' else F.swish(y) - return y - - -class InvertedResidualUnit(nn.Layer): - """Inverted Residual unit.""" - def __init__(self, - num_channels: int, - num_filters: int, - stride: int, - benchmodel: int, - act: str = 'relu', - name: str = None): - super(InvertedResidualUnit, self).__init__() - assert stride in [1, 2], \ - "supported stride are {} but your stride is {}".format([1, 2], stride) - self.benchmodel = benchmodel - oup_inc = num_filters // 2 - inp = num_channels - if benchmodel == 1: - self._conv_pw = ConvBNLayer(num_channels=num_channels // 2, - num_filters=oup_inc, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=True, - act=act, - name='stage_' + name + '_conv1') - self._conv_dw = ConvBNLayer(num_channels=oup_inc, - num_filters=oup_inc, - filter_size=3, - stride=stride, - padding=1, - num_groups=oup_inc, - if_act=False, - act=act, - name='stage_' + name + '_conv2') - self._conv_linear = ConvBNLayer(num_channels=oup_inc, - num_filters=oup_inc, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=True, - act=act, - name='stage_' + name + '_conv3') - else: - # branch1 - self._conv_dw_1 = ConvBNLayer(num_channels=num_channels, - num_filters=inp, - filter_size=3, - stride=stride, - padding=1, - num_groups=inp, - if_act=False, - act=act, - name='stage_' + name + '_conv4') - self._conv_linear_1 = ConvBNLayer(num_channels=inp, - num_filters=oup_inc, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=True, - act=act, - name='stage_' + name + '_conv5') - # branch2 - self._conv_pw_2 = 
ConvBNLayer(num_channels=num_channels, - num_filters=oup_inc, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=True, - act=act, - name='stage_' + name + '_conv1') - self._conv_dw_2 = ConvBNLayer(num_channels=oup_inc, - num_filters=oup_inc, - filter_size=3, - stride=stride, - padding=1, - num_groups=oup_inc, - if_act=False, - act=act, - name='stage_' + name + '_conv2') - self._conv_linear_2 = ConvBNLayer(num_channels=oup_inc, - num_filters=oup_inc, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=True, - act=act, - name='stage_' + name + '_conv3') - - def forward(self, inputs: paddle.Tensor): - if self.benchmodel == 1: - x1, x2 = paddle.split(inputs, num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2], axis=1) - x2 = self._conv_pw(x2) - x2 = self._conv_dw(x2) - x2 = self._conv_linear(x2) - out = paddle.concat([x1, x2], axis=1) - else: - x1 = self._conv_dw_1(inputs) - x1 = self._conv_linear_1(x1) - - x2 = self._conv_pw_2(inputs) - x2 = self._conv_dw_2(x2) - x2 = self._conv_linear_2(x2) - out = paddle.concat([x1, x2], axis=1) - - return channel_shuffle(out, 2) - - -@moduleinfo(name="shufflenet_v2_imagenet", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="shufflenet_v2_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class ShuffleNet(nn.Layer): - """ShuffleNet model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(ShuffleNet, self).__init__() - self.scale = 1 - self.class_dim = class_dim - stage_repeats = [4, 8, 4] - stage_out_channels = [-1, 24, 116, 232, 464, 1024] - - # 1. conv1 - self._conv1 = ConvBNLayer(num_channels=3, - num_filters=stage_out_channels[1], - filter_size=3, - stride=2, - padding=1, - if_act=True, - act='relu', - name='stage1_conv') - self._max_pool = MaxPool2d(kernel_size=3, stride=2, padding=1) - - # 2. bottleneck sequences - self._block_list = [] - i = 1 - in_c = int(32) - for idxstage in range(len(stage_repeats)): - numrepeat = stage_repeats[idxstage] - output_channel = stage_out_channels[idxstage + 2] - for i in range(numrepeat): - if i == 0: - block = self.add_sublayer( - str(idxstage + 2) + '_' + str(i + 1), - InvertedResidualUnit(num_channels=stage_out_channels[idxstage + 1], - num_filters=output_channel, - stride=2, - benchmodel=2, - act='relu', - name=str(idxstage + 2) + '_' + str(i + 1))) - self._block_list.append(block) - else: - block = self.add_sublayer( - str(idxstage + 2) + '_' + str(i + 1), - InvertedResidualUnit(num_channels=output_channel, - num_filters=output_channel, - stride=1, - benchmodel=1, - act='relu', - name=str(idxstage + 2) + '_' + str(i + 1))) - self._block_list.append(block) - - # 3. last_conv - self._last_conv = ConvBNLayer(num_channels=stage_out_channels[-2], - num_filters=stage_out_channels[-1], - filter_size=1, - stride=1, - padding=0, - if_act=True, - act='relu', - name='conv5') - - # 4. pool - self._pool2d_avg = AdaptiveAvgPool2d(1) - self._out_c = stage_out_channels[-1] - # 5. 
fc - self._fc = Linear(stage_out_channels[-1], - class_dim, - weight_attr=ParamAttr(name='fc6_weights'), - bias_attr=ParamAttr(name='fc6_offset')) - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'shufflenet_v2_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/shufflenet_v2_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - y = self._conv1(inputs) - y = self._max_pool(y) - for inv in self._block_list: - y = inv(y) - y = self._last_conv(y) - y = self._pool2d_avg(y) - y = paddle.reshape(y, shape=[-1, self._out_c]) - y = self._fc(y) - return y diff --git a/hub_module/modules/image/classification/vgg16_imagenet/data_feed.py b/hub_module/modules/image/classification/vgg16_imagenet/data_feed.py deleted file mode 100644 index ffeb258707b7c93eee3bab748e884a754f295f8f..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/vgg16_imagenet/data_feed.py +++ /dev/null @@ -1,74 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import os -from collections import OrderedDict - -import cv2 -import numpy as np -from PIL import Image, ImageEnhance -from paddle import fluid - -DATA_DIM = 224 -img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) -img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) - - -def resize_short(img, target_size): - percent = float(target_size) / min(img.size[0], img.size[1]) - resized_width = int(round(img.size[0] * percent)) - resized_height = int(round(img.size[1] * percent)) - img = img.resize((resized_width, resized_height), Image.LANCZOS) - return img - - -def crop_image(img, target_size, center): - width, height = img.size - size = target_size - if center == True: - w_start = (width - size) / 2 - h_start = (height - size) / 2 - else: - w_start = np.random.randint(0, width - size + 1) - h_start = np.random.randint(0, height - size + 1) - w_end = w_start + size - h_end = h_start + size - img = img.crop((w_start, h_start, w_end, h_end)) - return img - - -def process_image(img): - img = resize_short(img, target_size=256) - img = crop_image(img, target_size=DATA_DIM, center=True) - if img.mode != 'RGB': - img = img.convert('RGB') - #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) - img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 - img -= img_mean - img /= img_std - return img - - -def test_reader(paths=None, images=None): - """data generator - :param paths: path to images. 
- :type paths: list, each element is a str - :param images: data of images, [N, H, W, C] - :type images: numpy.ndarray - """ - img_list = [] - if paths: - for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file path.".format(img_path) - img = Image.open(img_path) - #img = cv2.imread(img_path) - img_list.append(img) - if images is not None: - for img in images: - img_list.append(Image.fromarray(np.uint8(img))) - for im in img_list: - im = process_image(im) - yield im diff --git a/hub_module/modules/image/classification/vgg16_imagenet/module.py b/hub_module/modules/image/classification/vgg16_imagenet/module.py deleted file mode 100644 index 3cb7166e71bb77258d4ab105e658f11e46c24024..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/vgg16_imagenet/module.py +++ /dev/null @@ -1,267 +0,0 @@ -# coding=utf-8 -import os -import ast -import argparse - -import numpy as np -import paddlehub as hub -import paddle.fluid as fluid -from paddlehub.module.module import moduleinfo, runnable -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.io.parser import txt_parser - -from vgg16_imagenet.vgg import VGG -from vgg16_imagenet.processor import load_label_info -from vgg16_imagenet.data_feed import test_reader - - -@moduleinfo( - name="vgg16_imagenet", - version="1.1.0", - type="cv/classification", - summary= - "VGG16 is an image classification model trained with the ImageNet-2012 dataset.", - author="paddlepaddle", - author_email="paddle-dev@baidu.com") -class VGG16(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "vgg16_model") - self.label_names = load_label_info( - os.path.join(self.directory, "label_file.txt")) - self.infer_prog = None - self.pred_out = None - self._set_config() - - def get_expected_image_width(self): - return 224 - - def get_expected_image_height(self): - return 224 - - def get_pretrained_images_mean(self): - im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3) - return im_mean - - def get_pretrained_images_std(self): - im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) - return im_std - - def _set_config(self): - """ - predictor config setting - """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, - input_image=None, - trainable=True, - pretrained=True, - param_prefix='', - get_prediction=False, - extra_block_filters=((256, 512, 1, 2, 3), (128, 256, 1, 2, 3), - (128, 256, 0, 1, 3), (128, 256, 0, 1, 3)), - normalizations=(20., -1, -1, -1, -1, -1)): - """Distill the Head Features, so as to perform transfer learning. - - :param input_image: image tensor. - :type input_image: - :param trainable: whether to set parameters trainable. - :type trainable: bool - :param pretrained: whether to load default pretrained model. - :type pretrained: bool - :param param_prefix: the prefix of parameters. - :type param_prefix: str - :param get_prediction: whether to get prediction.
- :type get_prediction: bool - :param extra_block_filters: in each extra block, params: - [in_channel, out_channel, padding_size, stride_size, filter_size] - :type extra_block_filters: list - :param normalizations: params list of init scale in l2 norm, skip init - scale if param is -1. - :type normalizations: list - """ - context_prog = input_image.block.program if input_image else fluid.Program( - ) - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - image = input_image if input_image else fluid.data( - name='image', - shape=[-1, 3, 224, 224], - dtype='float32', - lod_level=0) - - backbone = VGG( - depth=16, - with_extra_blocks=not get_prediction, - normalizations=normalizations, - extra_block_filters=extra_block_filters) - - out = backbone(image) - inputs = {'image': image} - if get_prediction: - outputs = {'pred_out': out} - else: - outputs = {'body_feats': out} - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - if pretrained: - - def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - if not param_prefix: - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - main_program=context_prog, - predicate=_if_exist) - else: - exe.run(startup_program) - return inputs, outputs, context_prog - - def classification(self, - paths=None, - images=None, - use_gpu=False, - batch_size=1, - top_k=1): - """API of Classification. - :param paths: the path of images. - :type paths: list, each element corresponds to the path of an image. - :param images: data of images, [N, H, W, C] - :type images: numpy.ndarray - :param use_gpu: whether to use gpu or not. - :type use_gpu: bool - :param batch_size: batch size. - :type batch_size: int - :param top_k: return top k results. - :type top_k: int - """ - if self.infer_prog is None: - inputs, outputs, self.infer_prog = self.context( - trainable=False, pretrained=True, get_prediction=True) - self.infer_prog = self.infer_prog.clone(for_test=True) - self.pred_out = outputs['pred_out'] - place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - all_images = [] - paths = paths if paths else [] - for yield_data in test_reader(paths, images): - all_images.append(yield_data) - - images_num = len(all_images) - loop_num = int(np.ceil(images_num / batch_size)) - res_list = [] - top_k = max(min(top_k, 1000), 1) - for iter_id in range(loop_num): - batch_data = [] - handle_id = iter_id * batch_size - for image_id in range(batch_size): - try: - batch_data.append(all_images[handle_id + image_id]) - except: - pass - batch_data = np.array(batch_data).astype('float32') - data_tensor = PaddleTensor(batch_data.copy()) - if use_gpu: - result = self.gpu_predictor.run([data_tensor]) - else: - result = self.cpu_predictor.run([data_tensor]) - for i, res in enumerate(result[0].as_ndarray()): - res_dict = {} - pred_label = np.argsort(res)[::-1][:top_k] - for k in pred_label: - class_name = self.label_names[int(k)].split(',')[0] - max_prob = res[k] - res_dict[class_name] = max_prob - res_list.append(res_dict) - return res_list - - def add_module_config_arg(self): - """ - Add the command config options - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether to use GPU or not") - - self.arg_config_group.add_argument( - '--batch_size', - type=int, - default=1, - help="batch size for prediction") - - def add_module_input_arg(self): - """ - Add the command input options - """ -
self.arg_input_group.add_argument( - '--input_path', type=str, default=None, help="input data") - - self.arg_input_group.add_argument( - '--input_file', - type=str, - default=None, - help="file containing input data") - - def check_input_data(self, args): - input_data = [] - if args.input_path: - input_data = [args.input_path] - elif args.input_file: - if not os.path.exists(args.input_file): - raise RuntimeError("File %s does not exist." % args.input_file) - else: - input_data = txt_parser.parse(args.input_file, use_strip=True) - return input_data - - @runnable - def run_cmd(self, argvs): - self.parser = argparse.ArgumentParser( - description="Run the {}".format(self.name), - prog="hub run {}".format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - input_data = self.check_input_data(args) - if len(input_data) == 0: - self.parser.print_help() - exit(1) - else: - for image_path in input_data: - if not os.path.exists(image_path): - raise RuntimeError( - "File %s does not exist." % image_path) - return self.classification( - paths=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size) diff --git a/hub_module/modules/image/classification/vgg16_imagenet/vgg.py b/hub_module/modules/image/classification/vgg16_imagenet/vgg.py deleted file mode 100644 index dc760f328947a16cddb0b1d0d2d7556fc651fbf7..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/vgg16_imagenet/vgg.py +++ /dev/null @@ -1,224 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr - -__all__ = ['VGG'] - - -class VGG(object): - """ - VGG, see https://arxiv.org/abs/1409.1556 - - Args: - depth (int): the VGG net depth (16 or 19) - normalizations (list): params list of init scale in l2 norm, skip init - scale if param is -1.
- with_extra_blocks (bool): whether or not extra blocks should be added - extra_block_filters (list): in each extra block, params: - [in_channel, out_channel, padding_size, stride_size, filter_size] - class_dim (int): number of class while classification - """ - - def __init__(self, - depth=16, - with_extra_blocks=False, - normalizations=[20., -1, -1, -1, -1, -1], - extra_block_filters=[[256, 512, 1, 2, 3], [128, 256, 1, 2, 3], - [128, 256, 0, 1, 3], [128, 256, 0, 1, 3]], - class_dim=1000): - assert depth in [16, 19], "depth {} not in [16, 19]" - self.depth = depth - self.depth_cfg = {16: [2, 2, 3, 3, 3], 19: [2, 2, 4, 4, 4]} - self.with_extra_blocks = with_extra_blocks - self.normalizations = normalizations - self.extra_block_filters = extra_block_filters - self.class_dim = class_dim - - def __call__(self, input): - layers = [] - layers += self._vgg_block(input) - - if not self.with_extra_blocks: - return layers[-1] - - layers += self._add_extras_block(layers[-1]) - norm_cfg = self.normalizations - for k, v in enumerate(layers): - if not norm_cfg[k] == -1: - layers[k] = self._l2_norm_scale(v, init_scale=norm_cfg[k]) - - return layers - - def _vgg_block(self, input): - nums = self.depth_cfg[self.depth] - vgg_base = [64, 128, 256, 512, 512] - conv = input - res_layer = [] - layers = [] - for k, v in enumerate(vgg_base): - conv = self._conv_block( - conv, v, nums[k], name="conv{}_".format(k + 1)) - layers.append(conv) - if self.with_extra_blocks: - if k == 4: - conv = self._pooling_block(conv, 3, 1, pool_padding=1) - else: - conv = self._pooling_block(conv, 2, 2) - else: - conv = self._pooling_block(conv, 2, 2) - if not self.with_extra_blocks: - fc_dim = 4096 - fc_name = ["fc6", "fc7", "fc8"] - fc1 = fluid.layers.fc( - input=conv, - size=fc_dim, - act='relu', - param_attr=fluid.param_attr.ParamAttr( - name=fc_name[0] + "_weights"), - bias_attr=fluid.param_attr.ParamAttr( - name=fc_name[0] + "_offset")) - fc2 = fluid.layers.fc( - input=fc1, - size=fc_dim, - act='relu', - param_attr=fluid.param_attr.ParamAttr( - name=fc_name[1] + "_weights"), - bias_attr=fluid.param_attr.ParamAttr( - name=fc_name[1] + "_offset")) - out = fluid.layers.fc( - input=fc2, - size=self.class_dim, - param_attr=fluid.param_attr.ParamAttr( - name=fc_name[2] + "_weights"), - bias_attr=fluid.param_attr.ParamAttr( - name=fc_name[2] + "_offset")) - out = fluid.layers.softmax(out) - res_layer.append(out) - return [out] - else: - fc6 = self._conv_layer(conv, 1024, 3, 1, 6, dilation=6, name="fc6") - fc7 = self._conv_layer(fc6, 1024, 1, 1, 0, name="fc7") - return [layers[3], fc7] - - def _add_extras_block(self, input): - cfg = self.extra_block_filters - conv = input - layers = [] - for k, v in enumerate(cfg): - assert len(v) == 5, "extra_block_filters size not fix" - conv = self._extra_block( - conv, - v[0], - v[1], - v[2], - v[3], - v[4], - name="conv{}_".format(6 + k)) - layers.append(conv) - - return layers - - def _conv_block(self, input, num_filter, groups, name=None): - conv = input - for i in range(groups): - conv = self._conv_layer( - input=conv, - num_filters=num_filter, - filter_size=3, - stride=1, - padding=1, - act='relu', - name=name + str(i + 1)) - return conv - - def _extra_block(self, - input, - num_filters1, - num_filters2, - padding_size, - stride_size, - filter_size, - name=None): - # 1x1 conv - conv_1 = self._conv_layer( - input=input, - num_filters=int(num_filters1), - filter_size=1, - stride=1, - act='relu', - padding=0, - name=name + "1") - - # 3x3 conv - conv_2 = self._conv_layer( - input=conv_1, - 
num_filters=int(num_filters2), - filter_size=filter_size, - stride=stride_size, - act='relu', - padding=padding_size, - name=name + "2") - return conv_2 - - def _conv_layer(self, - input, - num_filters, - filter_size, - stride, - padding, - dilation=1, - act='relu', - use_cudnn=True, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - dilation=dilation, - act=act, - use_cudnn=use_cudnn, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=ParamAttr( - name=name + "_biases") if self.with_extra_blocks else False, - name=name + '.conv2d.output.1') - return conv - - def _pooling_block(self, - conv, - pool_size, - pool_stride, - pool_padding=0, - ceil_mode=True): - pool = fluid.layers.pool2d( - input=conv, - pool_size=pool_size, - pool_type='max', - pool_stride=pool_stride, - pool_padding=pool_padding, - ceil_mode=ceil_mode) - return pool - - def _l2_norm_scale(self, input, init_scale=1.0, channel_shared=False): - from paddle.fluid.layer_helper import LayerHelper - from paddle.fluid.initializer import Constant - helper = LayerHelper("Scale") - l2_norm = fluid.layers.l2_normalize( - input, axis=1) # l2 norm along channel - shape = [1] if channel_shared else [input.shape[1]] - scale = helper.create_parameter( - attr=helper.param_attr, - shape=shape, - dtype=input.dtype, - default_initializer=Constant(init_scale)) - out = fluid.layers.elementwise_mul( - x=l2_norm, - y=scale, - axis=-1 if channel_shared else 1, - name="conv4_3_norm_scale") - return out diff --git a/hub_module/modules/image/classification/xception41_imagenet/module.py b/hub_module/modules/image/classification/xception41_imagenet/module.py deleted file mode 100644 index 087a8944b9498ff9949f1ec2242ce349624edf86..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/xception41_imagenet/module.py +++ /dev/null @@ -1,312 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
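# NOTE (illustrative, not part of the original file): the SeparableConv layer
# below factorizes a convolution into a 1x1 pointwise conv followed by a 3x3
# depthwise conv (groups == output channels), the basic building block of the
# entry/middle/exit flows. A rough sketch of the same factorization in the
# current Paddle 2.x API (the layer names here are placeholders):
#
#     import paddle.nn as nn
#
#     pointwise = nn.Conv2D(32, 64, kernel_size=1, bias_attr=False)
#     depthwise = nn.Conv2D(64, 64, kernel_size=3, padding=1,
#                           groups=64, bias_attr=False)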
- -import os -import sys -import math - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__(self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - act: str = None, - name: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - bn_name = "bn_" + name - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(name=bn_name + "_offset"), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class SeparableConv(nn.Layer): - """Basic separable conv layer, it contains pointwise conv and depthwise conv.""" - def __init__(self, input_channels: int, output_channels: int, stride: int = 1, name: str = None): - super(SeparableConv, self).__init__() - - self._pointwise_conv = ConvBNLayer(input_channels, output_channels, 1, name=name + "_sep") - self._depthwise_conv = ConvBNLayer(output_channels, - output_channels, - 3, - stride=stride, - groups=output_channels, - name=name + "_dw") - - def forward(self, inputs: paddle.Tensor): - x = self._pointwise_conv(inputs) - x = self._depthwise_conv(x) - return x - - -class EntryFlowBottleneckBlock(nn.Layer): - """Basic entry flow bottleneck block for Xception.""" - def __init__(self, - input_channels: int, - output_channels: int, - stride: int = 2, - name: str = None, - relu_first: bool = False): - super(EntryFlowBottleneckBlock, self).__init__() - self.relu_first = relu_first - - self._short = Conv2d(in_channels=input_channels, - out_channels=output_channels, - kernel_size=1, - stride=stride, - padding=0, - weight_attr=ParamAttr(name + "_branch1_weights"), - bias_attr=False) - self._conv1 = SeparableConv(input_channels, output_channels, stride=1, name=name + "_branch2a_weights") - self._conv2 = SeparableConv(output_channels, output_channels, stride=1, name=name + "_branch2b_weights") - self._pool = MaxPool2d(kernel_size=3, stride=stride, padding=1) - - def forward(self, inputs: paddle.Tensor): - conv0 = inputs - short = self._short(inputs) - if self.relu_first: - conv0 = F.relu(conv0) - conv1 = self._conv1(conv0) - conv2 = F.relu(conv1) - conv2 = self._conv2(conv2) - pool = self._pool(conv2) - return paddle.elementwise_add(x=short, y=pool) - - -class EntryFlow(nn.Layer): - """Entry flow for Xception.""" - def __init__(self, block_num: int = 3): - super(EntryFlow, self).__init__() - - name = "entry_flow" - self.block_num = block_num - self._conv1 = ConvBNLayer(3, 32, 3, stride=2, act="relu", name=name + "_conv1") - self._conv2 = ConvBNLayer(32, 64, 3, act="relu", name=name + "_conv2") - if block_num == 3: - self._conv_0 = EntryFlowBottleneckBlock(64, 128, stride=2, name=name + "_0", relu_first=False) - self._conv_1 = EntryFlowBottleneckBlock(128, 256, stride=2, name=name + 
"_1", relu_first=True) - self._conv_2 = EntryFlowBottleneckBlock(256, 728, stride=2, name=name + "_2", relu_first=True) - elif block_num == 5: - self._conv_0 = EntryFlowBottleneckBlock(64, 128, stride=2, name=name + "_0", relu_first=False) - self._conv_1 = EntryFlowBottleneckBlock(128, 256, stride=1, name=name + "_1", relu_first=True) - self._conv_2 = EntryFlowBottleneckBlock(256, 256, stride=2, name=name + "_2", relu_first=True) - self._conv_3 = EntryFlowBottleneckBlock(256, 728, stride=1, name=name + "_3", relu_first=True) - self._conv_4 = EntryFlowBottleneckBlock(728, 728, stride=2, name=name + "_4", relu_first=True) - else: - sys.exit(-1) - - def forward(self, inputs: paddle.Tensor): - x = self._conv1(inputs) - x = self._conv2(x) - - if self.block_num == 3: - x = self._conv_0(x) - x = self._conv_1(x) - x = self._conv_2(x) - elif self.block_num == 5: - x = self._conv_0(x) - x = self._conv_1(x) - x = self._conv_2(x) - x = self._conv_3(x) - x = self._conv_4(x) - return x - - -class MiddleFlowBottleneckBlock(nn.Layer): - """Basic middle flow bottleneck block for Xception.""" - def __init__(self, input_channels: int, output_channels: int, name: str): - super(MiddleFlowBottleneckBlock, self).__init__() - - self._conv_0 = SeparableConv(input_channels, output_channels, stride=1, name=name + "_branch2a_weights") - self._conv_1 = SeparableConv(output_channels, output_channels, stride=1, name=name + "_branch2b_weights") - self._conv_2 = SeparableConv(output_channels, output_channels, stride=1, name=name + "_branch2c_weights") - - def forward(self, inputs: paddle.Tensor): - conv0 = F.relu(inputs) - conv0 = self._conv_0(conv0) - conv1 = F.relu(conv0) - conv1 = self._conv_1(conv1) - conv2 = F.relu(conv1) - conv2 = self._conv_2(conv2) - return paddle.elementwise_add(x=inputs, y=conv2) - - -class MiddleFlow(nn.Layer): - """Middle flow for Xception.""" - def __init__(self, block_num: int = 8): - super(MiddleFlow, self).__init__() - - self.block_num = block_num - self._conv_0 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_0") - self._conv_1 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_1") - self._conv_2 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_2") - self._conv_3 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_3") - self._conv_4 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_4") - self._conv_5 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_5") - self._conv_6 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_6") - self._conv_7 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_7") - if block_num == 16: - self._conv_8 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_8") - self._conv_9 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_9") - self._conv_10 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_10") - self._conv_11 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_11") - self._conv_12 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_12") - self._conv_13 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_13") - self._conv_14 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_14") - self._conv_15 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_15") - - def forward(self, inputs: paddle.Tensor): - x = self._conv_0(inputs) - x = self._conv_1(x) - x = self._conv_2(x) - x = self._conv_3(x) - x = self._conv_4(x) - x = self._conv_5(x) - x = self._conv_6(x) - x = self._conv_7(x) - if self.block_num == 16: - x = self._conv_8(x) - x = self._conv_9(x) - x = self._conv_10(x) - x = 
self._conv_11(x) - x = self._conv_12(x) - x = self._conv_13(x) - x = self._conv_14(x) - x = self._conv_15(x) - return x - - -class ExitFlowBottleneckBlock(nn.Layer): - """Basic exit flow bottleneck block for Xception.""" - def __init__(self, input_channels: int, output_channels1: int, output_channels2: int, name: str): - super(ExitFlowBottleneckBlock, self).__init__() - - self._short = Conv2d(in_channels=input_channels, - out_channels=output_channels2, - kernel_size=1, - stride=2, - padding=0, - weight_attr=ParamAttr(name + "_branch1_weights"), - bias_attr=False) - self._conv_1 = SeparableConv(input_channels, output_channels1, stride=1, name=name + "_branch2a_weights") - self._conv_2 = SeparableConv(output_channels1, output_channels2, stride=1, name=name + "_branch2b_weights") - self._pool = MaxPool2d(kernel_size=3, stride=2, padding=1) - - def forward(self, inputs: paddle.Tensor): - short = self._short(inputs) - conv0 = F.relu(inputs) - conv1 = self._conv_1(conv0) - conv2 = F.relu(conv1) - conv2 = self._conv_2(conv2) - pool = self._pool(conv2) - return paddle.elementwise_add(x=short, y=pool) - - -class ExitFlow(nn.Layer): - """Exit flow for Xception.""" - def __init__(self, class_dim: int): - super(ExitFlow, self).__init__() - - name = "exit_flow" - - self._conv_0 = ExitFlowBottleneckBlock(728, 728, 1024, name=name + "_1") - self._conv_1 = SeparableConv(1024, 1536, stride=1, name=name + "_2") - self._conv_2 = SeparableConv(1536, 2048, stride=1, name=name + "_3") - self._pool = AdaptiveAvgPool2d(1) - stdv = 1.0 / math.sqrt(2048 * 1.0) - self._out = Linear(2048, - class_dim, - weight_attr=ParamAttr(name="fc_weights", initializer=Uniform(-stdv, stdv)), - bias_attr=ParamAttr(name="fc_offset")) - - def forward(self, inputs: paddle.Tensor): - conv0 = self._conv_0(inputs) - conv1 = self._conv_1(conv0) - conv1 = F.relu(conv1) - conv2 = self._conv_2(conv1) - conv2 = F.relu(conv2) - pool = self._pool(conv2) - pool = paddle.reshape(pool, [0, -1]) - out = self._out(pool) - return out - - -@moduleinfo(name="xception41_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="Xception41_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class Xception41(nn.Layer): - """Xception41 model.""" - def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): - super(Xception41, self).__init__() - self.entry_flow_block_num = 3 - self.middle_flow_block_num = 8 - self._entry_flow = EntryFlow(self.entry_flow_block_num) - self._middle_flow = MiddleFlow(self.middle_flow_block_num) - self._exit_flow = ExitFlow(class_dim) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'xception41_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/xception41_imagenet.pdparams -O' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - x = self._entry_flow(inputs) - x = self._middle_flow(x) - x = self._exit_flow(x) - return x diff --git a/hub_module/modules/image/classification/xception65_imagenet/module.py b/hub_module/modules/image/classification/xception65_imagenet/module.py deleted file mode 100644 index 
54822055239d80fe904e14bee16cfaf4dd1e57b3..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/xception65_imagenet/module.py +++ /dev/null @@ -1,311 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import math - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__(self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - act: str = None, - name: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - bn_name = "bn_" + name - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(name=bn_name + "_offset"), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class SeparableConv(nn.Layer): - """Basic separable conv layer, it contains pointwise conv and depthwise conv.""" - def __init__(self, input_channels: int, output_channels: int, stride: int = 1, name: str = None): - super(SeparableConv, self).__init__() - - self._pointwise_conv = ConvBNLayer(input_channels, output_channels, 1, name=name + "_sep") - self._depthwise_conv = ConvBNLayer(output_channels, - output_channels, - 3, - stride=stride, - groups=output_channels, - name=name + "_dw") - - def forward(self, inputs: paddle.Tensor): - x = self._pointwise_conv(inputs) - x = self._depthwise_conv(x) - return x - - -class EntryFlowBottleneckBlock(nn.Layer): - """Basic entry flow bottleneck block for Xception.""" - def __init__(self, - input_channels: int, - output_channels: int, - stride: int = 2, - name: str = None, - relu_first: bool = False): - super(EntryFlowBottleneckBlock, self).__init__() - self.relu_first = relu_first - - self._short = Conv2d(in_channels=input_channels, - out_channels=output_channels, - kernel_size=1, - stride=stride, - padding=0, - weight_attr=ParamAttr(name + "_branch1_weights"), - bias_attr=False) - self._conv1 = SeparableConv(input_channels, output_channels, stride=1, name=name + "_branch2a_weights") - self._conv2 = SeparableConv(output_channels, output_channels, stride=1, name=name + "_branch2b_weights") - self._pool = MaxPool2d(kernel_size=3, stride=stride, padding=1) - - def forward(self, inputs: 
paddle.Tensor): - conv0 = inputs - short = self._short(inputs) - if self.relu_first: - conv0 = F.relu(conv0) - conv1 = self._conv1(conv0) - conv2 = F.relu(conv1) - conv2 = self._conv2(conv2) - pool = self._pool(conv2) - return paddle.elementwise_add(x=short, y=pool) - - -class EntryFlow(nn.Layer): - """Entry flow for Xception.""" - def __init__(self, block_num: int = 3): - super(EntryFlow, self).__init__() - - name = "entry_flow" - self.block_num = block_num - self._conv1 = ConvBNLayer(3, 32, 3, stride=2, act="relu", name=name + "_conv1") - self._conv2 = ConvBNLayer(32, 64, 3, act="relu", name=name + "_conv2") - if block_num == 3: - self._conv_0 = EntryFlowBottleneckBlock(64, 128, stride=2, name=name + "_0", relu_first=False) - self._conv_1 = EntryFlowBottleneckBlock(128, 256, stride=2, name=name + "_1", relu_first=True) - self._conv_2 = EntryFlowBottleneckBlock(256, 728, stride=2, name=name + "_2", relu_first=True) - elif block_num == 5: - self._conv_0 = EntryFlowBottleneckBlock(64, 128, stride=2, name=name + "_0", relu_first=False) - self._conv_1 = EntryFlowBottleneckBlock(128, 256, stride=1, name=name + "_1", relu_first=True) - self._conv_2 = EntryFlowBottleneckBlock(256, 256, stride=2, name=name + "_2", relu_first=True) - self._conv_3 = EntryFlowBottleneckBlock(256, 728, stride=1, name=name + "_3", relu_first=True) - self._conv_4 = EntryFlowBottleneckBlock(728, 728, stride=2, name=name + "_4", relu_first=True) - else: - sys.exit(-1) - - def forward(self, inputs: paddle.Tensor): - x = self._conv1(inputs) - x = self._conv2(x) - - if self.block_num == 3: - x = self._conv_0(x) - x = self._conv_1(x) - x = self._conv_2(x) - elif self.block_num == 5: - x = self._conv_0(x) - x = self._conv_1(x) - x = self._conv_2(x) - x = self._conv_3(x) - x = self._conv_4(x) - return x - - -class MiddleFlowBottleneckBlock(nn.Layer): - """Basic middle flow bottleneck block for Xception.""" - def __init__(self, input_channels: int, output_channels: int, name: str): - super(MiddleFlowBottleneckBlock, self).__init__() - - self._conv_0 = SeparableConv(input_channels, output_channels, stride=1, name=name + "_branch2a_weights") - self._conv_1 = SeparableConv(output_channels, output_channels, stride=1, name=name + "_branch2b_weights") - self._conv_2 = SeparableConv(output_channels, output_channels, stride=1, name=name + "_branch2c_weights") - - def forward(self, inputs: paddle.Tensor): - conv0 = F.relu(inputs) - conv0 = self._conv_0(conv0) - conv1 = F.relu(conv0) - conv1 = self._conv_1(conv1) - conv2 = F.relu(conv1) - conv2 = self._conv_2(conv2) - return paddle.elementwise_add(x=inputs, y=conv2) - - -class MiddleFlow(nn.Layer): - """Middle flow for Xception.""" - def __init__(self, block_num: int = 8): - super(MiddleFlow, self).__init__() - - self.block_num = block_num - self._conv_0 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_0") - self._conv_1 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_1") - self._conv_2 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_2") - self._conv_3 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_3") - self._conv_4 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_4") - self._conv_5 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_5") - self._conv_6 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_6") - self._conv_7 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_7") - if block_num == 16: - self._conv_8 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_8") - self._conv_9 = MiddleFlowBottleneckBlock(728, 728, 
name="middle_flow_9") - self._conv_10 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_10") - self._conv_11 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_11") - self._conv_12 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_12") - self._conv_13 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_13") - self._conv_14 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_14") - self._conv_15 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_15") - - def forward(self, inputs: paddle.Tensor): - x = self._conv_0(inputs) - x = self._conv_1(x) - x = self._conv_2(x) - x = self._conv_3(x) - x = self._conv_4(x) - x = self._conv_5(x) - x = self._conv_6(x) - x = self._conv_7(x) - if self.block_num == 16: - x = self._conv_8(x) - x = self._conv_9(x) - x = self._conv_10(x) - x = self._conv_11(x) - x = self._conv_12(x) - x = self._conv_13(x) - x = self._conv_14(x) - x = self._conv_15(x) - return x - - -class ExitFlowBottleneckBlock(nn.Layer): - """Basic exit flow bottleneck block for Xception.""" - def __init__(self, input_channels, output_channels1, output_channels2, name): - super(ExitFlowBottleneckBlock, self).__init__() - - self._short = Conv2d(in_channels=input_channels, - out_channels=output_channels2, - kernel_size=1, - stride=2, - padding=0, - weight_attr=ParamAttr(name + "_branch1_weights"), - bias_attr=False) - self._conv_1 = SeparableConv(input_channels, output_channels1, stride=1, name=name + "_branch2a_weights") - self._conv_2 = SeparableConv(output_channels1, output_channels2, stride=1, name=name + "_branch2b_weights") - self._pool = MaxPool2d(kernel_size=3, stride=2, padding=1) - - def forward(self, inputs: paddle.Tensor): - short = self._short(inputs) - conv0 = F.relu(inputs) - conv1 = self._conv_1(conv0) - conv2 = F.relu(conv1) - conv2 = self._conv_2(conv2) - pool = self._pool(conv2) - return paddle.elementwise_add(x=short, y=pool) - - -class ExitFlow(nn.Layer): - """Exit flow for Xception.""" - def __init__(self, class_dim): - super(ExitFlow, self).__init__() - - name = "exit_flow" - - self._conv_0 = ExitFlowBottleneckBlock(728, 728, 1024, name=name + "_1") - self._conv_1 = SeparableConv(1024, 1536, stride=1, name=name + "_2") - self._conv_2 = SeparableConv(1536, 2048, stride=1, name=name + "_3") - self._pool = AdaptiveAvgPool2d(1) - stdv = 1.0 / math.sqrt(2048 * 1.0) - self._out = Linear(2048, - class_dim, - weight_attr=ParamAttr(name="fc_weights", initializer=Uniform(-stdv, stdv)), - bias_attr=ParamAttr(name="fc_offset")) - - def forward(self, inputs: paddle.Tensor): - conv0 = self._conv_0(inputs) - conv1 = self._conv_1(conv0) - conv1 = F.relu(conv1) - conv2 = self._conv_2(conv1) - conv2 = F.relu(conv2) - pool = self._pool(conv2) - pool = paddle.reshape(pool, [0, -1]) - out = self._out(pool) - return out - - -@moduleinfo(name="xception65_imagenet", - type="CV/classification", - author="paddlepaddle", - author_email="", - summary="Xception65_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class Xception65(nn.Layer): - def __init__(self, class_dim=1000, load_checkpoint: str = None): - super(Xception65, self).__init__() - self.entry_flow_block_num = 3 - self.middle_flow_block_num = 16 - self._entry_flow = EntryFlow(self.entry_flow_block_num) - self._middle_flow = MiddleFlow(self.middle_flow_block_num) - self._exit_flow = ExitFlow(class_dim) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load 
custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'xception65_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/xception65_imagenet.pdparams -O' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs): - x = self._entry_flow(inputs) - x = self._middle_flow(x) - x = self._exit_flow(x) - return x diff --git a/hub_module/modules/image/classification/xception71_imagenet/module.py b/hub_module/modules/image/classification/xception71_imagenet/module.py deleted file mode 100644 index 41cc0e0e9ad17fb0685f0be3434f7c04667c2fcc..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/classification/xception71_imagenet/module.py +++ /dev/null @@ -1,310 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import math - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddle.nn.initializer import Uniform -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - def __init__(self, - num_channels: int, - num_filters: int, - filter_size: int, - stride: int = 1, - groups: int = 1, - act: str = None, - name: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2d(in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - bn_name = "bn_" + name - self._batch_norm = BatchNorm(num_filters, - act=act, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(name=bn_name + "_offset"), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs: paddle.Tensor): - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class SeparableConv(nn.Layer): - """Basic separable conv layer, it contains pointwise conv and depthwise conv.""" - def __init__(self, input_channels: int, output_channels: int, stride: int = 1, name: str = None): - super(SeparableConv, self).__init__() - - self._pointwise_conv = ConvBNLayer(input_channels, output_channels, 1, name=name + "_sep") - self._depthwise_conv = ConvBNLayer(output_channels, - output_channels, - 3, - stride=stride, - groups=output_channels, - name=name + "_dw") - - def forward(self, inputs: paddle.Tensor): - x = self._pointwise_conv(inputs) - x = self._depthwise_conv(x) - return x - - -class EntryFlowBottleneckBlock(nn.Layer): - """Basic entry flow bottleneck block for Xception.""" - def __init__(self, - 
input_channels: int, - output_channels: int, - stride: int = 2, - name: str = None, - relu_first: bool = False): - super(EntryFlowBottleneckBlock, self).__init__() - self.relu_first = relu_first - - self._short = Conv2d(in_channels=input_channels, - out_channels=output_channels, - kernel_size=1, - stride=stride, - padding=0, - weight_attr=ParamAttr(name + "_branch1_weights"), - bias_attr=False) - self._conv1 = SeparableConv(input_channels, output_channels, stride=1, name=name + "_branch2a_weights") - self._conv2 = SeparableConv(output_channels, output_channels, stride=1, name=name + "_branch2b_weights") - self._pool = MaxPool2d(kernel_size=3, stride=stride, padding=1) - - def forward(self, inputs: paddle.Tensor): - conv0 = inputs - short = self._short(inputs) - if self.relu_first: - conv0 = F.relu(conv0) - conv1 = self._conv1(conv0) - conv2 = F.relu(conv1) - conv2 = self._conv2(conv2) - pool = self._pool(conv2) - return paddle.elementwise_add(x=short, y=pool) - - -class EntryFlow(nn.Layer): - """Entry flow for Xception.""" - def __init__(self, block_num: int = 3): - super(EntryFlow, self).__init__() - - name = "entry_flow" - self.block_num = block_num - self._conv1 = ConvBNLayer(3, 32, 3, stride=2, act="relu", name=name + "_conv1") - self._conv2 = ConvBNLayer(32, 64, 3, act="relu", name=name + "_conv2") - if block_num == 3: - self._conv_0 = EntryFlowBottleneckBlock(64, 128, stride=2, name=name + "_0", relu_first=False) - self._conv_1 = EntryFlowBottleneckBlock(128, 256, stride=2, name=name + "_1", relu_first=True) - self._conv_2 = EntryFlowBottleneckBlock(256, 728, stride=2, name=name + "_2", relu_first=True) - elif block_num == 5: - self._conv_0 = EntryFlowBottleneckBlock(64, 128, stride=2, name=name + "_0", relu_first=False) - self._conv_1 = EntryFlowBottleneckBlock(128, 256, stride=1, name=name + "_1", relu_first=True) - self._conv_2 = EntryFlowBottleneckBlock(256, 256, stride=2, name=name + "_2", relu_first=True) - self._conv_3 = EntryFlowBottleneckBlock(256, 728, stride=1, name=name + "_3", relu_first=True) - self._conv_4 = EntryFlowBottleneckBlock(728, 728, stride=2, name=name + "_4", relu_first=True) - else: - sys.exit(-1) - - def forward(self, inputs: paddle.Tensor): - x = self._conv1(inputs) - x = self._conv2(x) - - if self.block_num == 3: - x = self._conv_0(x) - x = self._conv_1(x) - x = self._conv_2(x) - elif self.block_num == 5: - x = self._conv_0(x) - x = self._conv_1(x) - x = self._conv_2(x) - x = self._conv_3(x) - x = self._conv_4(x) - return x - - -class MiddleFlowBottleneckBlock(nn.Layer): - """Basic middle flow bottleneck block for Xception.""" - def __init__(self, input_channels: int, output_channels: int, name: str): - super(MiddleFlowBottleneckBlock, self).__init__() - - self._conv_0 = SeparableConv(input_channels, output_channels, stride=1, name=name + "_branch2a_weights") - self._conv_1 = SeparableConv(output_channels, output_channels, stride=1, name=name + "_branch2b_weights") - self._conv_2 = SeparableConv(output_channels, output_channels, stride=1, name=name + "_branch2c_weights") - - def forward(self, inputs: paddle.Tensor): - conv0 = F.relu(inputs) - conv0 = self._conv_0(conv0) - conv1 = F.relu(conv0) - conv1 = self._conv_1(conv1) - conv2 = F.relu(conv1) - conv2 = self._conv_2(conv2) - return paddle.elementwise_add(x=inputs, y=conv2) - - -class MiddleFlow(nn.Layer): - """Middle flow for Xception.""" - def __init__(self, block_num: int = 8): - super(MiddleFlow, self).__init__() - - self.block_num = block_num - self._conv_0 = MiddleFlowBottleneckBlock(728, 728, 
name="middle_flow_0") - self._conv_1 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_1") - self._conv_2 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_2") - self._conv_3 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_3") - self._conv_4 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_4") - self._conv_5 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_5") - self._conv_6 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_6") - self._conv_7 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_7") - if block_num == 16: - self._conv_8 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_8") - self._conv_9 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_9") - self._conv_10 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_10") - self._conv_11 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_11") - self._conv_12 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_12") - self._conv_13 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_13") - self._conv_14 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_14") - self._conv_15 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_15") - - def forward(self, inputs: paddle.Tensor): - x = self._conv_0(inputs) - x = self._conv_1(x) - x = self._conv_2(x) - x = self._conv_3(x) - x = self._conv_4(x) - x = self._conv_5(x) - x = self._conv_6(x) - x = self._conv_7(x) - if self.block_num == 16: - x = self._conv_8(x) - x = self._conv_9(x) - x = self._conv_10(x) - x = self._conv_11(x) - x = self._conv_12(x) - x = self._conv_13(x) - x = self._conv_14(x) - x = self._conv_15(x) - return x - - -class ExitFlowBottleneckBlock(nn.Layer): - """Basic exit flow bottleneck block for Xception.""" - def __init__(self, input_channels: int, output_channels1: int, output_channels2: int, name: str): - super(ExitFlowBottleneckBlock, self).__init__() - - self._short = Conv2d(in_channels=input_channels, - out_channels=output_channels2, - kernel_size=1, - stride=2, - padding=0, - weight_attr=ParamAttr(name + "_branch1_weights"), - bias_attr=False) - self._conv_1 = SeparableConv(input_channels, output_channels1, stride=1, name=name + "_branch2a_weights") - self._conv_2 = SeparableConv(output_channels1, output_channels2, stride=1, name=name + "_branch2b_weights") - self._pool = MaxPool2d(kernel_size=3, stride=2, padding=1) - - def forward(self, inputs: paddle.Tensor): - short = self._short(inputs) - conv0 = F.relu(inputs) - conv1 = self._conv_1(conv0) - conv2 = F.relu(conv1) - conv2 = self._conv_2(conv2) - pool = self._pool(conv2) - return paddle.elementwise_add(x=short, y=pool) - - -class ExitFlow(nn.Layer): - def __init__(self, class_dim: int): - super(ExitFlow, self).__init__() - - name = "exit_flow" - - self._conv_0 = ExitFlowBottleneckBlock(728, 728, 1024, name=name + "_1") - self._conv_1 = SeparableConv(1024, 1536, stride=1, name=name + "_2") - self._conv_2 = SeparableConv(1536, 2048, stride=1, name=name + "_3") - self._pool = AdaptiveAvgPool2d(1) - stdv = 1.0 / math.sqrt(2048 * 1.0) - self._out = Linear(2048, - class_dim, - weight_attr=ParamAttr(name="fc_weights", initializer=Uniform(-stdv, stdv)), - bias_attr=ParamAttr(name="fc_offset")) - - def forward(self, inputs: paddle.Tensor): - conv0 = self._conv_0(inputs) - conv1 = self._conv_1(conv0) - conv1 = F.relu(conv1) - conv2 = self._conv_2(conv1) - conv2 = F.relu(conv2) - pool = self._pool(conv2) - pool = paddle.reshape(pool, [0, -1]) - out = self._out(pool) - return out - - -@moduleinfo(name="xception71_imagenet", - 
type="CV/classification", - author="paddlepaddle", - author_email="", - summary="Xception71_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class Xception71(nn.Layer): - def __init__(self, class_dim=1000, load_checkpoint: str = None): - super(Xception71, self).__init__() - self.entry_flow_block_num = 5 - self.middle_flow_block_num = 16 - self._entry_flow = EntryFlow(self.entry_flow_block_num) - self._middle_flow = MiddleFlow(self.middle_flow_block_num) - self._exit_flow = ExitFlow(class_dim) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'xception71_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/xception71_imagenet.pdparams -O' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs): - x = self._entry_flow(inputs) - x = self._middle_flow(x) - x = self._exit_flow(x) - return x diff --git a/hub_module/modules/image/colorization/user_guided_colorization/data_feed.py b/hub_module/modules/image/colorization/user_guided_colorization/data_feed.py deleted file mode 100644 index 984cb45701e6939de650b42d0b9f0046f83860bd..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/colorization/user_guided_colorization/data_feed.py +++ /dev/null @@ -1,133 +0,0 @@ -import paddle -import numpy as np - - -class ColorizeHint: - """Get hint and mask images for colorization. - - This method is prepared for user guided colorization tasks. Take the original RGB images as imput, - we will obtain the local hints and correspoding mask to guid colorization process. - - Args: - percent(float): Probability for ignoring hint in an iteration. - num_points(int): Number of selected hints in an iteration. - samp(str): Sample method, default is normal. - use_avg(bool): Whether to use mean in selected hint area. 
- - Returns: - hint(np.ndarray): hint images - mask(np.ndarray): mask images - """ - def __init__(self, percent: float, num_points: int = None, samp: str = 'normal', use_avg: bool = True): - self.percent = percent - self.num_points = num_points - self.samp = samp - self.use_avg = use_avg - - def __call__(self, data: np.ndarray, hint: np.ndarray, mask: np.ndarray): - sample_Ps = [1, 2, 3, 4, 5, 6, 7, 8, 9]  # candidate hint patch sizes - self.data = data - self.hint = hint - self.mask = mask - N, C, H, W = data.shape - for nn in range(N): - pp = 0 - cont_cond = True - while cont_cond: - if self.num_points is None: # draw a geometric number of hints - cont_cond = np.random.rand() > (1 - self.percent) - else: # add a fixed number of points - cont_cond = pp < self.num_points - if not cont_cond: # skip out of loop if condition not met - continue - P = np.random.choice(sample_Ps) # patch size - # sample location - if self.samp == 'normal': # gaussian distribution around the image center - h = int(np.clip(np.random.normal((H - P + 1) / 2., (H - P + 1) / 4.), 0, H - P)) - w = int(np.clip(np.random.normal((W - P + 1) / 2., (W - P + 1) / 4.), 0, W - P)) - else: # uniform distribution - h = np.random.randint(H - P + 1) - w = np.random.randint(W - P + 1) - # add color point - if self.use_avg: - hint[nn, :, h:h + P, w:w + P] = np.mean(np.mean(data[nn, :, h:h + P, w:w + P], - axis=2, - keepdims=True), - axis=1, - keepdims=True).reshape(1, C, 1, 1) - else: - hint[nn, :, h:h + P, w:w + P] = data[nn, :, h:h + P, w:w + P] - mask[nn, :, h:h + P, w:w + P] = 1 - # increment counter - pp += 1 - - mask -= 0.5 - return hint, mask - - -class ColorizePreprocess: - """Prepare dataset for image colorization. - - Args: - ab_thresh(float): Threshold on the AB range below which an image is treated as grayscale and masked out. - p(float): Probability of adding another hint at each step (geometric stopping rule). - num_points(int): Number of selected hints in an iteration. - samp(str): Sampling method, default is 'normal'. - use_avg(bool): Whether to use the mean of the selected hint area. - - Return: - data(dict): The preprocessed data for colorization. - - """ - def __init__(self, - ab_thresh: float = 0., - p: float = 0., - num_points: int = None, - samp: str = 'normal', - use_avg: bool = True): - self.ab_thresh = ab_thresh - self.p = p - self.num_points = num_points - self.samp = samp - self.use_avg = use_avg - self.gethint = ColorizeHint(percent=self.p, num_points=self.num_points, samp=self.samp, use_avg=self.use_avg) - - def __call__(self, data_lab): - """ - This method separates the L channel from the AB channels and obtains the hint, mask and real_B_enc tensors used as inputs for the colorization task. - - Args: - data_lab(np.ndarray|paddle.Tensor): LAB image. - - Returns: - data(dict): The preprocessed data for colorization. - """ - if type(data_lab) is not np.ndarray: - data_lab = data_lab.numpy() - data = {} - A = 2 * 110 / 10 + 1 - data['A'] = data_lab[:, [0], :, :] - data['B'] = data_lab[:, 1:, :, :] - if self.ab_thresh > 0: # mask out grayscale images - thresh = 1. * self.ab_thresh / 110 - mask = np.sum(np.abs(np.max(np.max(data['B'], axis=3), axis=2) - np.min(np.min(data['B'], axis=3), axis=2)), - axis=1) - mask = (mask >= thresh) - data['A'] = data['A'][mask, :, :, :] - data['B'] = data['B'][mask, :, :, :] - if np.sum(mask) == 0: - return None - data_ab_rs = np.round((data['B'][:, :, ::4, ::4] * 110. + 110.) / 10.)
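- # Editor's note (added): the AB channels, roughly in [-110, 110], are - # subsampled 4x and quantized into 10-unit bins, so A = 2 * 110 / 10 + 1 = 23 - # bins per axis; the joint index below (a_bin * 23 + b_bin) yields one of - # 23 * 23 = 529 classes, matching the 529-way classification branch of the - # network.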
# normalized bin number - data['real_B_enc'] = data_ab_rs[:, [0], :, :] * A + data_ab_rs[:, [1], :, :] - data['hint_B'] = np.zeros(shape=data['B'].shape) - data['mask_B'] = np.zeros(shape=data['A'].shape) - data['hint_B'], data['mask_B'] = self.gethint(data['B'], data['hint_B'], data['mask_B']) - data['A'] = paddle.to_tensor(data['A'].astype(np.float32)) - data['B'] = paddle.to_tensor(data['B'].astype(np.float32)) - data['real_B_enc'] = paddle.to_tensor(data['real_B_enc'].astype(np.int64)) - data['hint_B'] = paddle.to_tensor(data['hint_B'].astype(np.float32)) - data['mask_B'] = paddle.to_tensor(data['mask_B'].astype(np.float32)) - return data diff --git a/hub_module/modules/image/colorization/user_guided_colorization/module.py b/hub_module/modules/image/colorization/user_guided_colorization/module.py deleted file mode 100644 index 10f125e1ef8c4f48d40f32dfc5575a2b1dadcca8..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/colorization/user_guided_colorization/module.py +++ /dev/null @@ -1,246 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -import paddle -import paddle.nn as nn -from paddle.nn import Conv2D, Conv2DTranspose -from paddlehub.module.module import moduleinfo -import paddlehub.process.transforms as T -from paddlehub.module.cv_module import ImageColorizeModule -from user_guided_colorization.data_feed import ColorizePreprocess - - -@moduleinfo( - name="user_guided_colorization", - type="CV/image_editing", - author="paddlepaddle", - author_email="", - summary="User_guided_colorization is an image colorization model, this module is trained with the ILSVRC2012 dataset.", - version="1.0.0", - meta=ImageColorizeModule) -class UserGuidedColorization(nn.Layer): - """ - UserGuidedColorization model, see https://github.com/haoyuying/colorization-pytorch - - Args: - use_tanh (bool): Whether to use tanh as the final activation function. - classification (bool): Whether to optimize the classification branch; if False, the regression branch is optimized instead. - load_checkpoint (str): Pretrained checkpoint path.
- - """ - def __init__(self, use_tanh: bool = True, classification: bool = True, load_checkpoint: str = None): - super(UserGuidedColorization, self).__init__() - self.input_nc = 4 - self.output_nc = 2 - self.classification = classification - # Conv1 - model1 = ( - Conv2D(self.input_nc, 64, 3, 1, 1), - nn.ReLU(), - Conv2D(64, 64, 3, 1, 1), - nn.ReLU(), - nn.BatchNorm(64), - ) - - # Conv2 - model2 = ( - Conv2D(64, 128, 3, 1, 1), - nn.ReLU(), - Conv2D(128, 128, 3, 1, 1), - nn.ReLU(), - nn.BatchNorm(128), - ) - - # Conv3 - model3 = ( - Conv2D(128, 256, 3, 1, 1), - nn.ReLU(), - Conv2D(256, 256, 3, 1, 1), - nn.ReLU(), - Conv2D(256, 256, 3, 1, 1), - nn.ReLU(), - nn.BatchNorm(256), - ) - - # Conv4 - model4 = ( - Conv2D(256, 512, 3, 1, 1), - nn.ReLU(), - Conv2D(512, 512, 3, 1, 1), - nn.ReLU(), - Conv2D(512, 512, 3, 1, 1), - nn.ReLU(), - nn.BatchNorm(512), - ) - - # Conv5 - model5 = ( - Conv2D(512, 512, 3, 1, 2, 2), - nn.ReLU(), - Conv2D(512, 512, 3, 1, 2, 2), - nn.ReLU(), - Conv2D(512, 512, 3, 1, 2, 2), - nn.ReLU(), - nn.BatchNorm(512), - ) - - # Conv6 - model6 = ( - Conv2D(512, 512, 3, 1, 2, 2), - nn.ReLU(), - Conv2D(512, 512, 3, 1, 2, 2), - nn.ReLU(), - Conv2D(512, 512, 3, 1, 2, 2), - nn.ReLU(), - nn.BatchNorm(512), - ) - - # Conv7 - model7 = ( - Conv2D(512, 512, 3, 1, 1), - nn.ReLU(), - Conv2D(512, 512, 3, 1, 1), - nn.ReLU(), - Conv2D(512, 512, 3, 1, 1), - nn.ReLU(), - nn.BatchNorm(512), - ) - - # Conv8 - model8up = (Conv2DTranspose(512, 256, kernel_size=4, stride=2, padding=1), ) - model3short8 = (Conv2D(256, 256, 3, 1, 1), ) - model8 = ( - nn.ReLU(), - Conv2D(256, 256, 3, 1, 1), - nn.ReLU(), - Conv2D(256, 256, 3, 1, 1), - nn.ReLU(), - nn.BatchNorm(256), - ) - - # Conv9 - model9up = (Conv2DTranspose(256, 128, kernel_size=4, stride=2, padding=1), ) - model2short9 = (Conv2D( - 128, - 128, - 3, - 1, - 1, - ), ) - model9 = (nn.ReLU(), Conv2D(128, 128, 3, 1, 1), nn.ReLU(), nn.BatchNorm(128)) - - # Conv10 - model10up = (Conv2DTranspose(128, 128, kernel_size=4, stride=2, padding=1), ) - model1short10 = (Conv2D(64, 128, 3, 1, 1), ) - model10 = (nn.ReLU(), Conv2D(128, 128, 3, 1, 1), nn.LeakyReLU(negative_slope=0.2)) - model_class = (Conv2D(256, 529, 1), ) - - if use_tanh: - model_out = (Conv2D(128, 2, 1, 1, 0, 1), nn.Tanh()) - else: - model_out = (Conv2D(128, 2, 1, 1, 0, 1), ) - - self.model1 = nn.Sequential(*model1) - self.model2 = nn.Sequential(*model2) - self.model3 = nn.Sequential(*model3) - self.model4 = nn.Sequential(*model4) - self.model5 = nn.Sequential(*model5) - self.model6 = nn.Sequential(*model6) - self.model7 = nn.Sequential(*model7) - self.model8up = nn.Sequential(*model8up) - self.model8 = nn.Sequential(*model8) - self.model9up = nn.Sequential(*model9up) - self.model9 = nn.Sequential(*model9) - self.model10up = nn.Sequential(*model10up) - self.model10 = nn.Sequential(*model10) - self.model3short8 = nn.Sequential(*model3short8) - self.model2short9 = nn.Sequential(*model2short9) - self.model1short10 = nn.Sequential(*model1short10) - self.model_class = nn.Sequential(*model_class) - self.model_out = nn.Sequential(*model_out) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint) - self.set_dict(model_dict) - print("load custom checkpoint success") - else: - checkpoint = os.path.join(self.directory, 'user_guided.pdparams') - if not os.path.exists(checkpoint): - os.system('wget https://paddlehub.bj.bcebos.com/dygraph/image_colorization/user_guided.pdparams -O ' + - checkpoint) - model_dict = paddle.load(checkpoint) - - self.set_dict(model_dict) - print("load pretrained 
checkpoint success") - - def transforms(self, images: str, is_train: bool = True) -> callable: - if is_train: - transform = T.Compose( - [T.Resize((256, 256), interp='NEAREST'), - T.RandomPaddingCrop(crop_size=176), - T.RGB2LAB()], - stay_rgb=True, - is_permute=False) - else: - transform = T.Compose([T.Resize( - (256, 256), interp='NEAREST'), T.RGB2LAB()], - stay_rgb=True, - is_permute=False) - return transform(images) - - def preprocess(self, inputs: paddle.Tensor, ab_thresh: float = 0., prob: float = 0.): - self.preprocess = ColorizePreprocess(ab_thresh=ab_thresh, p=prob) - return self.preprocess(inputs) - - def forward(self, - input_A: paddle.Tensor, - input_B: paddle.Tensor, - mask_B: paddle.Tensor, - real_b: paddle.Tensor = None, - real_B_enc: paddle.Tensor = None) -> paddle.Tensor: - conv1_2 = self.model1(paddle.concat([input_A, input_B, mask_B], axis=1)) - conv2_2 = self.model2(conv1_2[:, :, ::2, ::2]) - conv3_3 = self.model3(conv2_2[:, :, ::2, ::2]) - conv4_3 = self.model4(conv3_3[:, :, ::2, ::2]) - conv5_3 = self.model5(conv4_3) - conv6_3 = self.model6(conv5_3) - conv7_3 = self.model7(conv6_3) - conv8_up = self.model8up(conv7_3) + self.model3short8(conv3_3) - conv8_3 = self.model8(conv8_up) - - if self.classification: - out_class = self.model_class(conv8_3) - conv9_up = self.model9up(conv8_3.detach()) + self.model2short9(conv2_2.detach()) - conv9_3 = self.model9(conv9_up) - conv10_up = self.model10up(conv9_3) + self.model1short10(conv1_2.detach()) - conv10_2 = self.model10(conv10_up) - out_reg = self.model_out(conv10_2) - else: - out_class = self.model_class(conv8_3.detach()) - conv9_up = self.model9up(conv8_3) + self.model2short9(conv2_2) - conv9_3 = self.model9(conv9_up) - conv10_up = self.model10up(conv9_3) + self.model1short10(conv1_2) - conv10_2 = self.model10(conv10_up) - out_reg = self.model_out(conv10_2) - - return out_class, out_reg - - -if __name__ == "__main__": - place = paddle.CUDAPlace(0) - paddle.disable_static() - model = UserGuidedColorization() - model.eval() diff --git a/hub_module/modules/image/face_detection/pyramidbox_face_detection/data_feed.py b/hub_module/modules/image/face_detection/pyramidbox_face_detection/data_feed.py deleted file mode 100644 index 39890779e3f8699a78065a9b68ae6880dfdfc2e2..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/face_detection/pyramidbox_face_detection/data_feed.py +++ /dev/null @@ -1,125 +0,0 @@ -# coding=utf-8 -import os -import time -from collections import OrderedDict - -import cv2 -import numpy as np -from PIL import Image - -__all__ = ['reader'] - - -def preprocess(image): - if image.mode == 'L': - image = image.convert('RGB') - shrink, max_shrink = get_shrink(image.size[1], image.size[0]) - image_shape = [3, image.size[1], image.size[0]] - if shrink != 1: - h, w = int(image_shape[1] * shrink), int(image_shape[2] * shrink) - image = image.resize((w, h), Image.ANTIALIAS) - image_shape = [3, h, w] - - img = np.array(image) - img = to_chw_bgr(img) - mean = [104., 117., 123.] - scale = 0.007843 - img = img.astype('float32') - img -= np.array(mean)[:, np.newaxis, np.newaxis].astype('float32') - img = img * scale - img = np.array(img) - return img - - -def to_chw_bgr(image): - """ - Transpose image from HWC to CHW and from RBG to BGR. - - Args: - image (np.array): an image with HWC and RBG layout. 
- """ - # HWC to CHW - if len(image.shape) == 3: - image = np.swapaxes(image, 1, 2) - image = np.swapaxes(image, 1, 0) - # RBG to BGR - image = image[[2, 1, 0], :, :] - return image - - -def get_shrink(height, width): - """ - shrink the original image according to the org_im_height and org_im_width. - calculate the value of shrink. - - Args: - height (int): image height. - width (int): image width. - """ - max_shrink_v1 = (0x7fffffff / 577.0 / (height * width))**0.5 - max_shrink_v2 = ((678 * 1024 * 2.0 * 2.0) / (height * width))**0.5 - - def get_round(x, loc): - str_x = str(x) - if '.' in str_x: - str_before, str_after = str_x.split('.') - len_after = len(str_after) - if len_after >= 3: - str_final = str_before + '.' + str_after[0:loc] - return float(str_final) - else: - return x - - max_shrink = get_round(min(max_shrink_v1, max_shrink_v2), 2) - 0.3 - if max_shrink >= 1.5 and max_shrink < 2: - max_shrink = max_shrink - 0.1 - elif max_shrink >= 2 and max_shrink < 3: - max_shrink = max_shrink - 0.2 - elif max_shrink >= 3 and max_shrink < 4: - max_shrink = max_shrink - 0.3 - elif max_shrink >= 4 and max_shrink < 5: - max_shrink = max_shrink - 0.4 - elif max_shrink >= 5: - max_shrink = max_shrink - 0.5 - elif max_shrink <= 0.1: - max_shrink = 0.1 - shrink = max_shrink if max_shrink < 1 else 1 - return shrink, max_shrink - - -def reader(images, paths): - """ - Preprocess to yield image. - - Args: - images (list(numpy.ndarray)): images data, shape of each is [H, W, C], color space is BGR. - paths (list[str]): paths to images. - - Yield: - each (collections.OrderedDict): info of original image, preprocessed image. - """ - component = list() - if paths is not None: - assert type(paths) is list, "paths should be a list." - for im_path in paths: - each = OrderedDict() - assert os.path.isfile( - im_path), "The {} isn't a valid file path.".format(im_path) - each['org_im'] = Image.open(im_path) - each['org_im_width'], each['org_im_height'] = each['org_im'].size - each['org_im_path'] = im_path - component.append(each) - if images is not None: - assert type(images) is list, "images should be a list." 
- for im in images: - each = OrderedDict() - each['org_im'] = Image.fromarray( - cv2.cvtColor(im, cv2.COLOR_BGR2RGB)) - each['org_im_width'], each['org_im_height'] = each['org_im'].size - each['org_im_path'] = 'ndarray_time={}'.format( - round(time.time(), 6) * 1e6) - component.append(each) - - for element in component: - element['image'] = preprocess(element['org_im']) - yield element diff --git a/hub_module/modules/image/face_detection/pyramidbox_face_detection/module.py b/hub_module/modules/image/face_detection/pyramidbox_face_detection/module.py deleted file mode 100644 index c62b8f4374161e330ccf3a3366ae7c7f6aa14b64..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/face_detection/pyramidbox_face_detection/module.py +++ /dev/null @@ -1,206 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division - -import ast -import argparse -import os - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving - -from pyramidbox_face_detection.data_feed import reader -from pyramidbox_face_detection.processor import postprocess, base64_to_cv2 - - -@moduleinfo( - name="pyramidbox_face_detection", - type="CV/face_detection", - author="baidu-vis", - author_email="", - summary="Baidu's PyramidBox model for face detection.", - version="1.1.0") -class PyramidBoxFaceDetection(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "pyramidbox_face_detection_widerface") - self._set_config() - - def _set_config(self): - """ - Predictor config setting. - """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu( - memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def face_detection(self, - images=None, - paths=None, - data=None, - use_gpu=False, - output_dir='detection_result', - visualization=False, - score_thresh=0.15): - """ - API for face detection. - - Args: - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - paths (list[str]): The paths of images. - use_gpu (bool): Whether to use gpu. - output_dir (str): The path to store output images. - visualization (bool): Whether to save image or not. - score_thresh (float): score threshold to limit the detection result. - - Returns: - res (list[dict]): The result of face detection, keys are 'data' and 'path', the corresponding values are: - data (list[dict]): 5 keys, where - 'left', 'top', 'right', 'bottom' are the coordinates of the detection bounding box, - 'confidence' is the confidence of this bbox. - path (str): The path of original image. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
- ) - - # compatibility with older versions - if data: - if 'image' in data: - if paths is None: - paths = list() - paths += data['image'] - - res = list() - # process one by one - for element in reader(images, paths): - image = np.expand_dims(element['image'], axis=0).astype('float32') - image_tensor = PaddleTensor(image.copy()) - data_out = self.gpu_predictor.run([ - image_tensor - ]) if use_gpu else self.cpu_predictor.run([image_tensor]) - # print(len(data_out)) # 1 - out = postprocess( - data_out=data_out[0].as_ndarray(), - org_im=element['org_im'], - org_im_path=element['org_im_path'], - org_im_width=element['org_im_width'], - org_im_height=element['org_im_height'], - output_dir=output_dir, - visualization=visualization, - score_thresh=score_thresh) - res.append(out) - return res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.face_detection(images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. - """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.face_detection( - paths=[args.input_path], - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization, - score_thresh=args.score_thresh) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='detection_result', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - - def add_module_input_arg(self): - """ - Add the command input options. 
- """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") - self.arg_input_group.add_argument( - '--score_thresh', - type=ast.literal_eval, - default=0.15, - help="score threshold of face detection.") diff --git a/hub_module/modules/image/face_detection/pyramidbox_face_detection/processor.py b/hub_module/modules/image/face_detection/pyramidbox_face_detection/processor.py deleted file mode 100644 index d5f3101823077daa2122438e4cabab12657e3257..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/face_detection/pyramidbox_face_detection/processor.py +++ /dev/null @@ -1,132 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time -from collections import OrderedDict - -import base64 -import cv2 -import numpy as np -from PIL import Image, ImageDraw - -__all__ = ['base64_to_cv2', 'postprocess'] - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def check_dir(dir_path): - """ - Create directory to save processed image. - - Args: - dir_path (str): directory path to save images. - - """ - if not os.path.exists(dir_path): - os.makedirs(dir_path) - elif os.path.isfile(dir_path): - os.remove(dir_path) - os.makedirs(dir_path) - - -def get_save_image_name(img, org_im_path, output_dir): - """ - Get save image name. - """ - # name prefix of original image - org_im_name = os.path.split(org_im_path)[-1] - im_prefix = os.path.splitext(org_im_name)[0] - # extension - if img.mode == 'RGBA': - ext = '.png' - else: - ext = '.jpg' - # save image path - save_im_path = os.path.join(output_dir, im_prefix + ext) - if os.path.exists(save_im_path): - save_im_path = os.path.join( - output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) - return save_im_path - - -def draw_bboxes(image, bboxes, org_im_path, output_dir): - """ - Draw bounding boxes on image. - - Args: - bboxes (np.array): bounding boxes. - """ - draw = ImageDraw.Draw(image) - for i in range(len(bboxes)): - xmin, ymin, xmax, ymax = bboxes[i] - (left, right, top, bottom) = (xmin, xmax, ymin, ymax) - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), - (left, top)], - width=4, - fill='red') - save_name = get_save_image_name(image, org_im_path, output_dir) - image.save(save_name) - - -def postprocess(data_out, org_im, org_im_path, org_im_width, org_im_height, - output_dir, visualization, score_thresh): - """ - Postprocess output of network. one image at a time. - - Args: - data_out (numpy.ndarray): output of network. - org_im: (PIL.Image object): original image. - org_im_path (str): path of original image. - org_im_width (int): width of original image. - org_im_height (int): height of original image. - output_dir (str): output directory to store image. - visualization (bool): whether to save image or not. - - Returns: - output (dict): keys are 'data' and 'path', the correspoding values are: - data (list[dict]): 5 keys, where - 'left', 'top', 'right', 'bottom' are the coordinate of detection bounding box, - 'confidence' is the confidence this bbox. - path (str): The path of original image. 
- """ - output = dict() - output['data'] = list() - output['path'] = org_im_path - - if data_out.shape[0] == 0: - print("No face detected in {}".format(org_im_path)) - else: - det_conf = data_out[:, 1] - det_xmin = org_im_width * data_out[:, 2] - det_ymin = org_im_height * data_out[:, 3] - det_xmax = org_im_width * data_out[:, 4] - det_ymax = org_im_height * data_out[:, 5] - dets = np.column_stack((det_xmin, det_ymin, det_xmax, det_ymax, - det_conf)) - keep_index = np.where(dets[:, 4] >= score_thresh)[0] - dets = dets[keep_index, :] - - if dets.shape[0] == 0: - print("No face detected in {}".format(org_im_path)) - else: - for detect_face in dets: - dt_i = dict() - dt_i['left'] = float(detect_face[0]) - dt_i['top'] = float(detect_face[1]) - dt_i['right'] = float(detect_face[2]) - dt_i['bottom'] = float(detect_face[3]) - dt_i['confidence'] = float(detect_face[4]) - output['data'].append(dt_i) - - if visualization: - check_dir(output_dir) - draw_bboxes(org_im, dets[:, 0:4], org_im_path, output_dir) - return output diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_mobile/data_feed.py b/hub_module/modules/image/face_detection/pyramidbox_lite_mobile/data_feed.py deleted file mode 100644 index 92ad422074b8bbd24e09d86cfaebc10159b2bc75..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/face_detection/pyramidbox_lite_mobile/data_feed.py +++ /dev/null @@ -1,68 +0,0 @@ -# coding=utf-8 -import os -import time -from collections import OrderedDict - -import cv2 -import numpy as np - -__all__ = ['reader'] - - -def preprocess(org_im, shrink): - image = org_im.copy() - image_height, image_width, image_channel = image.shape - if shrink != 1: - image_height, image_width = int(image_height * shrink), int( - image_width * shrink) - image = cv2.resize(image, (image_width, image_height), - cv2.INTER_NEAREST) - # HWC to CHW - if len(image.shape) == 3: - image = np.swapaxes(image, 1, 2) - image = np.swapaxes(image, 1, 0) - # mean, std - mean = [104., 117., 123.] - scale = 0.007843 - image = image.astype('float32') - image -= np.array(mean)[:, np.newaxis, np.newaxis].astype('float32') - image = image * scale - return image, image_height, image_width - - -def reader(images, paths, shrink): - """ - Preprocess to yield image. - - Args: - images (list(numpy.ndarray)): images data, shape of each is [H, W, C], color space is BGR. - paths (list[str]): paths to images. - shrink (float): parameter to control the resize scale in preprocess. - - Yield: - each (collections.OrderedDict): info of original image, preprocessed image. - """ - component = list() - if paths is not None: - assert type(paths) is list, "paths should be a list." - for im_path in paths: - each = OrderedDict() - assert os.path.isfile( - im_path), "The {} isn't a valid file path.".format(im_path) - im = cv2.imread(im_path) - each['org_im'] = im - each['org_im_path'] = im_path - component.append(each) - if images is not None: - assert type(images) is list, "images should be a list." 
- for im in images: - each = OrderedDict() - each['org_im'] = im - each['org_im_path'] = 'ndarray_time={}'.format( - round(time.time(), 6) * 1e6) - component.append(each) - - for element in component: - element['image'], element['image_height'], element[ - 'image_width'] = preprocess(element['org_im'], shrink) - yield element diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_mobile/module.py b/hub_module/modules/image/face_detection/pyramidbox_lite_mobile/module.py deleted file mode 100644 index 8d0294a55eb59f83011b90dce8e4b8369dc1e066..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/face_detection/pyramidbox_lite_mobile/module.py +++ /dev/null @@ -1,220 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division - -import ast -import argparse -import os - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving - -from pyramidbox_lite_mobile.data_feed import reader -from pyramidbox_lite_mobile.processor import postprocess, base64_to_cv2 - - -@moduleinfo( - name="pyramidbox_lite_mobile", - type="CV/face_detection", - author="baidu-vis", - author_email="", - summary="PyramidBox-Lite-Mobile is a high-performance face detection model.", - version="1.2.0") -class PyramidBoxLiteMobile(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "pyramidbox_lite_mobile_face_detection") - self._set_config() - self.processor = self - - def _set_config(self): - """ - predictor config setting - """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu( - memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def face_detection(self, - images=None, - paths=None, - data=None, - use_gpu=False, - output_dir='detection_result', - visualization=False, - shrink=0.5, - confs_threshold=0.6): - """ - API for face detection. - - Args: - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - paths (list[str]): The paths of images. - use_gpu (bool): Whether to use gpu. - output_dir (str): The path to store output images. - visualization (bool): Whether to save image or not. - shrink (float): parameter to control the resize scale in preprocess. - confs_threshold (float): confidence threshold. - - Returns: - res (list[dict]): The result of face detection and save path of images. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
- ) - - # compatibility with older versions - if data: - if 'image' in data: - if paths is None: - paths = list() - paths += data['image'] - elif 'data' in data: - if images is None: - images = list() - images += data['data'] - - res = list() - # process one by one - for element in reader(images, paths, shrink): - image = np.expand_dims(element['image'], axis=0).astype('float32') - image_tensor = PaddleTensor(image.copy()) - data_out = self.gpu_predictor.run([ - image_tensor - ]) if use_gpu else self.cpu_predictor.run([image_tensor]) - out = postprocess( - data_out=data_out[0].as_ndarray(), - org_im=element['org_im'], - org_im_path=element['org_im_path'], - image_width=element['image_width'], - image_height=element['image_height'], - output_dir=output_dir, - visualization=visualization, - shrink=shrink, - confs_threshold=confs_threshold) - res.append(out) - return res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - var = program.global_block().vars['detection_output_0.tmp_1'] - var.desc.set_dtype(fluid.core.VarDesc.VarType.INT32) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.face_detection(images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. - """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.face_detection( - paths=[args.input_path], - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization, - shrink=args.shrink, - confs_threshold=args.confs_threshold) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='detection_result', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - - def add_module_input_arg(self): - """ - Add the command input options. 
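
For reference, the `face_detection` API above can be driven directly from Python rather than through `hub run`. A usage sketch, assuming PaddleHub 1.x and a local `test.jpg` (hypothetical file name):

```python
import paddlehub as hub

detector = hub.Module(name="pyramidbox_lite_mobile")
results = detector.face_detection(
    paths=["test.jpg"],           # or images=[bgr_ndarray]
    use_gpu=False,
    visualization=True,           # write annotated copies to output_dir
    output_dir="detection_result",
    shrink=0.5,                   # resize factor applied before inference
    confs_threshold=0.6)
for r in results:
    # each entry: {'path': ..., 'data': [{'left': ..., 'top': ..., ...}]}
    print(r["path"], r["data"])
```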
- """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") - self.arg_input_group.add_argument( - '--shrink', - type=ast.literal_eval, - default=0.5, - help= - "resize the image to shrink * original_shape before feeding into network." - ) - self.arg_input_group.add_argument( - '--confs_threshold', - type=ast.literal_eval, - default=0.6, - help="confidence threshold.") diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_mobile/processor.py b/hub_module/modules/image/face_detection/pyramidbox_lite_mobile/processor.py deleted file mode 100644 index 25f8fa3d9dda6078cc65dd4017f4e5df7b96eaa6..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/face_detection/pyramidbox_lite_mobile/processor.py +++ /dev/null @@ -1,119 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time -from collections import OrderedDict - -import base64 -import cv2 -import numpy as np -from PIL import Image - -__all__ = ['base64_to_cv2', 'postprocess'] - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def check_dir(dir_path): - """ - Create directory to save processed image. - - Args: - dir_path (str): directory path to save images. - - """ - if not os.path.exists(dir_path): - os.makedirs(dir_path) - elif os.path.isfile(dir_path): - os.remove(dir_path) - os.makedirs(dir_path) - - -def get_save_image_name(org_im, org_im_path, output_dir): - """ - Get save image name from source image path. - """ - # name prefix of original image - org_im_name = os.path.split(org_im_path)[-1] - im_prefix = os.path.splitext(org_im_name)[0] - # extension - img = Image.fromarray(org_im[:, :, ::-1]) - if img.mode == 'RGBA': - ext = '.png' - elif img.mode == 'RGB': - ext = '.jpg' - # save image path - save_im_path = os.path.join(output_dir, im_prefix + ext) - if os.path.exists(save_im_path): - save_im_path = os.path.join( - output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) - - return save_im_path - - -def clip_bbox(bbox, img_height, img_width): - bbox['left'] = int(max(min(bbox['left'], img_width), 0.)) - bbox['top'] = int(max(min(bbox['top'], img_height), 0.)) - bbox['right'] = int(max(min(bbox['right'], img_width), 0.)) - bbox['bottom'] = int(max(min(bbox['bottom'], img_height), 0.)) - return bbox - - -def postprocess(data_out, org_im, org_im_path, image_height, image_width, - output_dir, visualization, shrink, confs_threshold): - """ - Postprocess output of network. one image at a time. - - Args: - data_out (numpy.ndarray): output of network. - org_im (numpy.ndarray): original image. - org_im_path (list): path of riginal image. - image_height (int): height of preprocessed image. - image_width (int): width of preprocessed image. - output_dir (str): output directory to store image. - visualization (bool): whether to save image or not. - shrink (float): parameter to control the resize scale in preprocess. - confs_threshold (float): confidence threshold. - - Returns: - output (dict): keys are 'data' and 'path', the correspoding values are: - data (list[dict]): 5 keys, where - 'left', 'top', 'right', 'bottom' are the coordinate of detection bounding box, - 'confidence' is the confidence this bbox. - path (str): The path of original image. 
- """ - output = dict() - output['data'] = list() - output['path'] = org_im_path - - for each_data in data_out: - # each_data is a list: [label, confidence, left, top, right, bottom] - if each_data[1] > confs_threshold: - dt_bbox = dict() - dt_bbox['confidence'] = float(each_data[1]) - dt_bbox['left'] = image_width * each_data[2] / shrink - dt_bbox['top'] = image_height * each_data[3] / shrink - dt_bbox['right'] = image_width * each_data[4] / shrink - dt_bbox['bottom'] = image_height * each_data[5] / shrink - dt_bbox = clip_bbox(dt_bbox, org_im.shape[0], org_im.shape[1]) - output['data'].append(dt_bbox) - - if visualization: - check_dir(output_dir) - save_im_path = get_save_image_name(org_im, org_im_path, output_dir) - im_out = org_im.copy() - if len(output['data']) > 0: - for bbox in output['data']: - cv2.rectangle(im_out, (bbox['left'], bbox['top']), - (bbox['right'], bbox['bottom']), (255, 255, 0), 2) - cv2.imwrite(save_im_path, im_out) - - return output diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_mobile_mask/data_feed.py b/hub_module/modules/image/face_detection/pyramidbox_lite_mobile_mask/data_feed.py deleted file mode 100644 index cce81017a4572eef73a305699c6cda78aea5f33d..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/face_detection/pyramidbox_lite_mobile_mask/data_feed.py +++ /dev/null @@ -1,212 +0,0 @@ -# coding=utf-8 -import os -import math -import time -from collections import OrderedDict - -import cv2 -import numpy as np - -__all__ = ['reader'] - -multi_scales = [0.3, 0.6, 0.9] - - -def bbox_vote(det): - order = det[:, 4].ravel().argsort()[::-1] - det = det[order, :] - if det.shape[0] == 0: - dets = np.array([[10, 10, 20, 20, 0.002]]) - det = np.empty(shape=[0, 5]) - while det.shape[0] > 0: - # IOU - area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1) - xx1 = np.maximum(det[0, 0], det[:, 0]) - yy1 = np.maximum(det[0, 1], det[:, 1]) - xx2 = np.minimum(det[0, 2], det[:, 2]) - yy2 = np.minimum(det[0, 3], det[:, 3]) - w = np.maximum(0.0, xx2 - xx1 + 1) - h = np.maximum(0.0, yy2 - yy1 + 1) - inter = w * h - o = inter / (area[0] + area[:] - inter) - # nms - merge_index = np.where(o >= 0.3)[0] - det_accu = det[merge_index, :] - det = np.delete(det, merge_index, 0) - if merge_index.shape[0] <= 1: - if det.shape[0] == 0: - try: - dets = np.row_stack((dets, det_accu)) - except: - dets = det_accu - continue - det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4)) - max_score = np.max(det_accu[:, 4]) - det_accu_sum = np.zeros((1, 5)) - det_accu_sum[:, 0:4] = np.sum( - det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:]) - det_accu_sum[:, 4] = max_score - try: - dets = np.row_stack((dets, det_accu_sum)) - except: - dets = det_accu_sum - dets = dets[0:750, :] - return dets - - -def crop(image, - pts, - shift=0, - scale=1.5, - rotate=0, - res_width=128, - res_height=128): - res = (res_width, res_height) - idx1 = 0 - idx2 = 1 - # angle - alpha = 0 - if pts[idx2, 0] != -1 and pts[idx2, 1] != -1 and pts[idx1, 0] != -1 and pts[ - idx1, 1] != -1: - alpha = math.atan2(pts[idx2, 1] - pts[idx1, 1], - pts[idx2, 0] - pts[idx1, 0]) * 180 / math.pi - pts[pts == -1] = np.inf - coord_min = np.min(pts, 0) - pts[pts == np.inf] = -1 - coord_max = np.max(pts, 0) - # coordinates of center point - c = np.array([ - coord_max[0] - (coord_max[0] - coord_min[0]) / 2, - coord_max[1] - (coord_max[1] - coord_min[1]) / 2 - ]) # center - max_wh = max((coord_max[0] - coord_min[0]) / 2, - (coord_max[1] - coord_min[1]) / 2) - # Shift the center 
point, rot add eyes angle - c = c + shift * max_wh - rotate = rotate + alpha - M = cv2.getRotationMatrix2D((c[0], c[1]), rotate, - res[0] / (2 * max_wh * scale)) - M[0, 2] = M[0, 2] - (c[0] - res[0] / 2.0) - M[1, 2] = M[1, 2] - (c[1] - res[0] / 2.0) - image_out = cv2.warpAffine(image, M, res) - return image_out, M - - -def color_normalize(image, mean, std=None): - if image.shape[-1] == 1: - image = np.repeat(image, axis=2) - h, w, c = image.shape - image = np.transpose(image, (2, 0, 1)) - image = np.subtract(image.reshape(c, -1), mean[:, np.newaxis]).reshape( - -1, h, w) - image = np.transpose(image, (1, 2, 0)) - return image - - -def process_image(org_im, face): - pts = np.array([ - face['left'], face['top'], face['right'], face['top'], face['left'], - face['bottom'], face['right'], face['bottom'] - ]).reshape(4, 2).astype(np.float32) - image_in, M = crop(org_im, pts) - image_in = image_in / 256.0 - image_in = color_normalize(image_in, mean=np.array([0.5, 0.5, 0.5])) - image_in = image_in.astype(np.float32).transpose([2, 0, 1]).reshape( - -1, 3, 128, 128) - return image_in - - -def reader(face_detector, shrink, confs_threshold, images, paths, use_gpu, - use_multi_scale): - """ - Preprocess to yield image. - - Args: - face_detector (class): class to detect faces. - shrink (float): parameter to control the resize scale in face_detector. - confs_threshold (float): confidence threshold of face_detector. - images (list(numpy.ndarray)): images data, shape of each is [H, W, C], color space is BGR. - paths (list[str]): paths to images. - use_gpu (bool): whether to use gpu in face_detector. - use_multi_scale (bool): whether to enable multi-scale face detection. - Yield: - element (collections.OrderedDict): info of original image, preprocessed image, contains 3 keys: - org_im (numpy.ndarray) : original image. - org_im_path (str): path to original image. - preprocessed (list[OrderedDict]):each element contains 2 keys: - face (dict): face detected in the original image. - image (numpy.ndarray): data to be fed into neural network. - """ - component = list() - if paths is not None: - assert type(paths) is list, "paths should be a list." - for im_path in paths: - each = OrderedDict() - assert os.path.isfile( - im_path), "The {} isn't a valid file path.".format(im_path) - im = cv2.imread(im_path) - each['org_im'] = im - each['org_im_path'] = im_path - component.append(each) - if images is not None: - assert type(images) is list, "images should be a list." 
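
`bbox_vote` above fuses overlapping detections from different scales into score-weighted boxes. A toy demonstration, assuming `bbox_vote` from the listing above is in scope (the box values are made up):

```python
import numpy as np

det = np.array([[10., 10., 60., 60., 0.9],      # two overlapping detections
                [12., 11., 62., 58., 0.8],
                [200., 200., 240., 240., 0.7]])  # one isolated detection
merged = bbox_vote(det)
print(merged)
# -> two rows: a score-weighted fusion of the first two boxes (score 0.9),
#    and the isolated box kept as-is.
```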
- for im in images: - each = OrderedDict() - each['org_im'] = im - each['org_im_path'] = 'ndarray_time={}'.format( - round(time.time(), 6) * 1e6) - component.append(each) - - for element in component: - if use_multi_scale: - scale_res = list() - detect_faces = list() - for scale in multi_scales: - _detect_res = face_detector.face_detection( - images=[element['org_im']], - use_gpu=use_gpu, - visualization=False, - shrink=scale, - confs_threshold=confs_threshold) - - _s = list() - for _face in _detect_res[0]['data']: - _face_list = [ - _face['left'], _face['top'], _face['right'], - _face['bottom'], _face['confidence'] - ] - _s.append(_face_list) - - if _s: - scale_res.append(np.array(_s)) - - scale_res = np.row_stack(scale_res) - scale_res = bbox_vote(scale_res) - keep_index = np.where(scale_res[:, 4] >= confs_threshold)[0] - scale_res = scale_res[keep_index, :] - for data in scale_res: - face = { - 'left': data[0], - 'top': data[1], - 'right': data[2], - 'bottom': data[3], - 'confidence': data[4] - } - detect_faces.append(face) - else: - _detect_res = face_detector.face_detection( - images=[element['org_im']], - use_gpu=use_gpu, - visualization=False, - shrink=shrink, - confs_threshold=confs_threshold) - detect_faces = _detect_res[0]['data'] - - element['preprocessed'] = list() - for face in detect_faces: - handled = OrderedDict() - handled['face'] = face - handled['image'] = process_image(element['org_im'], face) - element['preprocessed'].append(handled) - - yield element diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_mobile_mask/module.py b/hub_module/modules/image/face_detection/pyramidbox_lite_mobile_mask/module.py deleted file mode 100644 index e98c9944ff76c8baa97988580fd994c5431048d7..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/face_detection/pyramidbox_lite_mobile_mask/module.py +++ /dev/null @@ -1,299 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division - -import ast -import argparse -import os - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving - -from pyramidbox_lite_mobile_mask.data_feed import reader -from pyramidbox_lite_mobile_mask.processor import postprocess, base64_to_cv2 - - -@moduleinfo( - name="pyramidbox_lite_mobile_mask", - type="CV/face_detection", - author="baidu-vis", - author_email="", - summary= - "Pyramidbox-Lite-Mobile-Mask is a high-performance face detection model used to detect whether people wear masks.", - version="1.3.0") -class PyramidBoxLiteMobileMask(hub.Module): - def _initialize(self, face_detector_module=None): - """ - Args: - face_detector_module (class): module to detect face. 
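
One detector instance can be shared with the mask classifier instead of letting the module load its own copy. A hedged sketch, assuming PaddleHub forwards extra keyword arguments of `hub.Module(...)` to `_initialize`, as the signature here suggests:

```python
import paddlehub as hub

detector = hub.Module(name="pyramidbox_lite_mobile")
mask = hub.Module(name="pyramidbox_lite_mobile_mask",
                  face_detector_module=detector)
# or, after construction:
mask.set_face_detector_module(detector)
assert mask.get_face_detector_module() is detector
```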
- """ - self.default_pretrained_model_path = os.path.join( - self.directory, "pyramidbox_lite_mobile_mask_model") - if face_detector_module is None: - self.face_detector = hub.Module(name='pyramidbox_lite_mobile') - else: - self.face_detector = face_detector_module - self._set_config() - self.processor = self - - def _set_config(self): - """ - predictor config setting - """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu( - memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def set_face_detector_module(self, face_detector_module): - """ - Set face detector. - Args: - face_detector_module (class): module to detect face. - """ - self.face_detector = face_detector_module - - def get_face_detector_module(self): - return self.face_detector - - def face_detection(self, - images=None, - paths=None, - data=None, - batch_size=1, - use_gpu=False, - visualization=False, - output_dir='detection_result', - use_multi_scale=False, - shrink=0.5, - confs_threshold=0.6): - """ - API for face detection. - - Args: - images (list(numpy.ndarray)): images data, shape of each is [H, W, C], color space must be BGR. - paths (list[str]): The paths of images. - batch_size (int): batch size of image tensor to be fed into the later classification network. - use_gpu (bool): Whether to use gpu. - visualization (bool): Whether to save image or not. - output_dir (str): The path to store output images. - use_multi_scale (bool): whether to enable multi-scale face detection. Enabling multi-scale face detection - can increase the accuracy to detect faces, however, - it reduce the prediction speed for the increase model calculation. - shrink (float): parameter to control the resize scale in preprocess. - confs_threshold (float): confidence threshold. - - Returns: - res (list[dict]): The result of face detection and save path of images. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
- ) - - # compatibility with older versions - if data: - if 'image' in data: - if paths is None: - paths = list() - paths += data['image'] - elif 'data' in data: - if images is None: - images = list() - images += data['data'] - - # get all data - all_element = list() - for yield_data in reader(self.face_detector, shrink, confs_threshold, - images, paths, use_gpu, use_multi_scale): - all_element.append(yield_data) - - image_list = list() - element_image_num = list() - for i in range(len(all_element)): - element_image = [ - handled['image'] for handled in all_element[i]['preprocessed'] - ] - element_image_num.append(len(element_image)) - image_list.extend(element_image) - - total_num = len(image_list) - loop_num = int(np.ceil(total_num / batch_size)) - - predict_out = np.zeros((1, 2)) - for iter_id in range(loop_num): - batch_data = list() - handle_id = iter_id * batch_size - for element_id in range(batch_size): - try: - batch_data.append(image_list[handle_id + element_id]) - except: - pass - - image_arr = np.squeeze(np.array(batch_data), axis=1) - image_tensor = PaddleTensor(image_arr.copy()) - data_out = self.gpu_predictor.run([ - image_tensor - ]) if use_gpu else self.cpu_predictor.run([image_tensor]) - # len(data_out) == 1 - # data_out[0].as_ndarray().shape == (-1, 2) - data_out = data_out[0].as_ndarray() - predict_out = np.concatenate((predict_out, data_out)) - - predict_out = predict_out[1:] - # postprocess one by one - res = list() - for i in range(len(all_element)): - detect_faces_list = [ - handled['face'] for handled in all_element[i]['preprocessed'] - ] - interval_left = sum(element_image_num[0:i]) - interval_right = interval_left + element_image_num[i] - out = postprocess( - confidence_out=predict_out[interval_left:interval_right], - org_im=all_element[i]['org_im'], - org_im_path=all_element[i]['org_im_path'], - detected_faces=detect_faces_list, - output_dir=output_dir, - visualization=visualization) - res.append(out) - return res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - classifier_dir = os.path.join(dirname, 'mask_detector') - detector_dir = os.path.join(dirname, 'pyramidbox_lite') - self._save_classifier_model(classifier_dir, model_filename, - params_filename, combined) - self._save_detector_model(detector_dir, model_filename, params_filename, - combined) - - def _save_detector_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - self.face_detector.save_inference_model(dirname, model_filename, - params_filename, combined) - - def _save_classifier_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. 
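
`serving_method` below expects base64-encoded images. A request sketch, assuming the module was deployed with `hub serving start -m pyramidbox_lite_mobile_mask` and that your PaddleHub release exposes the default `http://127.0.0.1:8866/predict/<module>` route (the URL scheme varies across PaddleHub versions):

```python
import base64
import json
import requests

with open("test.jpg", "rb") as f:  # hypothetical local image
    b64 = base64.b64encode(f.read()).decode("utf8")

url = "http://127.0.0.1:8866/predict/pyramidbox_lite_mobile_mask"
resp = requests.post(url,
                     headers={"Content-Type": "application/json"},
                     data=json.dumps({"images": [b64]}))
print(resp.json())
```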
- """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.face_detection(images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. - """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.face_detection( - paths=[args.input_path], - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization, - shrink=args.shrink, - confs_threshold=args.confs_threshold) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='detection_result', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - - def add_module_input_arg(self): - """ - Add the command input options. - """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") - self.arg_input_group.add_argument( - '--shrink', - type=ast.literal_eval, - default=0.5, - help= - "resize the image to `shrink * original_shape` before feeding into network." - ) - self.arg_input_group.add_argument( - '--confs_threshold', - type=ast.literal_eval, - default=0.6, - help="confidence threshold.") diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_mobile_mask/processor.py b/hub_module/modules/image/face_detection/pyramidbox_lite_mobile_mask/processor.py deleted file mode 100644 index 61c7be6addd3c1791d1c8e46d0bcd58dcf93e8c9..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/face_detection/pyramidbox_lite_mobile_mask/processor.py +++ /dev/null @@ -1,149 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time -from collections import OrderedDict - -import base64 -import cv2 -import numpy as np -from PIL import Image, ImageDraw - -__all__ = ['base64_to_cv2', 'postprocess'] - -label_list = ['NO MASK', 'MASK'] - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def check_dir(dir_path): - """ - Create directory to save processed image. - - Args: - dir_path (str): directory path to save images. - - """ - if not os.path.exists(dir_path): - os.makedirs(dir_path) - elif os.path.isfile(dir_path): - os.remove(dir_path) - os.makedirs(dir_path) - - -def get_save_image_name(org_im, org_im_path, output_dir): - """ - Get save image name from source image path. 
- """ - # name prefix of original image - org_im_name = os.path.split(org_im_path)[-1] - im_prefix = os.path.splitext(org_im_name)[0] - # extension - img = Image.fromarray(org_im[:, :, ::-1]) - if img.mode == 'RGBA': - ext = '.png' - elif img.mode == 'RGB': - ext = '.jpg' - elif img.mode == 'L': # black and white - ext = '.jpg' - # save image path - save_im_path = os.path.join(output_dir, im_prefix + ext) - if os.path.exists(save_im_path): - save_im_path = os.path.join( - output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) - - return save_im_path - - -def draw_bounding_box_on_image(save_im_path, output_data): - image = Image.open(save_im_path) - draw = ImageDraw.Draw(image) - for bbox in output_data: - # draw bouding box - if bbox['label'] == "MASK": - draw.line([(bbox['left'], bbox['top']), - (bbox['left'], bbox['bottom']), - (bbox['right'], bbox['bottom']), - (bbox['right'], bbox['top']), - (bbox['left'], bbox['top'])], - width=2, - fill='green') - else: - draw.line([(bbox['left'], bbox['top']), - (bbox['left'], bbox['bottom']), - (bbox['right'], bbox['bottom']), - (bbox['right'], bbox['top']), - (bbox['left'], bbox['top'])], - width=2, - fill='red') - # draw label - text = bbox['label'] + ": %.2f%%" % (100 * bbox['confidence']) - textsize_width, textsize_height = draw.textsize(text=text) - if image.mode == 'RGB' or image.mode == 'RGBA': - box_fill = (255, 255, 255) - text_fill = (0, 0, 0) - else: - box_fill = (255) - text_fill = (0) - - draw.rectangle( - xy=(bbox['left'], bbox['top'] - (textsize_height + 5), - bbox['left'] + textsize_width + 10, bbox['top'] - 3), - fill=box_fill) - draw.text( - xy=(bbox['left'], bbox['top'] - 15), text=text, fill=text_fill) - image.save(save_im_path) - - -def postprocess(confidence_out, org_im, org_im_path, detected_faces, output_dir, - visualization): - """ - Postprocess output of network. one element at a time. - - Args: - confidence_out (numpy.ndarray): confidences of each label. - org_im (numpy.ndarray): original image. - org_im_path (list): path of original image. - detected_faces (list): faces detected in a picture. - output_dir (str): output directory to store image. - visualization (bool): whether to save image or not. - - Returns: - output (dict): keys are 'data' and 'path', the correspoding values are: - data (list[dict]): 6 keys, where - 'label' is `MASK` or `NO MASK`, - 'left', 'top', 'right', 'bottom' are the coordinate of detection bounding box, - 'confidence' is the confidence of mask detection. - path (str): The path of original image. 
- """ - output = dict() - output['data'] = list() - output['path'] = org_im_path - - for index, face in enumerate(detected_faces): - label_idx = np.argmax(confidence_out[index]) - label_confidence = confidence_out[index][label_idx] - bbox = dict() - bbox['label'] = label_list[label_idx] - bbox['confidence'] = label_confidence - bbox['top'] = detected_faces[index]['top'] - bbox['bottom'] = detected_faces[index]['bottom'] - bbox['left'] = detected_faces[index]['left'] - bbox['right'] = detected_faces[index]['right'] - output['data'].append(bbox) - - if visualization: - check_dir(output_dir) - save_im_path = get_save_image_name(org_im, org_im_path, output_dir) - cv2.imwrite(save_im_path, org_im) - draw_bounding_box_on_image(save_im_path, output['data']) - - return output diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_server/data_feed.py b/hub_module/modules/image/face_detection/pyramidbox_lite_server/data_feed.py deleted file mode 100644 index 92ad422074b8bbd24e09d86cfaebc10159b2bc75..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/face_detection/pyramidbox_lite_server/data_feed.py +++ /dev/null @@ -1,68 +0,0 @@ -# coding=utf-8 -import os -import time -from collections import OrderedDict - -import cv2 -import numpy as np - -__all__ = ['reader'] - - -def preprocess(org_im, shrink): - image = org_im.copy() - image_height, image_width, image_channel = image.shape - if shrink != 1: - image_height, image_width = int(image_height * shrink), int( - image_width * shrink) - image = cv2.resize(image, (image_width, image_height), - cv2.INTER_NEAREST) - # HWC to CHW - if len(image.shape) == 3: - image = np.swapaxes(image, 1, 2) - image = np.swapaxes(image, 1, 0) - # mean, std - mean = [104., 117., 123.] - scale = 0.007843 - image = image.astype('float32') - image -= np.array(mean)[:, np.newaxis, np.newaxis].astype('float32') - image = image * scale - return image, image_height, image_width - - -def reader(images, paths, shrink): - """ - Preprocess to yield image. - - Args: - images (list(numpy.ndarray)): images data, shape of each is [H, W, C], color space is BGR. - paths (list[str]): paths to images. - shrink (float): parameter to control the resize scale in preprocess. - - Yield: - each (collections.OrderedDict): info of original image, preprocessed image. - """ - component = list() - if paths is not None: - assert type(paths) is list, "paths should be a list." - for im_path in paths: - each = OrderedDict() - assert os.path.isfile( - im_path), "The {} isn't a valid file path.".format(im_path) - im = cv2.imread(im_path) - each['org_im'] = im - each['org_im_path'] = im_path - component.append(each) - if images is not None: - assert type(images) is list, "images should be a list." 
- for im in images: - each = OrderedDict() - each['org_im'] = im - each['org_im_path'] = 'ndarray_time={}'.format( - round(time.time(), 6) * 1e6) - component.append(each) - - for element in component: - element['image'], element['image_height'], element[ - 'image_width'] = preprocess(element['org_im'], shrink) - yield element diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_server/module.py b/hub_module/modules/image/face_detection/pyramidbox_lite_server/module.py deleted file mode 100644 index 5e7be439d8c6a22a43fa020982e7bc1709639499..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/face_detection/pyramidbox_lite_server/module.py +++ /dev/null @@ -1,217 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division - -import ast -import argparse -import os - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving - -from pyramidbox_lite_server.data_feed import reader -from pyramidbox_lite_server.processor import postprocess, base64_to_cv2 - - -@moduleinfo( - name="pyramidbox_lite_server", - type="CV/face_detection", - author="baidu-vis", - author_email="", - summary="PyramidBox-Lite-Server is a high-performance face detection model.", - version="1.2.0") -class PyramidBoxLiteServer(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "pyramidbox_lite_server_face_detection") - self._set_config() - self.processor = self - - def _set_config(self): - """ - predictor config setting - """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu( - memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def face_detection(self, - images=None, - paths=None, - data=None, - use_gpu=False, - output_dir='detection_result', - visualization=False, - shrink=0.5, - confs_threshold=0.6): - """ - API for face detection. - - Args: - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - paths (list[str]): The paths of images. - use_gpu (bool): Whether to use gpu. - output_dir (str): The path to store output images. - visualization (bool): Whether to save image or not. - shrink (float): parameter to control the resize scale in preprocess. - confs_threshold (float): confidence threshold. - - Returns: - res (list[dict]): The result of face detection and save path of images. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
- ) - - # compatibility with older versions - if data: - if 'image' in data: - if paths is None: - paths = list() - paths += data['image'] - elif 'data' in data: - if images is None: - images = list() - images += data['data'] - - res = list() - # process one by one - for element in reader(images, paths, shrink): - image = np.expand_dims(element['image'], axis=0).astype('float32') - image_tensor = PaddleTensor(image.copy()) - data_out = self.gpu_predictor.run([ - image_tensor - ]) if use_gpu else self.cpu_predictor.run([image_tensor]) - out = postprocess( - data_out=data_out[0].as_ndarray(), - org_im=element['org_im'], - org_im_path=element['org_im_path'], - image_width=element['image_width'], - image_height=element['image_height'], - output_dir=output_dir, - visualization=visualization, - shrink=shrink, - confs_threshold=confs_threshold) - res.append(out) - return res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.face_detection(images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. - """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.face_detection( - paths=[args.input_path], - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization, - shrink=args.shrink, - confs_threshold=args.confs_threshold) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='detection_result', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - - def add_module_input_arg(self): - """ - Add the command input options. - """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") - self.arg_input_group.add_argument( - '--shrink', - type=ast.literal_eval, - default=0.5, - help= - "resize the image to shrink * original_shape before feeding into network." 
- ) - self.arg_input_group.add_argument( - '--confs_threshold', - type=ast.literal_eval, - default=0.6, - help="confidence threshold.") diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_server/processor.py b/hub_module/modules/image/face_detection/pyramidbox_lite_server/processor.py deleted file mode 100644 index 25f8fa3d9dda6078cc65dd4017f4e5df7b96eaa6..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/face_detection/pyramidbox_lite_server/processor.py +++ /dev/null @@ -1,119 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time -from collections import OrderedDict - -import base64 -import cv2 -import numpy as np -from PIL import Image - -__all__ = ['base64_to_cv2', 'postprocess'] - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def check_dir(dir_path): - """ - Create directory to save processed image. - - Args: - dir_path (str): directory path to save images. - - """ - if not os.path.exists(dir_path): - os.makedirs(dir_path) - elif os.path.isfile(dir_path): - os.remove(dir_path) - os.makedirs(dir_path) - - -def get_save_image_name(org_im, org_im_path, output_dir): - """ - Get save image name from source image path. - """ - # name prefix of original image - org_im_name = os.path.split(org_im_path)[-1] - im_prefix = os.path.splitext(org_im_name)[0] - # extension - img = Image.fromarray(org_im[:, :, ::-1]) - if img.mode == 'RGBA': - ext = '.png' - elif img.mode == 'RGB': - ext = '.jpg' - # save image path - save_im_path = os.path.join(output_dir, im_prefix + ext) - if os.path.exists(save_im_path): - save_im_path = os.path.join( - output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) - - return save_im_path - - -def clip_bbox(bbox, img_height, img_width): - bbox['left'] = int(max(min(bbox['left'], img_width), 0.)) - bbox['top'] = int(max(min(bbox['top'], img_height), 0.)) - bbox['right'] = int(max(min(bbox['right'], img_width), 0.)) - bbox['bottom'] = int(max(min(bbox['bottom'], img_height), 0.)) - return bbox - - -def postprocess(data_out, org_im, org_im_path, image_height, image_width, - output_dir, visualization, shrink, confs_threshold): - """ - Postprocess output of network. one image at a time. - - Args: - data_out (numpy.ndarray): output of network. - org_im (numpy.ndarray): original image. - org_im_path (list): path of riginal image. - image_height (int): height of preprocessed image. - image_width (int): width of preprocessed image. - output_dir (str): output directory to store image. - visualization (bool): whether to save image or not. - shrink (float): parameter to control the resize scale in preprocess. - confs_threshold (float): confidence threshold. - - Returns: - output (dict): keys are 'data' and 'path', the correspoding values are: - data (list[dict]): 5 keys, where - 'left', 'top', 'right', 'bottom' are the coordinate of detection bounding box, - 'confidence' is the confidence this bbox. - path (str): The path of original image. 
- """ - output = dict() - output['data'] = list() - output['path'] = org_im_path - - for each_data in data_out: - # each_data is a list: [label, confidence, left, top, right, bottom] - if each_data[1] > confs_threshold: - dt_bbox = dict() - dt_bbox['confidence'] = float(each_data[1]) - dt_bbox['left'] = image_width * each_data[2] / shrink - dt_bbox['top'] = image_height * each_data[3] / shrink - dt_bbox['right'] = image_width * each_data[4] / shrink - dt_bbox['bottom'] = image_height * each_data[5] / shrink - dt_bbox = clip_bbox(dt_bbox, org_im.shape[0], org_im.shape[1]) - output['data'].append(dt_bbox) - - if visualization: - check_dir(output_dir) - save_im_path = get_save_image_name(org_im, org_im_path, output_dir) - im_out = org_im.copy() - if len(output['data']) > 0: - for bbox in output['data']: - cv2.rectangle(im_out, (bbox['left'], bbox['top']), - (bbox['right'], bbox['bottom']), (255, 255, 0), 2) - cv2.imwrite(save_im_path, im_out) - - return output diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_server_mask/data_feed.py b/hub_module/modules/image/face_detection/pyramidbox_lite_server_mask/data_feed.py deleted file mode 100644 index cce81017a4572eef73a305699c6cda78aea5f33d..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/face_detection/pyramidbox_lite_server_mask/data_feed.py +++ /dev/null @@ -1,212 +0,0 @@ -# coding=utf-8 -import os -import math -import time -from collections import OrderedDict - -import cv2 -import numpy as np - -__all__ = ['reader'] - -multi_scales = [0.3, 0.6, 0.9] - - -def bbox_vote(det): - order = det[:, 4].ravel().argsort()[::-1] - det = det[order, :] - if det.shape[0] == 0: - dets = np.array([[10, 10, 20, 20, 0.002]]) - det = np.empty(shape=[0, 5]) - while det.shape[0] > 0: - # IOU - area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1) - xx1 = np.maximum(det[0, 0], det[:, 0]) - yy1 = np.maximum(det[0, 1], det[:, 1]) - xx2 = np.minimum(det[0, 2], det[:, 2]) - yy2 = np.minimum(det[0, 3], det[:, 3]) - w = np.maximum(0.0, xx2 - xx1 + 1) - h = np.maximum(0.0, yy2 - yy1 + 1) - inter = w * h - o = inter / (area[0] + area[:] - inter) - # nms - merge_index = np.where(o >= 0.3)[0] - det_accu = det[merge_index, :] - det = np.delete(det, merge_index, 0) - if merge_index.shape[0] <= 1: - if det.shape[0] == 0: - try: - dets = np.row_stack((dets, det_accu)) - except: - dets = det_accu - continue - det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4)) - max_score = np.max(det_accu[:, 4]) - det_accu_sum = np.zeros((1, 5)) - det_accu_sum[:, 0:4] = np.sum( - det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:]) - det_accu_sum[:, 4] = max_score - try: - dets = np.row_stack((dets, det_accu_sum)) - except: - dets = det_accu_sum - dets = dets[0:750, :] - return dets - - -def crop(image, - pts, - shift=0, - scale=1.5, - rotate=0, - res_width=128, - res_height=128): - res = (res_width, res_height) - idx1 = 0 - idx2 = 1 - # angle - alpha = 0 - if pts[idx2, 0] != -1 and pts[idx2, 1] != -1 and pts[idx1, 0] != -1 and pts[ - idx1, 1] != -1: - alpha = math.atan2(pts[idx2, 1] - pts[idx1, 1], - pts[idx2, 0] - pts[idx1, 0]) * 180 / math.pi - pts[pts == -1] = np.inf - coord_min = np.min(pts, 0) - pts[pts == np.inf] = -1 - coord_max = np.max(pts, 0) - # coordinates of center point - c = np.array([ - coord_max[0] - (coord_max[0] - coord_min[0]) / 2, - coord_max[1] - (coord_max[1] - coord_min[1]) / 2 - ]) # center - max_wh = max((coord_max[0] - coord_min[0]) / 2, - (coord_max[1] - coord_min[1]) / 2) - # Shift the center 
point, rot add eyes angle - c = c + shift * max_wh - rotate = rotate + alpha - M = cv2.getRotationMatrix2D((c[0], c[1]), rotate, - res[0] / (2 * max_wh * scale)) - M[0, 2] = M[0, 2] - (c[0] - res[0] / 2.0) - M[1, 2] = M[1, 2] - (c[1] - res[0] / 2.0) - image_out = cv2.warpAffine(image, M, res) - return image_out, M - - -def color_normalize(image, mean, std=None): - if image.shape[-1] == 1: - image = np.repeat(image, axis=2) - h, w, c = image.shape - image = np.transpose(image, (2, 0, 1)) - image = np.subtract(image.reshape(c, -1), mean[:, np.newaxis]).reshape( - -1, h, w) - image = np.transpose(image, (1, 2, 0)) - return image - - -def process_image(org_im, face): - pts = np.array([ - face['left'], face['top'], face['right'], face['top'], face['left'], - face['bottom'], face['right'], face['bottom'] - ]).reshape(4, 2).astype(np.float32) - image_in, M = crop(org_im, pts) - image_in = image_in / 256.0 - image_in = color_normalize(image_in, mean=np.array([0.5, 0.5, 0.5])) - image_in = image_in.astype(np.float32).transpose([2, 0, 1]).reshape( - -1, 3, 128, 128) - return image_in - - -def reader(face_detector, shrink, confs_threshold, images, paths, use_gpu, - use_multi_scale): - """ - Preprocess to yield image. - - Args: - face_detector (class): class to detect faces. - shrink (float): parameter to control the resize scale in face_detector. - confs_threshold (float): confidence threshold of face_detector. - images (list(numpy.ndarray)): images data, shape of each is [H, W, C], color space is BGR. - paths (list[str]): paths to images. - use_gpu (bool): whether to use gpu in face_detector. - use_multi_scale (bool): whether to enable multi-scale face detection. - Yield: - element (collections.OrderedDict): info of original image, preprocessed image, contains 3 keys: - org_im (numpy.ndarray) : original image. - org_im_path (str): path to original image. - preprocessed (list[OrderedDict]):each element contains 2 keys: - face (dict): face detected in the original image. - image (numpy.ndarray): data to be fed into neural network. - """ - component = list() - if paths is not None: - assert type(paths) is list, "paths should be a list." - for im_path in paths: - each = OrderedDict() - assert os.path.isfile( - im_path), "The {} isn't a valid file path.".format(im_path) - im = cv2.imread(im_path) - each['org_im'] = im - each['org_im_path'] = im_path - component.append(each) - if images is not None: - assert type(images) is list, "images should be a list." 
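
The multi-scale branch just below runs the face detector once per factor in `multi_scales` ([0.3, 0.6, 0.9] above) and fuses the resulting boxes with `bbox_vote`. A hedged usage sketch from the module side ("crowd.jpg" is a hypothetical image):

```python
import paddlehub as hub

mask = hub.Module(name="pyramidbox_lite_server_mask")
results = mask.face_detection(paths=["crowd.jpg"],
                              use_multi_scale=True,   # recall over speed
                              confs_threshold=0.6)
```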
- for im in images: - each = OrderedDict() - each['org_im'] = im - each['org_im_path'] = 'ndarray_time={}'.format( - round(time.time(), 6) * 1e6) - component.append(each) - - for element in component: - if use_multi_scale: - scale_res = list() - detect_faces = list() - for scale in multi_scales: - _detect_res = face_detector.face_detection( - images=[element['org_im']], - use_gpu=use_gpu, - visualization=False, - shrink=scale, - confs_threshold=confs_threshold) - - _s = list() - for _face in _detect_res[0]['data']: - _face_list = [ - _face['left'], _face['top'], _face['right'], - _face['bottom'], _face['confidence'] - ] - _s.append(_face_list) - - if _s: - scale_res.append(np.array(_s)) - - scale_res = np.row_stack(scale_res) - scale_res = bbox_vote(scale_res) - keep_index = np.where(scale_res[:, 4] >= confs_threshold)[0] - scale_res = scale_res[keep_index, :] - for data in scale_res: - face = { - 'left': data[0], - 'top': data[1], - 'right': data[2], - 'bottom': data[3], - 'confidence': data[4] - } - detect_faces.append(face) - else: - _detect_res = face_detector.face_detection( - images=[element['org_im']], - use_gpu=use_gpu, - visualization=False, - shrink=shrink, - confs_threshold=confs_threshold) - detect_faces = _detect_res[0]['data'] - - element['preprocessed'] = list() - for face in detect_faces: - handled = OrderedDict() - handled['face'] = face - handled['image'] = process_image(element['org_im'], face) - element['preprocessed'].append(handled) - - yield element diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_server_mask/module.py b/hub_module/modules/image/face_detection/pyramidbox_lite_server_mask/module.py deleted file mode 100644 index 06cc0f3cee0101b6806f6dfa4545cbe3a8babddc..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/face_detection/pyramidbox_lite_server_mask/module.py +++ /dev/null @@ -1,298 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division - -import ast -import argparse -import os - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving - -from pyramidbox_lite_server_mask.data_feed import reader -from pyramidbox_lite_server_mask.processor import postprocess, base64_to_cv2 - - -@moduleinfo( - name="pyramidbox_lite_server_mask", - type="CV/face_detection", - author="baidu-vis", - author_email="", - summary= - "PyramidBox-Lite-Server-Mask is a high-performance face detection model used to detect whether people wear masks.", - version="1.3.0") -class PyramidBoxLiteServerMask(hub.Module): - def _initialize(self, face_detector_module=None): - """ - Args: - face_detector_module (class): module to detect face. 
- """ - self.default_pretrained_model_path = os.path.join( - self.directory, "pyramidbox_lite_server_mask_model") - if face_detector_module is None: - self.face_detector = hub.Module(name='pyramidbox_lite_server') - else: - self.face_detector = face_detector_module - self._set_config() - self.processor = self - - def _set_config(self): - """ - predictor config setting - """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu( - memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def set_face_detector_module(self, face_detector_module): - """ - Set face detector. - Args: - face_detector_module (class): module to detect faces. - """ - self.face_detector = face_detector_module - - def get_face_detector_module(self): - return self.face_detector - - def face_detection(self, - images=None, - paths=None, - data=None, - batch_size=1, - use_gpu=False, - visualization=False, - output_dir='detection_result', - use_multi_scale=False, - shrink=0.5, - confs_threshold=0.6): - """ - API for face detection. - - Args: - images (list(numpy.ndarray)): images data, shape of each is [H, W, C], color space must be BGR. - paths (list[str]): The paths of images. - use_gpu (bool): Whether to use gpu. - visualization (bool): Whether to save image or not. - output_dir (str): The path to store output images. - use_multi_scale (bool): whether to enable multi-scale face detection. Enabling multi-scale face detection - can increase the accuracy to detect faces, however, - it reduce the prediction speed for the increase model calculation. - shrink (float): parameter to control the resize scale in preprocess. - confs_threshold (float): confidence threshold. - - Returns: - res (list[dict]): The result of face detection and save path of images. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
- ) - - # compatibility with older versions - if data: - if 'image' in data: - if paths is None: - paths = list() - paths += data['image'] - elif 'data' in data: - if images is None: - images = list() - images += data['data'] - - # get all data - all_element = list() - for yield_data in reader(self.face_detector, shrink, confs_threshold, - images, paths, use_gpu, use_multi_scale): - all_element.append(yield_data) - - image_list = list() - element_image_num = list() - for i in range(len(all_element)): - element_image = [ - handled['image'] for handled in all_element[i]['preprocessed'] - ] - element_image_num.append(len(element_image)) - image_list.extend(element_image) - - total_num = len(image_list) - loop_num = int(np.ceil(total_num / batch_size)) - - predict_out = np.zeros((1, 2)) - for iter_id in range(loop_num): - batch_data = list() - handle_id = iter_id * batch_size - for element_id in range(batch_size): - try: - batch_data.append(image_list[handle_id + element_id]) - except: - pass - - image_arr = np.squeeze(np.array(batch_data), axis=1) - image_tensor = PaddleTensor(image_arr.copy()) - data_out = self.gpu_predictor.run([ - image_tensor - ]) if use_gpu else self.cpu_predictor.run([image_tensor]) - # len(data_out) == 1 - # data_out[0].as_ndarray().shape == (-1, 2) - data_out = data_out[0].as_ndarray() - predict_out = np.concatenate((predict_out, data_out)) - - predict_out = predict_out[1:] - # postprocess one by one - res = list() - for i in range(len(all_element)): - detect_faces_list = [ - handled['face'] for handled in all_element[i]['preprocessed'] - ] - interval_left = sum(element_image_num[0:i]) - interval_right = interval_left + element_image_num[i] - out = postprocess( - confidence_out=predict_out[interval_left:interval_right], - org_im=all_element[i]['org_im'], - org_im_path=all_element[i]['org_im_path'], - detected_faces=detect_faces_list, - output_dir=output_dir, - visualization=visualization) - res.append(out) - return res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - classifier_dir = os.path.join(dirname, 'mask_detector') - detector_dir = os.path.join(dirname, 'pyramidbox_lite') - self._save_classifier_model(classifier_dir, model_filename, - params_filename, combined) - self._save_detector_model(detector_dir, model_filename, params_filename, - combined) - - def _save_detector_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - self.face_detector.save_inference_model(dirname, model_filename, - params_filename, combined) - - def _save_classifier_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. 
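
Per `save_inference_model` above, exporting this module writes the two sub-models into separate directories. A sketch (the output path is ours):

```python
import paddlehub as hub

mask = hub.Module(name="pyramidbox_lite_server_mask")
mask.save_inference_model(dirname="./inference", combined=True)
# expected layout: ./inference/pyramidbox_lite/{__model__,__params__}
#                  ./inference/mask_detector/{__model__,__params__}
```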
- """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.face_detection(images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. - """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.face_detection( - paths=[args.input_path], - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization, - shrink=args.shrink, - confs_threshold=args.confs_threshold) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='detection_result', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - - def add_module_input_arg(self): - """ - Add the command input options. - """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") - self.arg_input_group.add_argument( - '--shrink', - type=ast.literal_eval, - default=0.5, - help= - "resize the image to `shrink * original_shape` before feeding into network." - ) - self.arg_input_group.add_argument( - '--confs_threshold', - type=ast.literal_eval, - default=0.6, - help="confidence threshold.") diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_server_mask/processor.py b/hub_module/modules/image/face_detection/pyramidbox_lite_server_mask/processor.py deleted file mode 100644 index 61c7be6addd3c1791d1c8e46d0bcd58dcf93e8c9..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/face_detection/pyramidbox_lite_server_mask/processor.py +++ /dev/null @@ -1,149 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time -from collections import OrderedDict - -import base64 -import cv2 -import numpy as np -from PIL import Image, ImageDraw - -__all__ = ['base64_to_cv2', 'postprocess'] - -label_list = ['NO MASK', 'MASK'] - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def check_dir(dir_path): - """ - Create directory to save processed image. - - Args: - dir_path (str): directory path to save images. - - """ - if not os.path.exists(dir_path): - os.makedirs(dir_path) - elif os.path.isfile(dir_path): - os.remove(dir_path) - os.makedirs(dir_path) - - -def get_save_image_name(org_im, org_im_path, output_dir): - """ - Get save image name from source image path. 
- """ - # name prefix of original image - org_im_name = os.path.split(org_im_path)[-1] - im_prefix = os.path.splitext(org_im_name)[0] - # extension - img = Image.fromarray(org_im[:, :, ::-1]) - if img.mode == 'RGBA': - ext = '.png' - elif img.mode == 'RGB': - ext = '.jpg' - elif img.mode == 'L': # black and white - ext = '.jpg' - # save image path - save_im_path = os.path.join(output_dir, im_prefix + ext) - if os.path.exists(save_im_path): - save_im_path = os.path.join( - output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) - - return save_im_path - - -def draw_bounding_box_on_image(save_im_path, output_data): - image = Image.open(save_im_path) - draw = ImageDraw.Draw(image) - for bbox in output_data: - # draw bouding box - if bbox['label'] == "MASK": - draw.line([(bbox['left'], bbox['top']), - (bbox['left'], bbox['bottom']), - (bbox['right'], bbox['bottom']), - (bbox['right'], bbox['top']), - (bbox['left'], bbox['top'])], - width=2, - fill='green') - else: - draw.line([(bbox['left'], bbox['top']), - (bbox['left'], bbox['bottom']), - (bbox['right'], bbox['bottom']), - (bbox['right'], bbox['top']), - (bbox['left'], bbox['top'])], - width=2, - fill='red') - # draw label - text = bbox['label'] + ": %.2f%%" % (100 * bbox['confidence']) - textsize_width, textsize_height = draw.textsize(text=text) - if image.mode == 'RGB' or image.mode == 'RGBA': - box_fill = (255, 255, 255) - text_fill = (0, 0, 0) - else: - box_fill = (255) - text_fill = (0) - - draw.rectangle( - xy=(bbox['left'], bbox['top'] - (textsize_height + 5), - bbox['left'] + textsize_width + 10, bbox['top'] - 3), - fill=box_fill) - draw.text( - xy=(bbox['left'], bbox['top'] - 15), text=text, fill=text_fill) - image.save(save_im_path) - - -def postprocess(confidence_out, org_im, org_im_path, detected_faces, output_dir, - visualization): - """ - Postprocess output of network. one element at a time. - - Args: - confidence_out (numpy.ndarray): confidences of each label. - org_im (numpy.ndarray): original image. - org_im_path (list): path of original image. - detected_faces (list): faces detected in a picture. - output_dir (str): output directory to store image. - visualization (bool): whether to save image or not. - - Returns: - output (dict): keys are 'data' and 'path', the correspoding values are: - data (list[dict]): 6 keys, where - 'label' is `MASK` or `NO MASK`, - 'left', 'top', 'right', 'bottom' are the coordinate of detection bounding box, - 'confidence' is the confidence of mask detection. - path (str): The path of original image. 
- """ - output = dict() - output['data'] = list() - output['path'] = org_im_path - - for index, face in enumerate(detected_faces): - label_idx = np.argmax(confidence_out[index]) - label_confidence = confidence_out[index][label_idx] - bbox = dict() - bbox['label'] = label_list[label_idx] - bbox['confidence'] = label_confidence - bbox['top'] = detected_faces[index]['top'] - bbox['bottom'] = detected_faces[index]['bottom'] - bbox['left'] = detected_faces[index]['left'] - bbox['right'] = detected_faces[index]['right'] - output['data'].append(bbox) - - if visualization: - check_dir(output_dir) - save_im_path = get_save_image_name(org_im, org_im_path, output_dir) - cv2.imwrite(save_im_path, org_im) - draw_bounding_box_on_image(save_im_path, output['data']) - - return output diff --git a/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/data_feed.py b/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/data_feed.py deleted file mode 100644 index 0899f7e8421cbfd8511a1f1c9192633e88c02038..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/data_feed.py +++ /dev/null @@ -1,53 +0,0 @@ -# coding=utf-8 -import os -from collections import OrderedDict - -import cv2 -import numpy as np - -__all__ = ['reader'] - - -def preprocess(orig_image): - image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB) - image = cv2.resize(image, (320, 240)) - image_mean = np.array([127, 127, 127]) - image = (image - image_mean) / 128.0 - image = np.transpose(image, [2, 0, 1]) - return image - - -def reader(images=None, paths=None): - """ - Preprocess to yield image. - - Args: - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - paths (list[str]): paths to images. - - Yield: - each (collections.OrderedDict): info of original image, preprocessed image. - """ - component = list() - if paths: - for im_path in paths: - each = OrderedDict() - assert os.path.isfile( - im_path), "The {} isn't a valid file path.".format(im_path) - im = cv2.imread(im_path) - each['orig_im'] = im - each['orig_im_shape'] = im.shape # height, width, channel - each['orig_im_path'] = im_path - component.append(each) - if images is not None: - assert type(images) is list, "images should be a list." 
- for im in images: - each = OrderedDict() - each['orig_im'] = im - each['orig_im_path'] = None - each['orig_im_shape'] = im.shape # height, width, channel - component.append(each) - - for element in component: - element['image'] = preprocess(element['orig_im']) - yield element diff --git a/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/face_detector.py b/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/face_detector.py deleted file mode 100644 index 82931fec54aafd13b8af9c0f2d9a3cec3ebad790..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/face_detector.py +++ /dev/null @@ -1,1212 +0,0 @@ -# coding=utf-8 -from paddle.fluid.initializer import Constant -from paddle.fluid.param_attr import ParamAttr -import paddle.fluid as fluid - - -def face_detector_320(): - _319 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _322 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _323 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) - _333 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _336 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _337 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) - _365 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _368 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _369 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) - _379 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _382 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _383 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) - _405 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _408 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _409 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) - _419 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _422 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _423 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) - _437 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _440 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _441 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) - _449 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _452 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _453 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) - _463 = fluid.layers.fill_constant( - shape=[1], dtype='float32', value=0.10000000149011612) - _465 = fluid.layers.create_parameter( - dtype='float32', - shape=[1, 4420, 2], - name='_465', - attr='_465', - default_initializer=Constant(0.0)) - _467 = fluid.layers.create_parameter( - dtype='float32', - shape=[1, 4420, 2], - name='_467', - attr='_467', - default_initializer=Constant(0.0)) - _470 = fluid.layers.fill_constant( - shape=[1], dtype='float32', value=0.20000000298023224) - _473 = fluid.layers.create_parameter( - dtype='float32', - shape=[1, 4420, 2], - name='_473', - attr='_473', - default_initializer=Constant(0.0)) - _478 = fluid.layers.fill_constant(shape=[1], dtype='float32', value=2.0) - _483 = fluid.layers.fill_constant(shape=[1], dtype='float32', value=2.0) - _input = fluid.layers.data( - dtype='float32', - shape=[1, 3, 240, 320], - name='_input', - append_batch_size=False) - _325 = 
fluid.layers.assign(_322) - _326 = fluid.layers.assign(_323) - _339 = fluid.layers.assign(_336) - _340 = fluid.layers.assign(_337) - _371 = fluid.layers.assign(_368) - _372 = fluid.layers.assign(_369) - _385 = fluid.layers.assign(_382) - _386 = fluid.layers.assign(_383) - _411 = fluid.layers.assign(_408) - _412 = fluid.layers.assign(_409) - _425 = fluid.layers.assign(_422) - _426 = fluid.layers.assign(_423) - _443 = fluid.layers.assign(_440) - _444 = fluid.layers.assign(_441) - _455 = fluid.layers.assign(_452) - _456 = fluid.layers.assign(_453) - _245 = fluid.layers.conv2d( - _input, - num_filters=16, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_base_net_0_0_weight', - name='_245', - bias_attr=False) - _246 = fluid.layers.batch_norm( - _245, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_0_1_weight', - bias_attr='_base_net_0_1_bias', - moving_mean_name='_base_net_0_1_running_mean', - moving_variance_name='_base_net_0_1_running_var', - use_global_stats=False, - name='_246') - _247 = fluid.layers.relu(_246, name='_247') - _248 = fluid.layers.conv2d( - _247, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=16, - param_attr='_base_net_1_0_weight', - name='_248', - bias_attr=False) - _249 = fluid.layers.batch_norm( - _248, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_1_1_weight', - bias_attr='_base_net_1_1_bias', - moving_mean_name='_base_net_1_1_running_mean', - moving_variance_name='_base_net_1_1_running_var', - use_global_stats=False, - name='_249') - _250 = fluid.layers.relu(_249, name='_250') - _251 = fluid.layers.conv2d( - _250, - num_filters=32, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_1_3_weight', - name='_251', - bias_attr=False) - _252 = fluid.layers.batch_norm( - _251, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_1_4_weight', - bias_attr='_base_net_1_4_bias', - moving_mean_name='_base_net_1_4_running_mean', - moving_variance_name='_base_net_1_4_running_var', - use_global_stats=False, - name='_252') - _253 = fluid.layers.relu(_252, name='_253') - _254 = fluid.layers.conv2d( - _253, - num_filters=32, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=32, - param_attr='_base_net_2_0_weight', - name='_254', - bias_attr=False) - _255 = fluid.layers.batch_norm( - _254, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_2_1_weight', - bias_attr='_base_net_2_1_bias', - moving_mean_name='_base_net_2_1_running_mean', - moving_variance_name='_base_net_2_1_running_var', - use_global_stats=False, - name='_255') - _256 = fluid.layers.relu(_255, name='_256') - _257 = fluid.layers.conv2d( - _256, - num_filters=32, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_2_3_weight', - name='_257', - bias_attr=False) - _258 = fluid.layers.batch_norm( - _257, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_2_4_weight', - bias_attr='_base_net_2_4_bias', - moving_mean_name='_base_net_2_4_running_mean', - 
moving_variance_name='_base_net_2_4_running_var', - use_global_stats=False, - name='_258') - _259 = fluid.layers.relu(_258, name='_259') - _260 = fluid.layers.conv2d( - _259, - num_filters=32, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=32, - param_attr='_base_net_3_0_weight', - name='_260', - bias_attr=False) - _261 = fluid.layers.batch_norm( - _260, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_3_1_weight', - bias_attr='_base_net_3_1_bias', - moving_mean_name='_base_net_3_1_running_mean', - moving_variance_name='_base_net_3_1_running_var', - use_global_stats=False, - name='_261') - _262 = fluid.layers.relu(_261, name='_262') - _263 = fluid.layers.conv2d( - _262, - num_filters=32, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_3_3_weight', - name='_263', - bias_attr=False) - _264 = fluid.layers.batch_norm( - _263, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_3_4_weight', - bias_attr='_base_net_3_4_bias', - moving_mean_name='_base_net_3_4_running_mean', - moving_variance_name='_base_net_3_4_running_var', - use_global_stats=False, - name='_264') - _265 = fluid.layers.relu(_264, name='_265') - _266 = fluid.layers.conv2d( - _265, - num_filters=32, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=32, - param_attr='_base_net_4_0_weight', - name='_266', - bias_attr=False) - _267 = fluid.layers.batch_norm( - _266, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_4_1_weight', - bias_attr='_base_net_4_1_bias', - moving_mean_name='_base_net_4_1_running_mean', - moving_variance_name='_base_net_4_1_running_var', - use_global_stats=False, - name='_267') - _268 = fluid.layers.relu(_267, name='_268') - _269 = fluid.layers.conv2d( - _268, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_4_3_weight', - name='_269', - bias_attr=False) - _270 = fluid.layers.batch_norm( - _269, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_4_4_weight', - bias_attr='_base_net_4_4_bias', - moving_mean_name='_base_net_4_4_running_mean', - moving_variance_name='_base_net_4_4_running_var', - use_global_stats=False, - name='_270') - _271 = fluid.layers.relu(_270, name='_271') - _272 = fluid.layers.conv2d( - _271, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=64, - param_attr='_base_net_5_0_weight', - name='_272', - bias_attr=False) - _273 = fluid.layers.batch_norm( - _272, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_5_1_weight', - bias_attr='_base_net_5_1_bias', - moving_mean_name='_base_net_5_1_running_mean', - moving_variance_name='_base_net_5_1_running_var', - use_global_stats=False, - name='_273') - _274 = fluid.layers.relu(_273, name='_274') - _275 = fluid.layers.conv2d( - _274, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_5_3_weight', - name='_275', - bias_attr=False) - _276 = fluid.layers.batch_norm( - _275, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - 
data_layout='NCHW', - is_test=True, - param_attr='_base_net_5_4_weight', - bias_attr='_base_net_5_4_bias', - moving_mean_name='_base_net_5_4_running_mean', - moving_variance_name='_base_net_5_4_running_var', - use_global_stats=False, - name='_276') - _277 = fluid.layers.relu(_276, name='_277') - _278 = fluid.layers.conv2d( - _277, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=64, - param_attr='_base_net_6_0_weight', - name='_278', - bias_attr=False) - _279 = fluid.layers.batch_norm( - _278, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_6_1_weight', - bias_attr='_base_net_6_1_bias', - moving_mean_name='_base_net_6_1_running_mean', - moving_variance_name='_base_net_6_1_running_var', - use_global_stats=False, - name='_279') - _280 = fluid.layers.relu(_279, name='_280') - _281 = fluid.layers.conv2d( - _280, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_6_3_weight', - name='_281', - bias_attr=False) - _282 = fluid.layers.batch_norm( - _281, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_6_4_weight', - bias_attr='_base_net_6_4_bias', - moving_mean_name='_base_net_6_4_running_mean', - moving_variance_name='_base_net_6_4_running_var', - use_global_stats=False, - name='_282') - _283 = fluid.layers.relu(_282, name='_283') - _284 = fluid.layers.conv2d( - _283, - num_filters=8, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch0_0_conv_weight', - name='_284', - bias_attr=False) - _291 = fluid.layers.conv2d( - _283, - num_filters=8, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch1_0_conv_weight', - name='_291', - bias_attr=False) - _298 = fluid.layers.conv2d( - _283, - num_filters=8, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch2_0_conv_weight', - name='_298', - bias_attr=False) - _311 = fluid.layers.conv2d( - _283, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_shortcut_conv_weight', - name='_311', - bias_attr=False) - _285 = fluid.layers.batch_norm( - _284, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch0_0_bn_weight', - bias_attr='_base_net_7_branch0_0_bn_bias', - moving_mean_name='_base_net_7_branch0_0_bn_running_mean', - moving_variance_name='_base_net_7_branch0_0_bn_running_var', - use_global_stats=False, - name='_285') - _292 = fluid.layers.batch_norm( - _291, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch1_0_bn_weight', - bias_attr='_base_net_7_branch1_0_bn_bias', - moving_mean_name='_base_net_7_branch1_0_bn_running_mean', - moving_variance_name='_base_net_7_branch1_0_bn_running_var', - use_global_stats=False, - name='_292') - _299 = fluid.layers.batch_norm( - _298, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch2_0_bn_weight', - bias_attr='_base_net_7_branch2_0_bn_bias', - moving_mean_name='_base_net_7_branch2_0_bn_running_mean', - 
moving_variance_name='_base_net_7_branch2_0_bn_running_var', - use_global_stats=False, - name='_299') - _312 = fluid.layers.batch_norm( - _311, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_shortcut_bn_weight', - bias_attr='_base_net_7_shortcut_bn_bias', - moving_mean_name='_base_net_7_shortcut_bn_running_mean', - moving_variance_name='_base_net_7_shortcut_bn_running_var', - use_global_stats=False, - name='_312') - _286 = fluid.layers.conv2d( - _285, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch0_1_conv_weight', - name='_286', - bias_attr=False) - _293 = fluid.layers.conv2d( - _292, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch1_1_conv_weight', - name='_293', - bias_attr=False) - _300 = fluid.layers.conv2d( - _299, - num_filters=12, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch2_1_conv_weight', - name='_300', - bias_attr=False) - _287 = fluid.layers.batch_norm( - _286, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch0_1_bn_weight', - bias_attr='_base_net_7_branch0_1_bn_bias', - moving_mean_name='_base_net_7_branch0_1_bn_running_mean', - moving_variance_name='_base_net_7_branch0_1_bn_running_var', - use_global_stats=False, - name='_287') - _294 = fluid.layers.batch_norm( - _293, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch1_1_bn_weight', - bias_attr='_base_net_7_branch1_1_bn_bias', - moving_mean_name='_base_net_7_branch1_1_bn_running_mean', - moving_variance_name='_base_net_7_branch1_1_bn_running_var', - use_global_stats=False, - name='_294') - _301 = fluid.layers.batch_norm( - _300, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch2_1_bn_weight', - bias_attr='_base_net_7_branch2_1_bn_bias', - moving_mean_name='_base_net_7_branch2_1_bn_running_mean', - moving_variance_name='_base_net_7_branch2_1_bn_running_var', - use_global_stats=False, - name='_301') - _288 = fluid.layers.relu(_287, name='_288') - _295 = fluid.layers.relu(_294, name='_295') - _302 = fluid.layers.relu(_301, name='_302') - _289 = fluid.layers.conv2d( - _288, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[2, 2], - dilation=[2, 2], - groups=1, - param_attr='_base_net_7_branch0_2_conv_weight', - name='_289', - bias_attr=False) - _296 = fluid.layers.conv2d( - _295, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[3, 3], - dilation=[3, 3], - groups=1, - param_attr='_base_net_7_branch1_2_conv_weight', - name='_296', - bias_attr=False) - _303 = fluid.layers.conv2d( - _302, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch2_2_conv_weight', - name='_303', - bias_attr=False) - _290 = fluid.layers.batch_norm( - _289, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch0_2_bn_weight', - bias_attr='_base_net_7_branch0_2_bn_bias', - moving_mean_name='_base_net_7_branch0_2_bn_running_mean', - 
moving_variance_name='_base_net_7_branch0_2_bn_running_var', - use_global_stats=False, - name='_290') - _297 = fluid.layers.batch_norm( - _296, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch1_2_bn_weight', - bias_attr='_base_net_7_branch1_2_bn_bias', - moving_mean_name='_base_net_7_branch1_2_bn_running_mean', - moving_variance_name='_base_net_7_branch1_2_bn_running_var', - use_global_stats=False, - name='_297') - _304 = fluid.layers.batch_norm( - _303, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch2_2_bn_weight', - bias_attr='_base_net_7_branch2_2_bn_bias', - moving_mean_name='_base_net_7_branch2_2_bn_running_mean', - moving_variance_name='_base_net_7_branch2_2_bn_running_var', - use_global_stats=False, - name='_304') - _305 = fluid.layers.relu(_304, name='_305') - _306 = fluid.layers.conv2d( - _305, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[5, 5], - dilation=[5, 5], - groups=1, - param_attr='_base_net_7_branch2_3_conv_weight', - name='_306', - bias_attr=False) - _307 = fluid.layers.batch_norm( - _306, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch2_3_bn_weight', - bias_attr='_base_net_7_branch2_3_bn_bias', - moving_mean_name='_base_net_7_branch2_3_bn_running_mean', - moving_variance_name='_base_net_7_branch2_3_bn_running_var', - use_global_stats=False, - name='_307') - _308 = fluid.layers.concat([_290, _297, _307], axis=1) - _309 = fluid.layers.conv2d( - _308, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_ConvLinear_conv_weight', - name='_309', - bias_attr=False) - _310 = fluid.layers.batch_norm( - _309, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_ConvLinear_bn_weight', - bias_attr='_base_net_7_ConvLinear_bn_bias', - moving_mean_name='_base_net_7_ConvLinear_bn_running_mean', - moving_variance_name='_base_net_7_ConvLinear_bn_running_var', - use_global_stats=False, - name='_310') - _313 = fluid.layers.elementwise_add(x=_310, y=_312, name='_313') - _314 = fluid.layers.relu(_313, name='_314') - _315 = fluid.layers.conv2d( - _314, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=64, - param_attr='_classification_headers_0_0_weight', - name='_315', - bias_attr='_classification_headers_0_0_bias') - _329 = fluid.layers.conv2d( - _314, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=64, - param_attr='_regression_headers_0_0_weight', - name='_329', - bias_attr='_regression_headers_0_0_bias') - _343 = fluid.layers.conv2d( - _314, - num_filters=64, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=64, - param_attr='_base_net_8_0_weight', - name='_343', - bias_attr=False) - _316 = fluid.layers.relu(_315, name='_316') - _330 = fluid.layers.relu(_329, name='_330') - _344 = fluid.layers.batch_norm( - _343, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_8_1_weight', - bias_attr='_base_net_8_1_bias', - moving_mean_name='_base_net_8_1_running_mean', - moving_variance_name='_base_net_8_1_running_var', - use_global_stats=False, - 
name='_344') - _317 = fluid.layers.conv2d( - _316, - num_filters=6, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_classification_headers_0_2_weight', - name='_317', - bias_attr='_classification_headers_0_2_bias') - _331 = fluid.layers.conv2d( - _330, - num_filters=12, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_regression_headers_0_2_weight', - name='_331', - bias_attr='_regression_headers_0_2_bias') - _345 = fluid.layers.relu(_344, name='_345') - _318 = fluid.layers.transpose(_317, perm=[0, 2, 3, 1], name='_318') - _332 = fluid.layers.transpose(_331, perm=[0, 2, 3, 1], name='_332') - _346 = fluid.layers.conv2d( - _345, - num_filters=128, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_8_3_weight', - name='_346', - bias_attr=False) - _320 = fluid.layers.shape(_318) - _334 = fluid.layers.shape(_332) - _347 = fluid.layers.batch_norm( - _346, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_8_4_weight', - bias_attr='_base_net_8_4_bias', - moving_mean_name='_base_net_8_4_running_mean', - moving_variance_name='_base_net_8_4_running_var', - use_global_stats=False, - name='_347') - _321 = fluid.layers.gather(input=_320, index=_319) - _335 = fluid.layers.gather(input=_334, index=_333) - _348 = fluid.layers.relu(_347, name='_348') - _324 = fluid.layers.assign(_321) - _338 = fluid.layers.assign(_335) - _349 = fluid.layers.conv2d( - _348, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=128, - param_attr='_base_net_9_0_weight', - name='_349', - bias_attr=False) - _327 = fluid.layers.concat([_324, _325, _326], axis=0) - _341 = fluid.layers.concat([_338, _339, _340], axis=0) - _350 = fluid.layers.batch_norm( - _349, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_9_1_weight', - bias_attr='_base_net_9_1_bias', - moving_mean_name='_base_net_9_1_running_mean', - moving_variance_name='_base_net_9_1_running_var', - use_global_stats=False, - name='_350') - _327_cast = fluid.layers.cast(_327, dtype='int32') - _328 = fluid.layers.reshape( - _318, name='_328', actual_shape=_327_cast, shape=[1, -1, 2]) - _341_cast = fluid.layers.cast(_341, dtype='int32') - _342 = fluid.layers.reshape( - _332, name='_342', actual_shape=_341_cast, shape=[1, -1, 4]) - _351 = fluid.layers.relu(_350, name='_351') - _352 = fluid.layers.conv2d( - _351, - num_filters=128, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_9_3_weight', - name='_352', - bias_attr=False) - _353 = fluid.layers.batch_norm( - _352, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_9_4_weight', - bias_attr='_base_net_9_4_bias', - moving_mean_name='_base_net_9_4_running_mean', - moving_variance_name='_base_net_9_4_running_var', - use_global_stats=False, - name='_353') - _354 = fluid.layers.relu(_353, name='_354') - _355 = fluid.layers.conv2d( - _354, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=128, - param_attr='_base_net_10_0_weight', - name='_355', - bias_attr=False) - _356 = fluid.layers.batch_norm( - _355, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - 
data_layout='NCHW', - is_test=True, - param_attr='_base_net_10_1_weight', - bias_attr='_base_net_10_1_bias', - moving_mean_name='_base_net_10_1_running_mean', - moving_variance_name='_base_net_10_1_running_var', - use_global_stats=False, - name='_356') - _357 = fluid.layers.relu(_356, name='_357') - _358 = fluid.layers.conv2d( - _357, - num_filters=128, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_10_3_weight', - name='_358', - bias_attr=False) - _359 = fluid.layers.batch_norm( - _358, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_10_4_weight', - bias_attr='_base_net_10_4_bias', - moving_mean_name='_base_net_10_4_running_mean', - moving_variance_name='_base_net_10_4_running_var', - use_global_stats=False, - name='_359') - _360 = fluid.layers.relu(_359, name='_360') - _361 = fluid.layers.conv2d( - _360, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=128, - param_attr='_classification_headers_1_0_weight', - name='_361', - bias_attr='_classification_headers_1_0_bias') - _375 = fluid.layers.conv2d( - _360, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=128, - param_attr='_regression_headers_1_0_weight', - name='_375', - bias_attr='_regression_headers_1_0_bias') - _389 = fluid.layers.conv2d( - _360, - num_filters=128, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=128, - param_attr='_base_net_11_0_weight', - name='_389', - bias_attr=False) - _362 = fluid.layers.relu(_361, name='_362') - _376 = fluid.layers.relu(_375, name='_376') - _390 = fluid.layers.batch_norm( - _389, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_11_1_weight', - bias_attr='_base_net_11_1_bias', - moving_mean_name='_base_net_11_1_running_mean', - moving_variance_name='_base_net_11_1_running_var', - use_global_stats=False, - name='_390') - _363 = fluid.layers.conv2d( - _362, - num_filters=4, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_classification_headers_1_2_weight', - name='_363', - bias_attr='_classification_headers_1_2_bias') - _377 = fluid.layers.conv2d( - _376, - num_filters=8, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_regression_headers_1_2_weight', - name='_377', - bias_attr='_regression_headers_1_2_bias') - _391 = fluid.layers.relu(_390, name='_391') - _364 = fluid.layers.transpose(_363, perm=[0, 2, 3, 1], name='_364') - _378 = fluid.layers.transpose(_377, perm=[0, 2, 3, 1], name='_378') - _392 = fluid.layers.conv2d( - _391, - num_filters=256, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_11_3_weight', - name='_392', - bias_attr=False) - _366 = fluid.layers.shape(_364) - _380 = fluid.layers.shape(_378) - _393 = fluid.layers.batch_norm( - _392, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_11_4_weight', - bias_attr='_base_net_11_4_bias', - moving_mean_name='_base_net_11_4_running_mean', - moving_variance_name='_base_net_11_4_running_var', - use_global_stats=False, - name='_393') - _367 = fluid.layers.gather(input=_366, index=_365) - _381 = fluid.layers.gather(input=_380, index=_379) - 
_394 = fluid.layers.relu(_393, name='_394') - _370 = fluid.layers.assign(_367) - _384 = fluid.layers.assign(_381) - _395 = fluid.layers.conv2d( - _394, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=256, - param_attr='_base_net_12_0_weight', - name='_395', - bias_attr=False) - _373 = fluid.layers.concat([_370, _371, _372], axis=0) - _387 = fluid.layers.concat([_384, _385, _386], axis=0) - _396 = fluid.layers.batch_norm( - _395, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_12_1_weight', - bias_attr='_base_net_12_1_bias', - moving_mean_name='_base_net_12_1_running_mean', - moving_variance_name='_base_net_12_1_running_var', - use_global_stats=False, - name='_396') - _373_cast = fluid.layers.cast(_373, dtype='int32') - _374 = fluid.layers.reshape( - _364, name='_374', actual_shape=_373_cast, shape=[1, -1, 2]) - _387_cast = fluid.layers.cast(_387, dtype='int32') - _388 = fluid.layers.reshape( - _378, name='_388', actual_shape=_387_cast, shape=[1, -1, 4]) - _397 = fluid.layers.relu(_396, name='_397') - _398 = fluid.layers.conv2d( - _397, - num_filters=256, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_12_3_weight', - name='_398', - bias_attr=False) - _399 = fluid.layers.batch_norm( - _398, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_12_4_weight', - bias_attr='_base_net_12_4_bias', - moving_mean_name='_base_net_12_4_running_mean', - moving_variance_name='_base_net_12_4_running_var', - use_global_stats=False, - name='_399') - _400 = fluid.layers.relu(_399, name='_400') - _401 = fluid.layers.conv2d( - _400, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=256, - param_attr='_classification_headers_2_0_weight', - name='_401', - bias_attr='_classification_headers_2_0_bias') - _415 = fluid.layers.conv2d( - _400, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=256, - param_attr='_regression_headers_2_0_weight', - name='_415', - bias_attr='_regression_headers_2_0_bias') - _429 = fluid.layers.conv2d( - _400, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_extras_0_0_weight', - name='_429', - bias_attr='_extras_0_0_bias') - _402 = fluid.layers.relu(_401, name='_402') - _416 = fluid.layers.relu(_415, name='_416') - _430 = fluid.layers.relu(_429, name='_430') - _403 = fluid.layers.conv2d( - _402, - num_filters=4, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_classification_headers_2_2_weight', - name='_403', - bias_attr='_classification_headers_2_2_bias') - _417 = fluid.layers.conv2d( - _416, - num_filters=8, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_regression_headers_2_2_weight', - name='_417', - bias_attr='_regression_headers_2_2_bias') - _431 = fluid.layers.conv2d( - _430, - num_filters=64, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=64, - param_attr='_extras_0_2_0_weight', - name='_431', - bias_attr='_extras_0_2_0_bias') - _404 = fluid.layers.transpose(_403, perm=[0, 2, 3, 1], name='_404') - _418 = fluid.layers.transpose(_417, perm=[0, 2, 3, 1], name='_418') - _432 = 
fluid.layers.relu(_431, name='_432') - _406 = fluid.layers.shape(_404) - _420 = fluid.layers.shape(_418) - _433 = fluid.layers.conv2d( - _432, - num_filters=256, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_extras_0_2_2_weight', - name='_433', - bias_attr='_extras_0_2_2_bias') - _407 = fluid.layers.gather(input=_406, index=_405) - _421 = fluid.layers.gather(input=_420, index=_419) - _434 = fluid.layers.relu(_433, name='_434') - _410 = fluid.layers.assign(_407) - _424 = fluid.layers.assign(_421) - _435 = fluid.layers.conv2d( - _434, - num_filters=6, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_classification_headers_3_weight', - name='_435', - bias_attr='_classification_headers_3_bias') - _447 = fluid.layers.conv2d( - _434, - num_filters=12, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_regression_headers_3_weight', - name='_447', - bias_attr='_regression_headers_3_bias') - _413 = fluid.layers.concat([_410, _411, _412], axis=0) - _427 = fluid.layers.concat([_424, _425, _426], axis=0) - _436 = fluid.layers.transpose(_435, perm=[0, 2, 3, 1], name='_436') - _448 = fluid.layers.transpose(_447, perm=[0, 2, 3, 1], name='_448') - _413_cast = fluid.layers.cast(_413, dtype='int32') - _414 = fluid.layers.reshape( - _404, name='_414', actual_shape=_413_cast, shape=[1, -1, 2]) - _427_cast = fluid.layers.cast(_427, dtype='int32') - _428 = fluid.layers.reshape( - _418, name='_428', actual_shape=_427_cast, shape=[1, -1, 4]) - _438 = fluid.layers.shape(_436) - _450 = fluid.layers.shape(_448) - _439 = fluid.layers.gather(input=_438, index=_437) - _451 = fluid.layers.gather(input=_450, index=_449) - _442 = fluid.layers.assign(_439) - _454 = fluid.layers.assign(_451) - _445 = fluid.layers.concat([_442, _443, _444], axis=0) - _457 = fluid.layers.concat([_454, _455, _456], axis=0) - _445_cast = fluid.layers.cast(_445, dtype='int32') - _446 = fluid.layers.reshape( - _436, name='_446', actual_shape=_445_cast, shape=[1, -1, 2]) - _457_cast = fluid.layers.cast(_457, dtype='int32') - _458 = fluid.layers.reshape( - _448, name='_458', actual_shape=_457_cast, shape=[1, -1, 4]) - _459 = fluid.layers.concat([_328, _374, _414, _446], axis=1) - _460 = fluid.layers.concat([_342, _388, _428, _458], axis=1) - _scores = fluid.layers.softmax(_459, axis=2, name='_scores') - _462 = fluid.layers.slice(_460, axes=[2], starts=[0], ends=[2]) - _469 = fluid.layers.slice(_460, axes=[2], starts=[2], ends=[4]) - _464 = fluid.layers.elementwise_mul(x=_462, y=_463, name='_464') - _471 = fluid.layers.elementwise_mul(x=_469, y=_470, name='_471') - _466 = fluid.layers.elementwise_mul(x=_464, y=_465, name='_466') - _472 = fluid.layers.exp(_471, name='_472') - _468 = fluid.layers.elementwise_add(x=_466, y=_467, name='_468') - _474 = fluid.layers.elementwise_mul(x=_472, y=_473, name='_474') - _475 = fluid.layers.concat([_468, _474], axis=2) - _476 = fluid.layers.slice(_475, axes=[2], starts=[0], ends=[2]) - _477 = fluid.layers.slice(_475, axes=[2], starts=[2], ends=[4]) - _481 = fluid.layers.slice(_475, axes=[2], starts=[0], ends=[2]) - _482 = fluid.layers.slice(_475, axes=[2], starts=[2], ends=[4]) - _479 = fluid.layers.elementwise_div(x=_477, y=_478, name='_479') - _484 = fluid.layers.elementwise_div(x=_482, y=_483, name='_484') - _480 = fluid.layers.elementwise_sub(x=_476, y=_479, name='_480') - _485 = fluid.layers.elementwise_add(x=_481, y=_484, name='_485') - _boxes = 
fluid.layers.concat([_480, _485], axis=2)
-    return [_input], [_scores, _boxes]
-
-
-def run_net(param_dir="./"):
-    import os
-    inputs, outputs = face_detector_320()
-    # flatten any nested output lists without mutating the list while iterating
-    flattened = []
-    for out in outputs:
-        if isinstance(out, list):
-            flattened.extend(out)
-        else:
-            flattened.append(out)
-    outputs = flattened
-    exe = fluid.Executor(fluid.CPUPlace())
-    exe.run(fluid.default_startup_program())
-
-    def if_exist(var):
-        return os.path.exists(os.path.join(param_dir, var.name))
-
-    fluid.io.load_vars(
-        exe, param_dir, fluid.default_main_program(), predicate=if_exist)
diff --git a/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/module.py b/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/module.py
deleted file mode 100644
index 8237b7f3d743bdb29923c6dab0ff3d6f576127d2..0000000000000000000000000000000000000000
--- a/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/module.py
+++ /dev/null
@@ -1,226 +0,0 @@
-# coding=utf-8
-from __future__ import absolute_import
-from __future__ import division
-
-import ast
-import argparse
-import os
-
-import numpy as np
-import paddle.fluid as fluid
-import paddlehub as hub
-from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
-from paddlehub.module.module import moduleinfo, runnable, serving
-
-from ultra_light_fast_generic_face_detector_1mb_320.processor import postprocess, base64_to_cv2
-from ultra_light_fast_generic_face_detector_1mb_320.data_feed import reader
-
-
-@moduleinfo(
-    name="ultra_light_fast_generic_face_detector_1mb_320",
-    type="CV/face_detection",
-    author="paddlepaddle",
-    author_email="paddle-dev@baidu.com",
-    summary=
-    "Ultra-Light-Fast-Generic-Face-Detector-1MB is a high-performance face detection model released on https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB.",
-    version="1.1.2")
-class FaceDetector320(hub.Module):
-    def _initialize(self):
-        self.default_pretrained_model_path = os.path.join(
-            self.directory, "ultra_light_fast_generic_face_detector_1mb_320")
-        self._set_config()
-
-    def _set_config(self):
-        """
-        predictor config setting
-        """
-        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
-        cpu_config.disable_glog_info()
-        cpu_config.disable_gpu()
-        self.cpu_predictor = create_paddle_predictor(cpu_config)
-
-        try:
-            _places = os.environ["CUDA_VISIBLE_DEVICES"]
-            int(_places[0])
-            use_gpu = True
-        except:
-            use_gpu = False
-        if use_gpu:
-            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
-            gpu_config.disable_glog_info()
-            gpu_config.enable_use_gpu(
-                memory_pool_init_size_mb=1000, device_id=0)
-            self.gpu_predictor = create_paddle_predictor(gpu_config)
-
-    def save_inference_model(self,
-                             dirname,
-                             model_filename=None,
-                             params_filename=None,
-                             combined=True):
-        if combined:
-            model_filename = "__model__" if not model_filename else model_filename
-            params_filename = "__params__" if not params_filename else params_filename
-        place = fluid.CPUPlace()
-        exe = fluid.Executor(place)
-
-        program, feeded_var_names, target_vars = fluid.io.load_inference_model(
-            dirname=self.default_pretrained_model_path, executor=exe)
-
-        fluid.io.save_inference_model(
-            dirname=dirname,
-            main_program=program,
-            executor=exe,
-            feeded_var_names=feeded_var_names,
-            target_vars=target_vars,
-            model_filename=model_filename,
-            params_filename=params_filename)
-
-    def face_detection(self,
-                       images=None,
-                       paths=None,
-                       data=None,
-                       batch_size=1,
-                       use_gpu=False,
-                       output_dir='face_detector_320_predict_output',
-                       visualization=False,
-                       confs_threshold=0.5,
-                       iou_threshold=0.5):
-        """
-        API for face detection.
-
-        Args:
-            images (list(numpy.ndarray)): images data, shape of each is [H, W, C], color space is BGR.
-            paths (list[str]): The paths of images.
-            batch_size (int): batch size.
-            use_gpu (bool): Whether to use gpu.
-            output_dir (str): The path to store output images.
-            visualization (bool): Whether to save image or not.
-            confs_threshold (float): threshold for confidence coefficient.
-            iou_threshold (float): threshold for iou.
-
-        Returns:
-            res (list[dict]): The result of face detection and save path of images.
-        """
-        if use_gpu:
-            try:
-                _places = os.environ["CUDA_VISIBLE_DEVICES"]
-                int(_places[0])
-            except:
-                raise RuntimeError(
-                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
-                )
-
-        # compatibility with older versions
-        if data and 'image' in data:
-            if paths is None:
-                paths = []
-            paths += data['image']
-
-        # get all data
-        all_data = []
-        for yield_data in reader(images, paths):
-            all_data.append(yield_data)
-
-        total_num = len(all_data)
-        loop_num = int(np.ceil(total_num / batch_size))
-
-        res = []
-        for iter_id in range(loop_num):
-            batch_data = list()
-            handle_id = iter_id * batch_size
-            for image_id in range(batch_size):
-                try:
-                    batch_data.append(all_data[handle_id + image_id])
-                except:
-                    pass
-            # feed batch image
-            batch_image = np.array([data['image'] for data in batch_data])
-            batch_image = PaddleTensor(batch_image.astype('float32'))
-            data_out = self.gpu_predictor.run([
-                batch_image
-            ]) if use_gpu else self.cpu_predictor.run([batch_image])
-            confidences = data_out[0].as_ndarray()
-            boxes = data_out[1].as_ndarray()
-
-            # postprocess one by one
-            for i in range(len(batch_data)):
-                out = postprocess(
-                    confidences=confidences[i],
-                    boxes=boxes[i],
-                    orig_im=batch_data[i]['orig_im'],
-                    orig_im_shape=batch_data[i]['orig_im_shape'],
-                    orig_im_path=batch_data[i]['orig_im_path'],
-                    output_dir=output_dir,
-                    visualization=visualization,
-                    confs_threshold=confs_threshold,
-                    iou_threshold=iou_threshold)
-                res.append(out)
-        return res
-
-    @serving
-    def serving_method(self, images, **kwargs):
-        """
-        Run as a service.
-        """
-        images_decode = [base64_to_cv2(image) for image in images]
-        results = self.face_detection(images_decode, **kwargs)
-        return results
-
-    @runnable
-    def run_cmd(self, argvs):
-        """
-        Run as a command.
-        """
-        self.parser = argparse.ArgumentParser(
-            description="Run the {} module.".format(self.name),
-            prog='hub run {}'.format(self.name),
-            usage='%(prog)s',
-            add_help=True)
-        self.arg_input_group = self.parser.add_argument_group(
-            title="Input options", description="Input data. Required")
-        self.arg_config_group = self.parser.add_argument_group(
-            title="Config options",
-            description=
-            "Run configuration for controlling module behavior, not required.")
-        self.add_module_config_arg()
-        self.add_module_input_arg()
-        args = self.parser.parse_args(argvs)
-        results = self.face_detection(
-            paths=[args.input_path],
-            batch_size=args.batch_size,
-            use_gpu=args.use_gpu,
-            output_dir=args.output_dir,
-            visualization=args.visualization)
-        return results
-
-    def add_module_config_arg(self):
-        """
-        Add the command config options.
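-
-        Boolean flags are parsed with ast.literal_eval, so pass them as
-        True/False on the command line.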
- """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='face_detector_320_predict_output', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - self.arg_config_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - - def add_module_input_arg(self): - """ - Add the command input options. - """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") diff --git a/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/processor.py b/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/processor.py deleted file mode 100644 index ddb4bc1b212b986d4c257e11db43e195df88564c..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/processor.py +++ /dev/null @@ -1,146 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time - -import base64 -import cv2 -import numpy as np - -__all__ = ['postprocess'] - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def area_of(left_top, right_bottom): - hw = np.clip(right_bottom - left_top, 0.0, None) - return hw[..., 0] * hw[..., 1] - - -def iou_of(boxes0, boxes1, eps=1e-5): - overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2]) - overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:]) - overlap_area = area_of(overlap_left_top, overlap_right_bottom) - area0 = area_of(boxes0[..., :2], boxes0[..., 2:]) - area1 = area_of(boxes1[..., :2], boxes1[..., 2:]) - return overlap_area / (area0 + area1 - overlap_area + eps) - - -def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200): - scores = box_scores[:, -1] - boxes = box_scores[:, :-1] - picked = [] - # _, indexes = scores.sort(descending=True) - indexes = np.argsort(scores) - # indexes = indexes[:candidate_size] - indexes = indexes[-candidate_size:] - while len(indexes) > 0: - # current = indexes[0] - current = indexes[-1] - picked.append(current) - if 0 < top_k == len(picked) or len(indexes) == 1: - break - current_box = boxes[current, :] - # indexes = indexes[1:] - indexes = indexes[:-1] - rest_boxes = boxes[indexes, :] - iou = iou_of(rest_boxes, np.expand_dims(current_box, axis=0)) - indexes = indexes[iou <= iou_threshold] - return box_scores[picked, :] - - -def check_dir(dir_path): - if not os.path.exists(dir_path): - os.makedirs(dir_path) - elif os.path.isfile(dir_path): - os.remove(dir_path) - os.makedirs(dir_path) - - -def get_image_ext(image): - if image.shape[2] == 4: - return ".png" - return ".jpg" - - -def postprocess(confidences, - boxes, - orig_im, - orig_im_shape, - orig_im_path, - output_dir, - visualization, - confs_threshold=0.5, - iou_threshold=0.5): - """ - Postprocess output of network. one image at a time. - - Args: - confidences (numpy.ndarray): confidences, with shape [num, 2] - boxes (numpy.ndaray): boxes coordinate, with shape [num, 4] - orig_im (numpy.ndarray): original image. 
-        orig_im_shape (list): shape of original image.
-        orig_im_path (list): path of original image.
-        output_dir (str): output directory to store image.
-        visualization (bool): whether to save image or not.
-    """
-    output = {}
-    output['data'] = []
-    if orig_im_path:
-        output['path'] = orig_im_path
-    picked_box_probs = []
-    picked_labels = []
-    for class_index in range(1, confidences.shape[1]):
-        probs = confidences[:, class_index]
-        mask = probs > confs_threshold
-        probs = probs[mask]
-        if probs.shape[0] == 0:
-            continue
-        subset_boxes = boxes[mask, :]
-        box_probs = np.concatenate([subset_boxes, probs.reshape(-1, 1)], axis=1)
-        box_probs = hard_nms(box_probs, iou_threshold=iou_threshold, top_k=-1)
-        picked_box_probs.append(box_probs)
-        picked_labels.extend([class_index] * box_probs.shape[0])
-
-    if not picked_box_probs:
-        return output
-
-    picked_box_probs = np.concatenate(picked_box_probs)
-    picked_box_probs[:, 0] *= orig_im_shape[1]
-    picked_box_probs[:, 1] *= orig_im_shape[0]
-    picked_box_probs[:, 2] *= orig_im_shape[1]
-    picked_box_probs[:, 3] *= orig_im_shape[0]
-
-    for data in picked_box_probs:
-        output['data'].append({
-            'left': float(data[0]),
-            'right': float(data[2]),
-            'top': float(data[1]),
-            'bottom': float(data[3]),
-            'confidence': float(data[4])
-        })
-
-    picked_box_probs = picked_box_probs[:, :4].astype(np.int32)
-    if visualization:
-        for i in range(picked_box_probs.shape[0]):
-            box = picked_box_probs[i]
-            cv2.rectangle(orig_im, (box[0], box[1]), (box[2], box[3]),
-                          (255, 255, 0), 2)
-        check_dir(output_dir)
-        ext = os.path.splitext(orig_im_path)[1] if orig_im_path else ''
-        ext = ext if ext else get_image_ext(orig_im)
-        orig_im_path = orig_im_path if orig_im_path else 'ndarray_{}{}'.format(
-            time.time(), ext)
-        im_name = os.path.basename(orig_im_path)
-        im_save_path = os.path.join(output_dir, im_name)
-        output['save_path'] = im_save_path
-        cv2.imwrite(im_save_path, orig_im)
-    return output
diff --git a/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/data_feed.py b/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/data_feed.py
deleted file mode 100644
index 295bca7e0aa00e348b5e9693aca586d8d682e10b..0000000000000000000000000000000000000000
--- a/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/data_feed.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# coding=utf-8
-import os
-from collections import OrderedDict
-
-import cv2
-import numpy as np
-
-__all__ = ['reader']
-
-
-def preprocess(orig_image):
-    image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)
-    image = cv2.resize(image, (640, 480))
-    image_mean = np.array([127, 127, 127])
-    image = (image - image_mean) / 128.0
-    image = np.transpose(image, [2, 0, 1])
-    return image
-
-
-def reader(images=None, paths=None):
-    """
-    Preprocess to yield image.
-
-    Args:
-        images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
-        paths (list[str]): paths to images.
-
-    Yield:
-        each (collections.OrderedDict): info of original image, preprocessed image.
-    """
-    component = list()
-    if paths:
-        for im_path in paths:
-            each = OrderedDict()
-            assert os.path.isfile(
-                im_path), "The {} isn't a valid file path.".format(im_path)
-            im = cv2.imread(im_path)
-            each['orig_im'] = im
-            each['orig_im_shape'] = im.shape  # height, width, channel
-            each['orig_im_path'] = im_path
-            component.append(each)
-    if images is not None:
-        assert type(images) is list, "images should be a list."
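-        # ndarray inputs have no file path; 'orig_im_path' is kept as None.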
- for im in images: - each = OrderedDict() - each['orig_im'] = im - each['orig_im_path'] = None - each['orig_im_shape'] = im.shape # height, width, channel - component.append(each) - - for element in component: - element['image'] = preprocess(element['orig_im']) - yield element diff --git a/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/face_detector.py b/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/face_detector.py deleted file mode 100644 index 7998950c92708625e1fbefda3b14333fe6b64c5c..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/face_detector.py +++ /dev/null @@ -1,1213 +0,0 @@ -# coding=utf-8 -from paddle.fluid.initializer import Constant -from paddle.fluid.param_attr import ParamAttr -import paddle.fluid as fluid - - -def face_detector(): - _319 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _322 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _323 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) - _333 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _336 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _337 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) - _365 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _368 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _369 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) - _379 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _382 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _383 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) - _405 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _408 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _409 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) - _419 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _422 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _423 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) - _437 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _440 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _441 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) - _449 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _452 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _453 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) - _463 = fluid.layers.fill_constant( - shape=[1], dtype='float32', value=0.10000000149011612) - _465 = fluid.layers.create_parameter( - dtype='float32', - shape=[1, 17640, 2], - name='_465', - attr='_465', - default_initializer=Constant(0.0)) - _467 = fluid.layers.create_parameter( - dtype='float32', - shape=[1, 17640, 2], - name='_467', - attr='_467', - default_initializer=Constant(0.0)) - _470 = fluid.layers.fill_constant( - shape=[1], dtype='float32', value=0.20000000298023224) - _473 = fluid.layers.create_parameter( - dtype='float32', - shape=[1, 17640, 2], - name='_473', - attr='_473', - default_initializer=Constant(0.0)) - _478 = fluid.layers.fill_constant(shape=[1], dtype='float32', value=2.0) - _483 = fluid.layers.fill_constant(shape=[1], dtype='float32', value=2.0) - _input = fluid.layers.data( - dtype='float32', - shape=[1, 3, 480, 640], - name='_input', - append_batch_size=False) - _325 = 
fluid.layers.assign(_322) - _326 = fluid.layers.assign(_323) - _339 = fluid.layers.assign(_336) - _340 = fluid.layers.assign(_337) - _371 = fluid.layers.assign(_368) - _372 = fluid.layers.assign(_369) - _385 = fluid.layers.assign(_382) - _386 = fluid.layers.assign(_383) - _411 = fluid.layers.assign(_408) - _412 = fluid.layers.assign(_409) - _425 = fluid.layers.assign(_422) - _426 = fluid.layers.assign(_423) - _443 = fluid.layers.assign(_440) - _444 = fluid.layers.assign(_441) - _455 = fluid.layers.assign(_452) - _456 = fluid.layers.assign(_453) - _245 = fluid.layers.conv2d( - _input, - num_filters=16, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_base_net_0_0_weight', - name='_245', - bias_attr=False) - _246 = fluid.layers.batch_norm( - _245, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_0_1_weight', - bias_attr='_base_net_0_1_bias', - moving_mean_name='_base_net_0_1_running_mean', - moving_variance_name='_base_net_0_1_running_var', - use_global_stats=False, - name='_246') - _247 = fluid.layers.relu(_246, name='_247') - _248 = fluid.layers.conv2d( - _247, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=16, - param_attr='_base_net_1_0_weight', - name='_248', - bias_attr=False) - _249 = fluid.layers.batch_norm( - _248, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_1_1_weight', - bias_attr='_base_net_1_1_bias', - moving_mean_name='_base_net_1_1_running_mean', - moving_variance_name='_base_net_1_1_running_var', - use_global_stats=False, - name='_249') - _250 = fluid.layers.relu(_249, name='_250') - _251 = fluid.layers.conv2d( - _250, - num_filters=32, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_1_3_weight', - name='_251', - bias_attr=False) - _252 = fluid.layers.batch_norm( - _251, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_1_4_weight', - bias_attr='_base_net_1_4_bias', - moving_mean_name='_base_net_1_4_running_mean', - moving_variance_name='_base_net_1_4_running_var', - use_global_stats=False, - name='_252') - _253 = fluid.layers.relu(_252, name='_253') - _254 = fluid.layers.conv2d( - _253, - num_filters=32, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=32, - param_attr='_base_net_2_0_weight', - name='_254', - bias_attr=False) - _255 = fluid.layers.batch_norm( - _254, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_2_1_weight', - bias_attr='_base_net_2_1_bias', - moving_mean_name='_base_net_2_1_running_mean', - moving_variance_name='_base_net_2_1_running_var', - use_global_stats=False, - name='_255') - _256 = fluid.layers.relu(_255, name='_256') - _257 = fluid.layers.conv2d( - _256, - num_filters=32, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_2_3_weight', - name='_257', - bias_attr=False) - _258 = fluid.layers.batch_norm( - _257, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_2_4_weight', - bias_attr='_base_net_2_4_bias', - moving_mean_name='_base_net_2_4_running_mean', - 
moving_variance_name='_base_net_2_4_running_var', - use_global_stats=False, - name='_258') - _259 = fluid.layers.relu(_258, name='_259') - _260 = fluid.layers.conv2d( - _259, - num_filters=32, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=32, - param_attr='_base_net_3_0_weight', - name='_260', - bias_attr=False) - _261 = fluid.layers.batch_norm( - _260, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_3_1_weight', - bias_attr='_base_net_3_1_bias', - moving_mean_name='_base_net_3_1_running_mean', - moving_variance_name='_base_net_3_1_running_var', - use_global_stats=False, - name='_261') - _262 = fluid.layers.relu(_261, name='_262') - _263 = fluid.layers.conv2d( - _262, - num_filters=32, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_3_3_weight', - name='_263', - bias_attr=False) - _264 = fluid.layers.batch_norm( - _263, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_3_4_weight', - bias_attr='_base_net_3_4_bias', - moving_mean_name='_base_net_3_4_running_mean', - moving_variance_name='_base_net_3_4_running_var', - use_global_stats=False, - name='_264') - _265 = fluid.layers.relu(_264, name='_265') - _266 = fluid.layers.conv2d( - _265, - num_filters=32, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=32, - param_attr='_base_net_4_0_weight', - name='_266', - bias_attr=False) - _267 = fluid.layers.batch_norm( - _266, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_4_1_weight', - bias_attr='_base_net_4_1_bias', - moving_mean_name='_base_net_4_1_running_mean', - moving_variance_name='_base_net_4_1_running_var', - use_global_stats=False, - name='_267') - _268 = fluid.layers.relu(_267, name='_268') - _269 = fluid.layers.conv2d( - _268, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_4_3_weight', - name='_269', - bias_attr=False) - _270 = fluid.layers.batch_norm( - _269, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_4_4_weight', - bias_attr='_base_net_4_4_bias', - moving_mean_name='_base_net_4_4_running_mean', - moving_variance_name='_base_net_4_4_running_var', - use_global_stats=False, - name='_270') - _271 = fluid.layers.relu(_270, name='_271') - _272 = fluid.layers.conv2d( - _271, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=64, - param_attr='_base_net_5_0_weight', - name='_272', - bias_attr=False) - _273 = fluid.layers.batch_norm( - _272, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_5_1_weight', - bias_attr='_base_net_5_1_bias', - moving_mean_name='_base_net_5_1_running_mean', - moving_variance_name='_base_net_5_1_running_var', - use_global_stats=False, - name='_273') - _274 = fluid.layers.relu(_273, name='_274') - _275 = fluid.layers.conv2d( - _274, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_5_3_weight', - name='_275', - bias_attr=False) - _276 = fluid.layers.batch_norm( - _275, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - 
data_layout='NCHW', - is_test=True, - param_attr='_base_net_5_4_weight', - bias_attr='_base_net_5_4_bias', - moving_mean_name='_base_net_5_4_running_mean', - moving_variance_name='_base_net_5_4_running_var', - use_global_stats=False, - name='_276') - _277 = fluid.layers.relu(_276, name='_277') - _278 = fluid.layers.conv2d( - _277, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=64, - param_attr='_base_net_6_0_weight', - name='_278', - bias_attr=False) - _279 = fluid.layers.batch_norm( - _278, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_6_1_weight', - bias_attr='_base_net_6_1_bias', - moving_mean_name='_base_net_6_1_running_mean', - moving_variance_name='_base_net_6_1_running_var', - use_global_stats=False, - name='_279') - _280 = fluid.layers.relu(_279, name='_280') - _281 = fluid.layers.conv2d( - _280, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_6_3_weight', - name='_281', - bias_attr=False) - _282 = fluid.layers.batch_norm( - _281, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_6_4_weight', - bias_attr='_base_net_6_4_bias', - moving_mean_name='_base_net_6_4_running_mean', - moving_variance_name='_base_net_6_4_running_var', - use_global_stats=False, - name='_282') - _283 = fluid.layers.relu(_282, name='_283') - _284 = fluid.layers.conv2d( - _283, - num_filters=8, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch0_0_conv_weight', - name='_284', - bias_attr=False) - _291 = fluid.layers.conv2d( - _283, - num_filters=8, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch1_0_conv_weight', - name='_291', - bias_attr=False) - _298 = fluid.layers.conv2d( - _283, - num_filters=8, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch2_0_conv_weight', - name='_298', - bias_attr=False) - _311 = fluid.layers.conv2d( - _283, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_shortcut_conv_weight', - name='_311', - bias_attr=False) - _285 = fluid.layers.batch_norm( - _284, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch0_0_bn_weight', - bias_attr='_base_net_7_branch0_0_bn_bias', - moving_mean_name='_base_net_7_branch0_0_bn_running_mean', - moving_variance_name='_base_net_7_branch0_0_bn_running_var', - use_global_stats=False, - name='_285') - _292 = fluid.layers.batch_norm( - _291, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch1_0_bn_weight', - bias_attr='_base_net_7_branch1_0_bn_bias', - moving_mean_name='_base_net_7_branch1_0_bn_running_mean', - moving_variance_name='_base_net_7_branch1_0_bn_running_var', - use_global_stats=False, - name='_292') - _299 = fluid.layers.batch_norm( - _298, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch2_0_bn_weight', - bias_attr='_base_net_7_branch2_0_bn_bias', - moving_mean_name='_base_net_7_branch2_0_bn_running_mean', - 
moving_variance_name='_base_net_7_branch2_0_bn_running_var', - use_global_stats=False, - name='_299') - _312 = fluid.layers.batch_norm( - _311, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_shortcut_bn_weight', - bias_attr='_base_net_7_shortcut_bn_bias', - moving_mean_name='_base_net_7_shortcut_bn_running_mean', - moving_variance_name='_base_net_7_shortcut_bn_running_var', - use_global_stats=False, - name='_312') - _286 = fluid.layers.conv2d( - _285, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch0_1_conv_weight', - name='_286', - bias_attr=False) - _293 = fluid.layers.conv2d( - _292, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch1_1_conv_weight', - name='_293', - bias_attr=False) - _300 = fluid.layers.conv2d( - _299, - num_filters=12, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch2_1_conv_weight', - name='_300', - bias_attr=False) - _287 = fluid.layers.batch_norm( - _286, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch0_1_bn_weight', - bias_attr='_base_net_7_branch0_1_bn_bias', - moving_mean_name='_base_net_7_branch0_1_bn_running_mean', - moving_variance_name='_base_net_7_branch0_1_bn_running_var', - use_global_stats=False, - name='_287') - _294 = fluid.layers.batch_norm( - _293, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch1_1_bn_weight', - bias_attr='_base_net_7_branch1_1_bn_bias', - moving_mean_name='_base_net_7_branch1_1_bn_running_mean', - moving_variance_name='_base_net_7_branch1_1_bn_running_var', - use_global_stats=False, - name='_294') - _301 = fluid.layers.batch_norm( - _300, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch2_1_bn_weight', - bias_attr='_base_net_7_branch2_1_bn_bias', - moving_mean_name='_base_net_7_branch2_1_bn_running_mean', - moving_variance_name='_base_net_7_branch2_1_bn_running_var', - use_global_stats=False, - name='_301') - _288 = fluid.layers.relu(_287, name='_288') - _295 = fluid.layers.relu(_294, name='_295') - _302 = fluid.layers.relu(_301, name='_302') - _289 = fluid.layers.conv2d( - _288, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[2, 2], - dilation=[2, 2], - groups=1, - param_attr='_base_net_7_branch0_2_conv_weight', - name='_289', - bias_attr=False) - _296 = fluid.layers.conv2d( - _295, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[3, 3], - dilation=[3, 3], - groups=1, - param_attr='_base_net_7_branch1_2_conv_weight', - name='_296', - bias_attr=False) - _303 = fluid.layers.conv2d( - _302, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch2_2_conv_weight', - name='_303', - bias_attr=False) - _290 = fluid.layers.batch_norm( - _289, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch0_2_bn_weight', - bias_attr='_base_net_7_branch0_2_bn_bias', - moving_mean_name='_base_net_7_branch0_2_bn_running_mean', - 
moving_variance_name='_base_net_7_branch0_2_bn_running_var', - use_global_stats=False, - name='_290') - _297 = fluid.layers.batch_norm( - _296, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch1_2_bn_weight', - bias_attr='_base_net_7_branch1_2_bn_bias', - moving_mean_name='_base_net_7_branch1_2_bn_running_mean', - moving_variance_name='_base_net_7_branch1_2_bn_running_var', - use_global_stats=False, - name='_297') - _304 = fluid.layers.batch_norm( - _303, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch2_2_bn_weight', - bias_attr='_base_net_7_branch2_2_bn_bias', - moving_mean_name='_base_net_7_branch2_2_bn_running_mean', - moving_variance_name='_base_net_7_branch2_2_bn_running_var', - use_global_stats=False, - name='_304') - _305 = fluid.layers.relu(_304, name='_305') - _306 = fluid.layers.conv2d( - _305, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[5, 5], - dilation=[5, 5], - groups=1, - param_attr='_base_net_7_branch2_3_conv_weight', - name='_306', - bias_attr=False) - _307 = fluid.layers.batch_norm( - _306, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch2_3_bn_weight', - bias_attr='_base_net_7_branch2_3_bn_bias', - moving_mean_name='_base_net_7_branch2_3_bn_running_mean', - moving_variance_name='_base_net_7_branch2_3_bn_running_var', - use_global_stats=False, - name='_307') - _308 = fluid.layers.concat([_290, _297, _307], axis=1) - _309 = fluid.layers.conv2d( - _308, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_ConvLinear_conv_weight', - name='_309', - bias_attr=False) - _310 = fluid.layers.batch_norm( - _309, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_ConvLinear_bn_weight', - bias_attr='_base_net_7_ConvLinear_bn_bias', - moving_mean_name='_base_net_7_ConvLinear_bn_running_mean', - moving_variance_name='_base_net_7_ConvLinear_bn_running_var', - use_global_stats=False, - name='_310') - _313 = fluid.layers.elementwise_add(x=_310, y=_312, name='_313') - _314 = fluid.layers.relu(_313, name='_314') - _315 = fluid.layers.conv2d( - _314, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=64, - param_attr='_classification_headers_0_0_weight', - name='_315', - bias_attr='_classification_headers_0_0_bias') - _329 = fluid.layers.conv2d( - _314, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=64, - param_attr='_regression_headers_0_0_weight', - name='_329', - bias_attr='_regression_headers_0_0_bias') - _343 = fluid.layers.conv2d( - _314, - num_filters=64, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=64, - param_attr='_base_net_8_0_weight', - name='_343', - bias_attr=False) - _316 = fluid.layers.relu(_315, name='_316') - _330 = fluid.layers.relu(_329, name='_330') - _344 = fluid.layers.batch_norm( - _343, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_8_1_weight', - bias_attr='_base_net_8_1_bias', - moving_mean_name='_base_net_8_1_running_mean', - moving_variance_name='_base_net_8_1_running_var', - use_global_stats=False, - 
name='_344') - _317 = fluid.layers.conv2d( - _316, - num_filters=6, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_classification_headers_0_2_weight', - name='_317', - bias_attr='_classification_headers_0_2_bias') - _331 = fluid.layers.conv2d( - _330, - num_filters=12, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_regression_headers_0_2_weight', - name='_331', - bias_attr='_regression_headers_0_2_bias') - _345 = fluid.layers.relu(_344, name='_345') - _318 = fluid.layers.transpose(_317, perm=[0, 2, 3, 1], name='_318') - _332 = fluid.layers.transpose(_331, perm=[0, 2, 3, 1], name='_332') - _346 = fluid.layers.conv2d( - _345, - num_filters=128, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_8_3_weight', - name='_346', - bias_attr=False) - _320 = fluid.layers.shape(_318) - _334 = fluid.layers.shape(_332) - _347 = fluid.layers.batch_norm( - _346, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_8_4_weight', - bias_attr='_base_net_8_4_bias', - moving_mean_name='_base_net_8_4_running_mean', - moving_variance_name='_base_net_8_4_running_var', - use_global_stats=False, - name='_347') - _321 = fluid.layers.gather(input=_320, index=_319) - _335 = fluid.layers.gather(input=_334, index=_333) - _348 = fluid.layers.relu(_347, name='_348') - _324 = fluid.layers.assign(_321) - _338 = fluid.layers.assign(_335) - _349 = fluid.layers.conv2d( - _348, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=128, - param_attr='_base_net_9_0_weight', - name='_349', - bias_attr=False) - _327 = fluid.layers.concat([_324, _325, _326], axis=0) - _341 = fluid.layers.concat([_338, _339, _340], axis=0) - _350 = fluid.layers.batch_norm( - _349, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_9_1_weight', - bias_attr='_base_net_9_1_bias', - moving_mean_name='_base_net_9_1_running_mean', - moving_variance_name='_base_net_9_1_running_var', - use_global_stats=False, - name='_350') - _327_cast = fluid.layers.cast(_327, dtype='int32') - _328 = fluid.layers.reshape( - _318, name='_328', actual_shape=_327_cast, shape=[1, -1, 2]) - _341_cast = fluid.layers.cast(_341, dtype='int32') - _342 = fluid.layers.reshape( - _332, name='_342', actual_shape=_341_cast, shape=[1, -1, 4]) - _351 = fluid.layers.relu(_350, name='_351') - _352 = fluid.layers.conv2d( - _351, - num_filters=128, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_9_3_weight', - name='_352', - bias_attr=False) - _353 = fluid.layers.batch_norm( - _352, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_9_4_weight', - bias_attr='_base_net_9_4_bias', - moving_mean_name='_base_net_9_4_running_mean', - moving_variance_name='_base_net_9_4_running_var', - use_global_stats=False, - name='_353') - _354 = fluid.layers.relu(_353, name='_354') - _355 = fluid.layers.conv2d( - _354, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=128, - param_attr='_base_net_10_0_weight', - name='_355', - bias_attr=False) - _356 = fluid.layers.batch_norm( - _355, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - 
data_layout='NCHW', - is_test=True, - param_attr='_base_net_10_1_weight', - bias_attr='_base_net_10_1_bias', - moving_mean_name='_base_net_10_1_running_mean', - moving_variance_name='_base_net_10_1_running_var', - use_global_stats=False, - name='_356') - _357 = fluid.layers.relu(_356, name='_357') - _358 = fluid.layers.conv2d( - _357, - num_filters=128, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_10_3_weight', - name='_358', - bias_attr=False) - _359 = fluid.layers.batch_norm( - _358, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_10_4_weight', - bias_attr='_base_net_10_4_bias', - moving_mean_name='_base_net_10_4_running_mean', - moving_variance_name='_base_net_10_4_running_var', - use_global_stats=False, - name='_359') - _360 = fluid.layers.relu(_359, name='_360') - _361 = fluid.layers.conv2d( - _360, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=128, - param_attr='_classification_headers_1_0_weight', - name='_361', - bias_attr='_classification_headers_1_0_bias') - _375 = fluid.layers.conv2d( - _360, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=128, - param_attr='_regression_headers_1_0_weight', - name='_375', - bias_attr='_regression_headers_1_0_bias') - _389 = fluid.layers.conv2d( - _360, - num_filters=128, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=128, - param_attr='_base_net_11_0_weight', - name='_389', - bias_attr=False) - _362 = fluid.layers.relu(_361, name='_362') - _376 = fluid.layers.relu(_375, name='_376') - _390 = fluid.layers.batch_norm( - _389, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_11_1_weight', - bias_attr='_base_net_11_1_bias', - moving_mean_name='_base_net_11_1_running_mean', - moving_variance_name='_base_net_11_1_running_var', - use_global_stats=False, - name='_390') - _363 = fluid.layers.conv2d( - _362, - num_filters=4, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_classification_headers_1_2_weight', - name='_363', - bias_attr='_classification_headers_1_2_bias') - _377 = fluid.layers.conv2d( - _376, - num_filters=8, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_regression_headers_1_2_weight', - name='_377', - bias_attr='_regression_headers_1_2_bias') - _391 = fluid.layers.relu(_390, name='_391') - _364 = fluid.layers.transpose(_363, perm=[0, 2, 3, 1], name='_364') - _378 = fluid.layers.transpose(_377, perm=[0, 2, 3, 1], name='_378') - _392 = fluid.layers.conv2d( - _391, - num_filters=256, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_11_3_weight', - name='_392', - bias_attr=False) - _366 = fluid.layers.shape(_364) - _380 = fluid.layers.shape(_378) - _393 = fluid.layers.batch_norm( - _392, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_11_4_weight', - bias_attr='_base_net_11_4_bias', - moving_mean_name='_base_net_11_4_running_mean', - moving_variance_name='_base_net_11_4_running_var', - use_global_stats=False, - name='_393') - _367 = fluid.layers.gather(input=_366, index=_365) - _381 = fluid.layers.gather(input=_380, index=_379) - 
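-    # Each detection head repeats the same pattern below: transpose the head
-    # output to NHWC, read the batch dimension via shape + gather, concat it
-    # with the constants (-1, 2) or (-1, 4), and reshape to
-    # [batch, num_priors, 2] class scores or [batch, num_priors, 4] box
-    # regressions.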
_394 = fluid.layers.relu(_393, name='_394') - _370 = fluid.layers.assign(_367) - _384 = fluid.layers.assign(_381) - _395 = fluid.layers.conv2d( - _394, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=256, - param_attr='_base_net_12_0_weight', - name='_395', - bias_attr=False) - _373 = fluid.layers.concat([_370, _371, _372], axis=0) - _387 = fluid.layers.concat([_384, _385, _386], axis=0) - _396 = fluid.layers.batch_norm( - _395, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_12_1_weight', - bias_attr='_base_net_12_1_bias', - moving_mean_name='_base_net_12_1_running_mean', - moving_variance_name='_base_net_12_1_running_var', - use_global_stats=False, - name='_396') - _373_cast = fluid.layers.cast(_373, dtype='int32') - _374 = fluid.layers.reshape( - _364, name='_374', actual_shape=_373_cast, shape=[1, -1, 2]) - _387_cast = fluid.layers.cast(_387, dtype='int32') - _388 = fluid.layers.reshape( - _378, name='_388', actual_shape=_387_cast, shape=[1, -1, 4]) - _397 = fluid.layers.relu(_396, name='_397') - _398 = fluid.layers.conv2d( - _397, - num_filters=256, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_12_3_weight', - name='_398', - bias_attr=False) - _399 = fluid.layers.batch_norm( - _398, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_12_4_weight', - bias_attr='_base_net_12_4_bias', - moving_mean_name='_base_net_12_4_running_mean', - moving_variance_name='_base_net_12_4_running_var', - use_global_stats=False, - name='_399') - _400 = fluid.layers.relu(_399, name='_400') - _401 = fluid.layers.conv2d( - _400, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=256, - param_attr='_classification_headers_2_0_weight', - name='_401', - bias_attr='_classification_headers_2_0_bias') - _415 = fluid.layers.conv2d( - _400, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=256, - param_attr='_regression_headers_2_0_weight', - name='_415', - bias_attr='_regression_headers_2_0_bias') - _429 = fluid.layers.conv2d( - _400, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_extras_0_0_weight', - name='_429', - bias_attr='_extras_0_0_bias') - _402 = fluid.layers.relu(_401, name='_402') - _416 = fluid.layers.relu(_415, name='_416') - _430 = fluid.layers.relu(_429, name='_430') - _403 = fluid.layers.conv2d( - _402, - num_filters=4, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_classification_headers_2_2_weight', - name='_403', - bias_attr='_classification_headers_2_2_bias') - _417 = fluid.layers.conv2d( - _416, - num_filters=8, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_regression_headers_2_2_weight', - name='_417', - bias_attr='_regression_headers_2_2_bias') - _431 = fluid.layers.conv2d( - _430, - num_filters=64, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=64, - param_attr='_extras_0_2_0_weight', - name='_431', - bias_attr='_extras_0_2_0_bias') - _404 = fluid.layers.transpose(_403, perm=[0, 2, 3, 1], name='_404') - _418 = fluid.layers.transpose(_417, perm=[0, 2, 3, 1], name='_418') - _432 = 
fluid.layers.relu(_431, name='_432') - _406 = fluid.layers.shape(_404) - _420 = fluid.layers.shape(_418) - _433 = fluid.layers.conv2d( - _432, - num_filters=256, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_extras_0_2_2_weight', - name='_433', - bias_attr='_extras_0_2_2_bias') - _407 = fluid.layers.gather(input=_406, index=_405) - _421 = fluid.layers.gather(input=_420, index=_419) - _434 = fluid.layers.relu(_433, name='_434') - _410 = fluid.layers.assign(_407) - _424 = fluid.layers.assign(_421) - _435 = fluid.layers.conv2d( - _434, - num_filters=6, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_classification_headers_3_weight', - name='_435', - bias_attr='_classification_headers_3_bias') - _447 = fluid.layers.conv2d( - _434, - num_filters=12, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_regression_headers_3_weight', - name='_447', - bias_attr='_regression_headers_3_bias') - _413 = fluid.layers.concat([_410, _411, _412], axis=0) - _427 = fluid.layers.concat([_424, _425, _426], axis=0) - _436 = fluid.layers.transpose(_435, perm=[0, 2, 3, 1], name='_436') - _448 = fluid.layers.transpose(_447, perm=[0, 2, 3, 1], name='_448') - _413_cast = fluid.layers.cast(_413, dtype='int32') - _414 = fluid.layers.reshape( - _404, name='_414', actual_shape=_413_cast, shape=[1, -1, 2]) - _427_cast = fluid.layers.cast(_427, dtype='int32') - _428 = fluid.layers.reshape( - _418, name='_428', actual_shape=_427_cast, shape=[1, -1, 4]) - _438 = fluid.layers.shape(_436) - _450 = fluid.layers.shape(_448) - _439 = fluid.layers.gather(input=_438, index=_437) - _451 = fluid.layers.gather(input=_450, index=_449) - _442 = fluid.layers.assign(_439) - _454 = fluid.layers.assign(_451) - _445 = fluid.layers.concat([_442, _443, _444], axis=0) - _457 = fluid.layers.concat([_454, _455, _456], axis=0) - _445_cast = fluid.layers.cast(_445, dtype='int32') - _446 = fluid.layers.reshape( - _436, name='_446', actual_shape=_445_cast, shape=[1, -1, 2]) - _457_cast = fluid.layers.cast(_457, dtype='int32') - _458 = fluid.layers.reshape( - _448, name='_458', actual_shape=_457_cast, shape=[1, -1, 4]) - _459 = fluid.layers.concat([_328, _374, _414, _446], axis=1) - _460 = fluid.layers.concat([_342, _388, _428, _458], axis=1) - _scores = fluid.layers.softmax(_459, axis=2, name='_scores') - _462 = fluid.layers.slice(_460, axes=[2], starts=[0], ends=[2]) - _469 = fluid.layers.slice(_460, axes=[2], starts=[2], ends=[4]) - _464 = fluid.layers.elementwise_mul(x=_462, y=_463, name='_464') - _471 = fluid.layers.elementwise_mul(x=_469, y=_470, name='_471') - _466 = fluid.layers.elementwise_mul(x=_464, y=_465, name='_466') - _472 = fluid.layers.exp(_471, name='_472') - _468 = fluid.layers.elementwise_add(x=_466, y=_467, name='_468') - _474 = fluid.layers.elementwise_mul(x=_472, y=_473, name='_474') - _475 = fluid.layers.concat([_468, _474], axis=2) - _476 = fluid.layers.slice(_475, axes=[2], starts=[0], ends=[2]) - _477 = fluid.layers.slice(_475, axes=[2], starts=[2], ends=[4]) - _481 = fluid.layers.slice(_475, axes=[2], starts=[0], ends=[2]) - _482 = fluid.layers.slice(_475, axes=[2], starts=[2], ends=[4]) - _479 = fluid.layers.elementwise_div(x=_477, y=_478, name='_479') - _484 = fluid.layers.elementwise_div(x=_482, y=_483, name='_484') - _480 = fluid.layers.elementwise_sub(x=_476, y=_479, name='_480') - _485 = fluid.layers.elementwise_add(x=_481, y=_484, name='_485') - _boxes = 
fluid.layers.concat([_480, _485], axis=2)
-
-    return [_input], [_scores, _boxes]
-
-
-def run_net(param_dir="./"):
-    import os
-    inputs, outputs = face_detector()
-    # flatten nested lists of output variables
-    flat_outputs = []
-    for out in outputs:
-        if isinstance(out, list):
-            flat_outputs.extend(out)
-        else:
-            flat_outputs.append(out)
-    outputs = flat_outputs
-    exe = fluid.Executor(fluid.CPUPlace())
-    exe.run(fluid.default_startup_program())
-
-    def if_exist(var):
-        return os.path.exists(os.path.join(param_dir, var.name))
-
-    fluid.io.load_vars(
-        exe, param_dir, fluid.default_main_program(), predicate=if_exist)
diff --git a/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/module.py b/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/module.py
deleted file mode 100644
index 1635237858b6a6c757d52e4df843063c46f7a0f6..0000000000000000000000000000000000000000
--- a/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/module.py
+++ /dev/null
@@ -1,225 +0,0 @@
-# coding=utf-8
-from __future__ import absolute_import
-from __future__ import division
-
-import ast
-import argparse
-import os
-
-import numpy as np
-import paddle.fluid as fluid
-import paddlehub as hub
-from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
-from paddlehub.module.module import moduleinfo, runnable, serving
-
-from ultra_light_fast_generic_face_detector_1mb_640.processor import postprocess, base64_to_cv2
-from ultra_light_fast_generic_face_detector_1mb_640.data_feed import reader
-
-
-@moduleinfo(
-    name="ultra_light_fast_generic_face_detector_1mb_640",
-    type="CV/face_detection",
-    author="paddlepaddle",
-    author_email="paddle-dev@baidu.com",
-    summary=
-    "Ultra-Light-Fast-Generic-Face-Detector-1MB is a high-performance face detection model released on https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB.",
-    version="1.1.2")
-class FaceDetector640(hub.Module):
-    def _initialize(self):
-        self.default_pretrained_model_path = os.path.join(
-            self.directory, "ultra_light_fast_generic_face_detector_1mb_640")
-        self._set_config()
-
-    def _set_config(self):
-        """
-        predictor config setting
-        """
-        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
-        cpu_config.disable_glog_info()
-        cpu_config.disable_gpu()
-        self.cpu_predictor = create_paddle_predictor(cpu_config)
-
-        try:
-            _places = os.environ["CUDA_VISIBLE_DEVICES"]
-            int(_places[0])
-            use_gpu = True
-        except:
-            use_gpu = False
-        if use_gpu:
-            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
-            gpu_config.disable_glog_info()
-            gpu_config.enable_use_gpu(
-                memory_pool_init_size_mb=1000, device_id=0)
-            self.gpu_predictor = create_paddle_predictor(gpu_config)
-
-    def save_inference_model(self,
-                             dirname,
-                             model_filename=None,
-                             params_filename=None,
-                             combined=True):
-        if combined:
-            model_filename = "__model__" if not model_filename else model_filename
-            params_filename = "__params__" if not params_filename else params_filename
-        place = fluid.CPUPlace()
-        exe = fluid.Executor(place)
-
-        program, feeded_var_names, target_vars = fluid.io.load_inference_model(
-            dirname=self.default_pretrained_model_path, executor=exe)
-
-        fluid.io.save_inference_model(
-            dirname=dirname,
-            main_program=program,
-            executor=exe,
-            feeded_var_names=feeded_var_names,
-            target_vars=target_vars,
-            model_filename=model_filename,
-            params_filename=params_filename)
-
-    def face_detection(self,
-                       images=None,
-                       paths=None,
-                       data=None,
-                       batch_size=1,
-                       use_gpu=False,
output_dir='face_detector_640_predict_output', - visualization=False, - confs_threshold=0.5, - iou_threshold=0.5): - """ - API for face detection. - - Args: - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - paths (list[str]): The paths of images. - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - output_dir (str): The path to store output images. - visualization (bool): Whether to save image or not. - confs_threshold (float): threshold for confidence coefficient. - iou_threshold (float): threshold for iou. - Returns: - res (list[dict()]): The result of face detection and save path of images. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." - ) - - # compatibility with older versions - if data and 'image' in data: - if paths is None: - paths = [] - paths += data['image'] - - # get all data - all_data = [] - for yield_data in reader(images, paths): - all_data.append(yield_data) - - total_num = len(all_data) - loop_num = int(np.ceil(total_num / batch_size)) - - res = [] - for iter_id in range(loop_num): - batch_data = list() - handle_id = iter_id * batch_size - for image_id in range(batch_size): - try: - batch_data.append(all_data[handle_id + image_id]) - except: - pass - # feed batch image - batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.astype('float32')) - data_out = self.gpu_predictor.run([ - batch_image - ]) if use_gpu else self.cpu_predictor.run([batch_image]) - confidences = data_out[0].as_ndarray() - boxes = data_out[1].as_ndarray() - - # postprocess one by one - for i in range(len(batch_data)): - out = postprocess( - confidences=confidences[i], - boxes=boxes[i], - orig_im=batch_data[i]['orig_im'], - orig_im_shape=batch_data[i]['orig_im_shape'], - orig_im_path=batch_data[i]['orig_im_path'], - output_dir=output_dir, - visualization=visualization, - confs_threshold=confs_threshold, - iou_threshold=iou_threshold) - res.append(out) - return res - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.face_detection(images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. - """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.face_detection( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization) - return results - - def add_module_config_arg(self): - """ - Add the command config options. 
- """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='face_detector_640_predict_output', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - self.arg_config_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - - def add_module_input_arg(self): - """ - Add the command input options. - """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") diff --git a/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/processor.py b/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/processor.py deleted file mode 100644 index ddb4bc1b212b986d4c257e11db43e195df88564c..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/processor.py +++ /dev/null @@ -1,146 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time - -import base64 -import cv2 -import numpy as np - -__all__ = ['postprocess'] - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def area_of(left_top, right_bottom): - hw = np.clip(right_bottom - left_top, 0.0, None) - return hw[..., 0] * hw[..., 1] - - -def iou_of(boxes0, boxes1, eps=1e-5): - overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2]) - overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:]) - overlap_area = area_of(overlap_left_top, overlap_right_bottom) - area0 = area_of(boxes0[..., :2], boxes0[..., 2:]) - area1 = area_of(boxes1[..., :2], boxes1[..., 2:]) - return overlap_area / (area0 + area1 - overlap_area + eps) - - -def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200): - scores = box_scores[:, -1] - boxes = box_scores[:, :-1] - picked = [] - # _, indexes = scores.sort(descending=True) - indexes = np.argsort(scores) - # indexes = indexes[:candidate_size] - indexes = indexes[-candidate_size:] - while len(indexes) > 0: - # current = indexes[0] - current = indexes[-1] - picked.append(current) - if 0 < top_k == len(picked) or len(indexes) == 1: - break - current_box = boxes[current, :] - # indexes = indexes[1:] - indexes = indexes[:-1] - rest_boxes = boxes[indexes, :] - iou = iou_of(rest_boxes, np.expand_dims(current_box, axis=0)) - indexes = indexes[iou <= iou_threshold] - return box_scores[picked, :] - - -def check_dir(dir_path): - if not os.path.exists(dir_path): - os.makedirs(dir_path) - elif os.path.isfile(dir_path): - os.remove(dir_path) - os.makedirs(dir_path) - - -def get_image_ext(image): - if image.shape[2] == 4: - return ".png" - return ".jpg" - - -def postprocess(confidences, - boxes, - orig_im, - orig_im_shape, - orig_im_path, - output_dir, - visualization, - confs_threshold=0.5, - iou_threshold=0.5): - """ - Postprocess output of network. one image at a time. - - Args: - confidences (numpy.ndarray): confidences, with shape [num, 2] - boxes (numpy.ndaray): boxes coordinate, with shape [num, 4] - orig_im (numpy.ndarray): original image. 
-        orig_im_shape (tuple): shape of original image.
-        orig_im_path (str): path of original image.
-        output_dir (str): output directory to store image.
-        visualization (bool): whether to save image or not.
-    """
-    output = {}
-    output['data'] = []
-    if orig_im_path:
-        output['path'] = orig_im_path
-    picked_box_probs = []
-    picked_labels = []
-    for class_index in range(1, confidences.shape[1]):
-        probs = confidences[:, class_index]
-        mask = probs > confs_threshold
-        probs = probs[mask]
-        if probs.shape[0] == 0:
-            continue
-        subset_boxes = boxes[mask, :]
-        box_probs = np.concatenate([subset_boxes, probs.reshape(-1, 1)], axis=1)
-        box_probs = hard_nms(box_probs, iou_threshold=iou_threshold, top_k=-1)
-        picked_box_probs.append(box_probs)
-        picked_labels.extend([class_index] * box_probs.shape[0])
-
-    if not picked_box_probs:
-        return output
-
-    picked_box_probs = np.concatenate(picked_box_probs)
-    picked_box_probs[:, 0] *= orig_im_shape[1]
-    picked_box_probs[:, 1] *= orig_im_shape[0]
-    picked_box_probs[:, 2] *= orig_im_shape[1]
-    picked_box_probs[:, 3] *= orig_im_shape[0]
-
-    for data in picked_box_probs:
-        output['data'].append({
-            'left': float(data[0]),
-            'right': float(data[2]),
-            'top': float(data[1]),
-            'bottom': float(data[3]),
-            'confidence': float(data[4])
-        })
-
-    picked_box_probs = picked_box_probs[:, :4].astype(np.int32)
-    if visualization:
-        for i in range(picked_box_probs.shape[0]):
-            box = picked_box_probs[i]
-            cv2.rectangle(orig_im, (box[0], box[1]), (box[2], box[3]),
-                          (255, 255, 0), 2)
-        check_dir(output_dir)
-        ext = os.path.splitext(orig_im_path)[-1] if orig_im_path else ''
-        ext = ext if ext else get_image_ext(orig_im)
-        orig_im_path = orig_im_path if orig_im_path else 'ndarray_{}{}'.format(
-            time.time(), ext)
-        im_name = os.path.basename(orig_im_path)
-        im_save_path = os.path.join(output_dir, im_name)
-        output['save_path'] = im_save_path
-        cv2.imwrite(im_save_path, orig_im)
-    return output
diff --git a/hub_module/modules/image/keypoint_detection/face_landmark_localization/data_feed.py b/hub_module/modules/image/keypoint_detection/face_landmark_localization/data_feed.py
deleted file mode 100644
index d8159ea62702bba3e8928b3415a829bacbe355c6..0000000000000000000000000000000000000000
--- a/hub_module/modules/image/keypoint_detection/face_landmark_localization/data_feed.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# coding=utf-8
-import os
-import time
-from collections import OrderedDict
-
-import cv2
-import numpy as np
-from PIL import Image
-
-__all__ = ['reader']
-
-
-def reader(face_detector, images=None, paths=None, use_gpu=False):
-    """
-    Preprocess to yield image.
-
-    Args:
-        face_detector (hub.Module): module used to detect faces first.
-        images (list(numpy.ndarray)): images data, shape of each is [H, W, C].
-        paths (list[str]): paths to images.
-        use_gpu (bool): whether to use GPU in face detection.
-
-    Yield:
-        each (collections.OrderedDict): info of original image, preprocessed image.
-    """
-    components = []
-    if paths:
-        for im_path in paths:
-            each = OrderedDict()
-            assert os.path.isfile(
-                im_path), "The {} isn't a valid file path.".format(im_path)
-            im = cv2.imread(im_path)
-            each['orig_im'] = im
-            each['orig_im_shape'] = im.shape
-            each['orig_im_path'] = im_path
-            components.append(each)
-    if images is not None:
-        assert type(images) is list, "images should be a list."
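-        # every image is later cropped to each detected face and turned into
-        # a standardized 60x60 grayscale patch before being yielded.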
- for im in images: - each = OrderedDict() - each['orig_im'] = im - each['orig_im_path'] = None - each['orig_im_shape'] = im.shape - components.append(each) - - for idx, item in enumerate( - face_detector.face_detection( - images=[component['orig_im'] for component in components], - use_gpu=use_gpu, - visualization=False)): - for face in item['data']: - width = int(components[idx]['orig_im_shape'][1]) - height = int(components[idx]['orig_im_shape'][0]) - x1 = 0 if int(face['left']) < 0 else int(face['left']) - x2 = width if int(face['right']) > width else int(face['right']) - y1 = 0 if int(face['top']) < 0 else int(face['top']) - y2 = height if int(face['bottom']) > height else int(face['bottom']) - roi = components[idx]['orig_im'][y1:y2 + 1, x1:x2 + 1, :] - gray_img = cv2.cvtColor(roi, cv2.COLOR_RGB2GRAY) - gray_img = cv2.resize( - gray_img, (60, 60), interpolation=cv2.INTER_CUBIC) - mean, std_dev = cv2.meanStdDev(gray_img) - gray_img = (gray_img - mean[0][0]) / (0.000001 + std_dev[0][0]) - gray_img = np.expand_dims(gray_img, axis=0) - yield { - 'face': gray_img, - 'x1': x1, - 'y1': y1, - 'x2': x2, - 'y2': y2, - 'orig_im': components[idx]['orig_im'], - 'orig_im_path': components[idx]['orig_im_path'], - 'orig_im_shape': components[idx]['orig_im_shape'], - 'id': idx - } diff --git a/hub_module/modules/image/keypoint_detection/face_landmark_localization/face_landmark.py b/hub_module/modules/image/keypoint_detection/face_landmark_localization/face_landmark.py deleted file mode 100644 index 7bae8465d0b7c9dd6b1c646dfc0b86d17d766693..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/keypoint_detection/face_landmark_localization/face_landmark.py +++ /dev/null @@ -1,109 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import - -import paddle.fluid as fluid - -__all__ = ["face_landmark_localization"] - - -def face_landmark_localization(image): - # image = fluid.layers.data(shape=[1, 60, 60], name='data', dtype='float32') - Conv1 = fluid.layers.conv2d( - image, - param_attr='Conv1_weights', - name='Conv1', - dilation=[1, 1], - filter_size=[5, 5], - stride=[1, 1], - groups=1, - bias_attr='Conv1_bias', - padding=[2, 2], - num_filters=20) - ActivationTangH1 = fluid.layers.tanh(Conv1, name='ActivationTangH1') - ActivationAbs1 = fluid.layers.abs(ActivationTangH1, name='ActivationAbs1') - Pool1 = fluid.layers.pool2d( - ActivationAbs1, - exclusive=False, - pool_type='max', - pool_padding=[0, 0], - name='Pool1', - global_pooling=False, - pool_stride=[2, 2], - ceil_mode=True, - pool_size=[2, 2]) - Conv2 = fluid.layers.conv2d( - Pool1, - param_attr='Conv2_weights', - name='Conv2', - dilation=[1, 1], - filter_size=[5, 5], - stride=[1, 1], - groups=1, - bias_attr='Conv2_bias', - padding=[2, 2], - num_filters=48) - ActivationTangH2 = fluid.layers.tanh(Conv2, name='ActivationTangH2') - ActivationAbs2 = fluid.layers.abs(ActivationTangH2, name='ActivationAbs2') - Pool2 = fluid.layers.pool2d( - ActivationAbs2, - exclusive=False, - pool_type='max', - pool_padding=[0, 0], - name='Pool2', - global_pooling=False, - pool_stride=[2, 2], - ceil_mode=True, - pool_size=[2, 2]) - Conv3 = fluid.layers.conv2d( - Pool2, - param_attr='Conv3_weights', - name='Conv3', - dilation=[1, 1], - filter_size=[3, 3], - stride=[1, 1], - groups=1, - bias_attr='Conv3_bias', - padding=[0, 0], - num_filters=64) - ActivationTangH3 = fluid.layers.tanh(Conv3, name='ActivationTangH3') - ActivationAbs3 = fluid.layers.abs(ActivationTangH3, name='ActivationAbs3') - Pool3 = fluid.layers.pool2d( - ActivationAbs3, - 
exclusive=False,
-        pool_type='max',
-        pool_padding=[0, 0],
-        name='Pool3',
-        global_pooling=False,
-        pool_stride=[2, 2],
-        ceil_mode=True,
-        pool_size=[3, 3])
-    Conv4 = fluid.layers.conv2d(
-        Pool3,
-        param_attr='Conv4_weights',
-        name='Conv4',
-        dilation=[1, 1],
-        filter_size=[3, 3],
-        stride=[1, 1],
-        groups=1,
-        bias_attr='Conv4_bias',
-        padding=[0, 0],
-        num_filters=80)
-    ActivationTangH4 = fluid.layers.tanh(Conv4, name='ActivationTangH4')
-    ActivationAbs4 = fluid.layers.abs(ActivationTangH4, name='ActivationAbs4')
-    Dense1 = fluid.layers.fc(
-        ActivationAbs4,
-        param_attr='Dense1_weights',
-        act=None,
-        name='Dense1',
-        size=512,
-        bias_attr='Dense1_bias')
-    ActivationTangH5 = fluid.layers.tanh(Dense1, name='ActivationTangH5')
-    ActivationAbs5 = fluid.layers.abs(ActivationTangH5, name='ActivationAbs5')
-    Dense3 = fluid.layers.fc(
-        ActivationAbs5,
-        param_attr='Dense3_weights',
-        act=None,
-        name='Dense3',
-        size=136,
-        bias_attr='Dense3_bias')
-    return Dense3
diff --git a/hub_module/modules/image/keypoint_detection/face_landmark_localization/module.py b/hub_module/modules/image/keypoint_detection/face_landmark_localization/module.py
deleted file mode 100644
index 5b21ad7902119b3e0d448f570fa474ebf8a79b1c..0000000000000000000000000000000000000000
--- a/hub_module/modules/image/keypoint_detection/face_landmark_localization/module.py
+++ /dev/null
@@ -1,231 +0,0 @@
-# coding=utf-8
-from __future__ import absolute_import
-from __future__ import division
-
-import ast
-import argparse
-import time
-import os
-from collections import OrderedDict
-
-import cv2
-import numpy as np
-import paddle.fluid as fluid
-import paddlehub as hub
-from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
-from paddlehub.module.module import moduleinfo, runnable, serving
-
-from face_landmark_localization.processor import postprocess, base64_to_cv2
-from face_landmark_localization.data_feed import reader
-
-
-@moduleinfo(
-    name="face_landmark_localization",
-    type="CV/keypoint_detection",
-    author="paddlepaddle",
-    author_email="paddle-dev@baidu.com",
-    summary=
-    "Face_Landmark_Localization can be used to locate face landmarks. This module is trained on the MPII Human Pose dataset.",
-    version="1.0.2")
-class FaceLandmarkLocalization(hub.Module):
-    def _initialize(self, face_detector_module=None):
-        """
-        Args:
-            face_detector_module (class): module to detect face.
-        """
-        self.default_pretrained_model_path = os.path.join(
-            self.directory, "face_landmark_localization")
-        if face_detector_module is None:
-            self.face_detector = hub.Module(
-                name="ultra_light_fast_generic_face_detector_1mb_640")
-        else:
-            self.face_detector = face_detector_module
-        self._set_config()
-
-    def _set_config(self):
-        """
-        predictor config setting
-        """
-        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
-        cpu_config.disable_glog_info()
-        cpu_config.disable_gpu()
-        self.cpu_predictor = create_paddle_predictor(cpu_config)
-
-        try:
-            _places = os.environ["CUDA_VISIBLE_DEVICES"]
-            int(_places[0])
-            use_gpu = True
-        except:
-            use_gpu = False
-        if use_gpu:
-            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
-            gpu_config.disable_glog_info()
-            gpu_config.enable_use_gpu(
-                memory_pool_init_size_mb=1000, device_id=0)
-            self.gpu_predictor = create_paddle_predictor(gpu_config)
-
-    def set_face_detector_module(self, face_detector_module):
-        """
-        Set face detector.
-
-        Args:
-            face_detector_module (class): module to detect face.
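-
-        Example (illustrative; any face detection module with the same
-        interface works, e.g. the 320 variant from this repository):
-            face_landmark = hub.Module(name="face_landmark_localization")
-            face_landmark.set_face_detector_module(
-                hub.Module(name="ultra_light_fast_generic_face_detector_1mb_320"))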
- """ - self.face_detector = face_detector_module - - def get_face_detector_module(self): - return self.face_detector - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - face_landmark_dir = os.path.join(dirname, "face_landmark") - detector_dir = os.path.join(dirname, "detector") - - fluid.io.save_inference_model( - dirname=face_landmark_dir, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - self.face_detector.save_inference_model( - dirname=detector_dir, - model_filename=model_filename, - params_filename=params_filename, - combined=combined) - - def keypoint_detection(self, - images=None, - paths=None, - batch_size=1, - use_gpu=False, - output_dir='face_landmark_output', - visualization=False): - """ - API for face landmark. - - Args: - images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. - paths (list[str]): The paths of images. - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - output_dir (str): The path to store output images. - visualization (bool): Whether to save image or not. - - Returns: - res (list[dict()]): The key points of face landmark and save path of images. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." - ) - - # get all data - all_data = [] - for yield_data in reader(self.face_detector, images, paths, use_gpu): - all_data.append(yield_data) - - total_num = len(all_data) - loop_num = int(np.ceil(total_num / batch_size)) - - res = [] - for iter_id in range(loop_num): - batch_data = [] - handle_id = iter_id * batch_size - for image_id in range(batch_size): - try: - batch_data.append(all_data[handle_id + image_id]) - except: - pass - # feed batch image - batch_image = np.array([data['face'] for data in batch_data]) - face_tensor = PaddleTensor(batch_image.astype('float32')) - pred_out = self.gpu_predictor.run([ - face_tensor - ]) if use_gpu else self.cpu_predictor.run([face_tensor]) - points = pred_out[0].as_ndarray() - for idx, sample in enumerate(batch_data): - sample['points'] = points[idx].reshape(68, -1) - res += batch_data - - res = postprocess(res, output_dir, visualization) - return res - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.keypoint_detection(images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. - """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. 
Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.keypoint_detection( - paths=[args.input_path], - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether to use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default=None, - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - - def add_module_input_arg(self): - """ - Add the command input options. - """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") diff --git a/hub_module/modules/image/keypoint_detection/face_landmark_localization/processor.py b/hub_module/modules/image/keypoint_detection/face_landmark_localization/processor.py deleted file mode 100644 index 373951698d52fc29ad178c0bbf98febc2573ae38..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/keypoint_detection/face_landmark_localization/processor.py +++ /dev/null @@ -1,71 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import base64 -import os -import time -from collections import OrderedDict - -import cv2 -import numpy as np - -__all__ = ['check_dir', 'postprocess'] - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.frombuffer(data, np.uint8)  # np.fromstring is deprecated - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def check_dir(dir_path): - if not os.path.exists(dir_path): - os.makedirs(dir_path) - elif os.path.isfile(dir_path): - os.remove(dir_path) - os.makedirs(dir_path) - - -def get_image_ext(image): - if image.shape[2] == 4: - return ".png" - return ".jpg" - - -def postprocess(res, output_dir, visualization): - """ - Postprocess the output of the network, one face at a time.
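The mapping performed here takes landmarks normalized to the detected face box back to original-image pixels. A small numpy sketch of that step (the box coordinates are made-up values):

```python
# Denormalize 68 landmarks from [0, 1] face-box coordinates to pixels.
import numpy as np

points = np.random.rand(68, 2)              # normalized (x, y) per landmark
x1, y1, x2, y2 = 40.0, 60.0, 200.0, 240.0   # detected face box in pixels
points[:, 0] = points[:, 0] * (x2 - x1) + x1
points[:, 1] = points[:, 1] * (y2 - y1) + y1
```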
- """ - output = [] - _cur_id = -1 - for idx, _result in enumerate(res): - if _result['id'] != _cur_id: - _cur_id = _result['id'] - output.append({'data': []}) - _result['points'][:, 0] *= (_result['x2'] - _result['x1']) - _result['points'][:, 0] += _result['x1'] - _result['points'][:, 1] *= (_result['y2'] - _result['y1']) - _result['points'][:, 1] += _result['y1'] - output[-1]['data'].append(_result['points'].tolist()) - - if visualization: - check_dir(output_dir) - for idx, sample in enumerate(output): - orig_im = res[idx]['orig_im'] - for points in sample['data']: - for x, y in points: - cv2.circle(orig_im, (int(x), int(y)), 1, (0, 0, 255), 2) - orig_im_path = res[idx]['orig_im_path'] - ext = os.path.splitext(orig_im_path) if orig_im_path else '' - ext = ext if ext else get_image_ext(orig_im) - org_im_path = orig_im_path if orig_im_path else 'ndarray_{}{}'.format( - time.time(), ext) - im_name = os.path.basename(org_im_path) - im_save_path = os.path.join(output_dir, im_name) - sample['save_path'] = im_save_path - cv2.imwrite(im_save_path, orig_im) - - return output diff --git a/hub_module/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/data_feed.py b/hub_module/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/data_feed.py deleted file mode 100644 index 0cb18f2a31f6155efe0ba705375c55236ddd43ca..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/data_feed.py +++ /dev/null @@ -1,53 +0,0 @@ -# coding=utf-8 -import os -import time -from collections import OrderedDict - -import cv2 -import numpy as np -from PIL import Image - -__all__ = ['reader'] - - -def reader(images=None, paths=None): - """ - Preprocess to yield image. - - Args: - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - paths (list[str]): paths to images. - - Yield: - each (collections.OrderedDict): info of original image, preprocessed image. - """ - component = list() - if paths: - for im_path in paths: - each = OrderedDict() - assert os.path.isfile( - im_path), "The {} isn't a valid file path.".format(im_path) - im = cv2.imread(im_path).astype('float32') - each['org_im'] = im - each['org_im_path'] = im_path - each['org_im_shape'] = im.shape - component.append(each) - if images is not None: - assert type(images) is list, "images should be a list." 
- for im in images: - each = OrderedDict() - each['org_im'] = im - each['org_im_path'] = 'ndarray_time={}'.format( - round(time.time(), 6) * 1e6) - each['org_im_shape'] = im.shape - component.append(each) - - for element in component: - im = element['org_im'].copy() - im = cv2.resize(im, (384, 384)) - im = im.astype('float32') - im = im.transpose((2, 0, 1)) / 255 - im -= np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) - im /= np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) - element['image'] = im - yield element diff --git a/hub_module/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/module.py b/hub_module/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/module.py deleted file mode 100644 index 767c4bd59d9341570a2d92950d500d4aa798d4dc..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/module.py +++ /dev/null @@ -1,204 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division - -import ast -import os -import argparse - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving - -from human_pose_estimation_resnet50_mpii.processor import base64_to_cv2, postprocess -from human_pose_estimation_resnet50_mpii.data_feed import reader -from human_pose_estimation_resnet50_mpii.pose_resnet import ResNet - - -@moduleinfo( - name="human_pose_estimation_resnet50_mpii", - type="CV/keypoint_detection", - author="paddlepaddle", - author_email="paddle-dev@baidu.comi", - summary= - "Paddle implementation for the paper `Simple baselines for human pose estimation and tracking`, trained with the MPII dataset.", - version="1.1.1") -class HumanPoseEstimation(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "pose-resnet50-mpii-384x384") - self._set_config() - - def _set_config(self): - """ - predictor config setting - """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu( - memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def keypoint_detection(self, - images=None, - paths=None, - batch_size=1, - use_gpu=False, - output_dir='output_pose', - visualization=False): - """ - API for human pose estimation and tracking. - - Args: - images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. - paths (list[str]): The paths of images. - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - output_dir (str): The path to store output images. - visualization (bool): Whether to save image or not. - - Returns: - res (list[dict]): each element of res is a dict, keys contains 'path', 'data', the corresponding valus are: - path (str): the path of original image. - data (OrderedDict): The key points of human pose. 
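A hedged usage sketch of this API, assuming PaddleHub can fetch the module by name; the image path is a placeholder:

```python
import paddlehub as hub

pose = hub.Module(name="human_pose_estimation_resnet50_mpii")
results = pose.keypoint_detection(paths=["pose.jpg"], visualization=True)
for r in results:
    # Joint names such as 'head_top' come from processor.py below.
    print(r['path'], r['data']['head_top'])
```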
- """ - all_data = list() - for yield_data in reader(images, paths): - all_data.append(yield_data) - - total_num = len(all_data) - loop_num = int(np.ceil(total_num / batch_size)) - - res = list() - for iter_id in range(loop_num): - batch_data = list() - handle_id = iter_id * batch_size - for image_id in range(batch_size): - try: - batch_data.append(all_data[handle_id + image_id]) - except: - pass - # feed batch image - batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.copy()) - output = self.gpu_predictor.run([ - batch_image - ]) if use_gpu else self.cpu_predictor.run([batch_image]) - output = np.expand_dims(output[0].as_ndarray(), axis=1) - # postprocess one by one - for i in range(len(batch_data)): - out = postprocess( - out_heatmaps=output[i], - org_im=batch_data[i]['org_im'], - org_im_shape=batch_data[i]['org_im_shape'], - org_im_path=batch_data[i]['org_im_path'], - output_dir=output_dir, - visualization=visualization) - res.append(out) - return res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.keypoint_detection(images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. - """ - self.parser = argparse.ArgumentParser( - description="Run the human_pose_estimation_resnet50_mpii module.", - prog='hub run human_pose_estimation_resnet50_mpii', - usage='%(prog)s', - add_help=True) - - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.keypoint_detection( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='output_pose', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - self.arg_config_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - - def add_module_input_arg(self): - """ - Add the command input options. 
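For reference, a sketch of driving the same argument parser programmatically through `run_cmd`; the flag names mirror the definitions above and the image path is a placeholder:

```python
import paddlehub as hub

module = hub.Module(name="human_pose_estimation_resnet50_mpii")
results = module.run_cmd([
    '--input_path', 'pose.jpg',
    '--use_gpu', 'False',        # parsed with ast.literal_eval
    '--visualization', 'True',
])
```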
- """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") diff --git a/hub_module/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/pose_resnet.py b/hub_module/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/pose_resnet.py deleted file mode 100644 index 4f1843dc7e8539f8eece83a1a53ec016097c34ed..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/pose_resnet.py +++ /dev/null @@ -1,187 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle.fluid as fluid - -__all__ = ["ResNet", "ResNet50", "ResNet101", "ResNet152"] - -BN_MOMENTUM = 0.9 - - -class ResNet(): - def __init__(self, layers=50, kps_num=16, test_mode=False): - """ - :param layers: int, the layers number which is used here - :param kps_num: int, the number of keypoints in accord with the dataset - :param test_mode: bool, if True, only return output heatmaps, no loss - - :return: loss, output heatmaps - """ - self.k = kps_num - self.layers = layers - self.test_mode = test_mode - - def net(self, input, target=None, target_weight=None): - layers = self.layers - supported_layers = [50, 101, 152] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - - if layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - num_filters = [64, 128, 256, 512] - - conv = self.conv_bn_layer( - input=input, num_filters=64, filter_size=7, stride=2, act='relu') - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - - for block in range(len(depth)): - for i in range(depth[block]): - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1) - - conv = fluid.layers.conv2d_transpose( - input=conv, - num_filters=256, - filter_size=4, - padding=1, - stride=2, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Normal(0., 0.001)), - act=None, - bias_attr=False) - conv = fluid.layers.batch_norm( - input=conv, act='relu', momentum=BN_MOMENTUM) - conv = fluid.layers.conv2d_transpose( - input=conv, - num_filters=256, - filter_size=4, - padding=1, - stride=2, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Normal(0., 0.001)), - act=None, - bias_attr=False) - conv = fluid.layers.batch_norm( - input=conv, act='relu', momentum=BN_MOMENTUM) - conv = fluid.layers.conv2d_transpose( - input=conv, - num_filters=256, - filter_size=4, - padding=1, - stride=2, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Normal(0., 0.001)), - act=None, - bias_attr=False) - conv = fluid.layers.batch_norm( - input=conv, act='relu', momentum=BN_MOMENTUM) - - out = fluid.layers.conv2d( - input=conv, - num_filters=self.k, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Normal(0., 0.001))) - - if self.test_mode: - return out - else: - loss = self.calc_loss(out, target, target_weight) - return loss, out - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) 
// 2, - groups=groups, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Normal(0., 0.001)), - act=None, - bias_attr=False) - return fluid.layers.batch_norm( - input=conv, act=act, momentum=BN_MOMENTUM) - - def shortcut(self, input, ch_out, stride): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - return self.conv_bn_layer(input, ch_out, 1, stride) - else: - return input - - def calc_loss(self, heatmap, target, target_weight): - _, c, h, w = heatmap.shape - x = fluid.layers.reshape(heatmap, (-1, self.k, h * w)) - y = fluid.layers.reshape(target, (-1, self.k, h * w)) - w = fluid.layers.reshape(target_weight, (-1, self.k)) - - x = fluid.layers.split(x, num_or_sections=self.k, dim=1) - y = fluid.layers.split(y, num_or_sections=self.k, dim=1) - w = fluid.layers.split(w, num_or_sections=self.k, dim=1) - - _list = [] - for idx in range(self.k): - _tmp = fluid.layers.scale(x=x[idx] - y[idx], scale=1.) - _tmp = _tmp * _tmp - _tmp = fluid.layers.reduce_mean(_tmp, dim=2) - _list.append(_tmp * w[idx]) - - _loss = fluid.layers.concat(_list, axis=0) - _loss = fluid.layers.reduce_mean(_loss) - return 0.5 * _loss - - def bottleneck_block(self, input, num_filters, stride): - conv0 = self.conv_bn_layer( - input=input, num_filters=num_filters, filter_size=1, act='relu') - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu') - conv2 = self.conv_bn_layer( - input=conv1, num_filters=num_filters * 4, filter_size=1, act=None) - - short = self.shortcut(input, num_filters * 4, stride) - - return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') - - -def ResNet50(): - model = ResNet(layers=50) - return model - - -def ResNet101(): - model = ResNet(layers=101) - return model - - -def ResNet152(): - model = ResNet(layers=152) - return model diff --git a/hub_module/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/processor.py b/hub_module/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/processor.py deleted file mode 100644 index 310b2a70fc3c08103149869d1ae0e3c55a8816ee..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/processor.py +++ /dev/null @@ -1,147 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import base64 -import os -import time -from collections import OrderedDict - -import cv2 -import numpy as np - -__all__ = ['base64_to_cv2', 'postprocess'] - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def get_max_preds(batch_heatmaps): - """ - Get predictions from score maps. 
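A compact numpy sketch of the decoding implemented below: per joint, take the flat argmax of the heatmap and unravel it back into (x, y):

```python
import numpy as np

batch_heatmaps = np.random.rand(1, 16, 96, 96).astype(np.float32)
n, c, h, w = batch_heatmaps.shape
flat = batch_heatmaps.reshape(n, c, -1)
idx = np.argmax(flat, axis=2)    # flat peak index per joint
xs, ys = idx % w, idx // w       # column (x) and row (y) of each peak
```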
- - Args: - batch_heatmaps (numpy.ndarray): output of the network, with shape [N, C, H, W] - """ - assert isinstance(batch_heatmaps, np.ndarray), \ - 'batch_heatmaps should be numpy.ndarray' - assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim' - - batch_size = batch_heatmaps.shape[0] - num_joints = batch_heatmaps.shape[1] - width = batch_heatmaps.shape[3] - heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1)) - idx = np.argmax(heatmaps_reshaped, 2) - maxvals = np.amax(heatmaps_reshaped, 2) - maxvals = maxvals.reshape((batch_size, num_joints, 1)) - idx = idx.reshape((batch_size, num_joints, 1)) - preds = np.tile(idx, (1, 1, 2)).astype(np.float32) - preds[:, :, 0] = (preds[:, :, 0]) % width - preds[:, :, 1] = np.floor((preds[:, :, 1]) / width) - pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2)) - pred_mask = pred_mask.astype(np.float32) - preds *= pred_mask - return preds, maxvals - - -def predict_results(batch_heatmaps): - batch_size, num_joints, heatmap_height, heatmap_width = batch_heatmaps.shape - preds, maxvals = get_max_preds(batch_heatmaps) - return preds[0] * 4, num_joints - - -def postprocess(out_heatmaps, org_im, org_im_shape, org_im_path, output_dir, - visualization): - """ - Postprocess output of network. one image at a time. - - Args: - out_heatmaps (numpy.ndarray): output of network. - org_im (numpy.ndarray): original image. - org_im_shape (list): shape pf original image. - org_im_path (list): path of riginal image. - output_dir (str): output directory to store image. - visualization (bool): whether to save image or not. - - Returns: - res (dict): Output of postprocess. keys contains 'path', 'data', the corresponding valus are: - path (str): the path of original image. - data (OrderedDict): The key points of human pose. - """ - res = dict() - res['path'] = org_im_path - res['data'] = OrderedDict() - preds, num_joints = predict_results(out_heatmaps) - scale_horizon = org_im_shape[1] * 1.0 / 384 - scale_vertical = org_im_shape[0] * 1.0 / 384 - preds = np.multiply(preds, (scale_horizon, scale_vertical)).astype(int) - if visualization: - icolor = (255, 137, 0) - ocolor = (138, 255, 0) - rendered_im = org_im.copy() - for j in range(num_joints): - x, y = preds[j] - cv2.circle(rendered_im, (x, y), 3, icolor, -1, 16) - cv2.circle(rendered_im, (x, y), 6, ocolor, 1, 16) - check_dir(output_dir) - save_im_name = get_save_image_name(org_im, org_im_path, output_dir) - cv2.imwrite(save_im_name, rendered_im) - print('image saved in {}'.format(save_im_name)) - - # articulation - preds = list(map(lambda pred: [int(_) for _ in pred], preds)) - res['data']['left_ankle'] = list(preds[0]) - res['data']['left_knee'] = list(preds[1]) - res['data']['left_hip'] = list(preds[2]) - res['data']['right_hip'] = list(preds[3]) - res['data']['right_knee'] = list(preds[4]) - res['data']['right_ankle'] = list(preds[5]) - res['data']['pelvis'] = list(preds[6]) - res['data']['thorax'] = list(preds[7]) - res['data']['upper_neck'] = list(preds[8]) - res['data']['head_top'] = list(preds[9]) - res['data']['right_wrist'] = list(preds[10]) - res['data']['right_elbow'] = list(preds[11]) - res['data']['right_shoulder'] = list(preds[12]) - res['data']['left_shoulder'] = list(preds[13]) - res['data']['left_elbow'] = list(preds[14]) - res['data']['left_wrist'] = list(preds[15]) - - return res - - -def check_dir(dir_path): - """ - Create directory to save processed image. - - Args: - dir_path (str): directory path to save images. 
- """ - if not os.path.exists(dir_path): - os.makedirs(dir_path) - elif os.path.isfile(dir_path): - os.remove(dir_path) - os.makedirs(dir_path) - - -def get_save_image_name(org_im, org_im_path, output_dir): - """ - Get save image name from source image path. - """ - # name prefix of original image - org_im_name = os.path.split(org_im_path)[-1] - im_prefix = os.path.splitext(org_im_name)[0] - # extension - ext = '.jpg' - # save image path - save_im_path = os.path.join(output_dir, im_prefix + ext) - if os.path.exists(save_im_path): - save_im_path = os.path.join( - output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) - - return save_im_path diff --git a/hub_module/modules/image/keypoint_detection/openpose_body_estimation/module.py b/hub_module/modules/image/keypoint_detection/openpose_body_estimation/module.py deleted file mode 100644 index 0b1f9f32c52c50efc532ea8a360ed797287a2f6b..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/keypoint_detection/openpose_body_estimation/module.py +++ /dev/null @@ -1,212 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import copy -from collections import OrderedDict - -import cv2 -import paddle -import paddle.nn as nn -import numpy as np -from paddlehub.module.module import moduleinfo -import paddlehub.process.transforms as T - -import openpose_body_estimation.processor as P - - -@moduleinfo(name="openpose_body_estimation", - type="CV/image_editing", - author="paddlepaddle", - author_email="", - summary="Openpose_body_estimation is a body pose estimation model based on Realtime Multi-Person 2D Pose \ - Estimation using Part Affinity Fields.", - version="1.0.0") -class BodyPoseModel(nn.Layer): - """ - BodyPoseModel - - Args: - load_checkpoint(str): Checkpoint save path, default is None. - visualization (bool): Whether to save the estimation result. Default is True. 
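A usage sketch mirroring the `__main__` block at the bottom of this file; "demo.jpg" is a placeholder path:

```python
import paddle

paddle.disable_static()
model = BodyPoseModel()
model.eval()
# Saves a rendered pose image under ./result when visualization is True.
candidate, subset = model.predict("demo.jpg")
```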
- """ - def __init__(self, load_checkpoint: str = None, visualization: bool = True): - super(BodyPoseModel, self).__init__() - - self.resize_func = T.ResizeScaling() - self.norm_func = T.Normalize(std=[1, 1, 1]) - self.pad_func = P.PadDownRight() - self.remove_pad = P.RemovePadding() - self.get_peak = P.GetPeak() - self.get_connection = P.Connection() - self.get_candidate = P.Candidate() - self.draw_pose = P.DrawPose() - self.visualization = visualization - - no_relu_layers = ['conv5_5_CPM_L1', 'conv5_5_CPM_L2', 'Mconv7_stage2_L1', \ - 'Mconv7_stage2_L2', 'Mconv7_stage3_L1', 'Mconv7_stage3_L2', \ - 'Mconv7_stage4_L1', 'Mconv7_stage4_L2', 'Mconv7_stage5_L1', \ - 'Mconv7_stage5_L2', 'Mconv7_stage6_L1', 'Mconv7_stage6_L1'] - blocks = {} - block0 = OrderedDict([('conv1_1', [3, 64, 3, 1, 1]), ('conv1_2', [64, 64, 3, 1, 1]), ('pool1_stage1', [2, 2, - 0]), - ('conv2_1', [64, 128, 3, 1, 1]), ('conv2_2', [128, 128, 3, 1, 1]), - ('pool2_stage1', [2, 2, 0]), ('conv3_1', [128, 256, 3, 1, 1]), - ('conv3_2', [256, 256, 3, 1, 1]), ('conv3_3', [256, 256, 3, 1, 1]), - ('conv3_4', [256, 256, 3, 1, 1]), ('pool3_stage1', [2, 2, 0]), - ('conv4_1', [256, 512, 3, 1, 1]), ('conv4_2', [512, 512, 3, 1, 1]), - ('conv4_3_CPM', [512, 256, 3, 1, 1]), ('conv4_4_CPM', [256, 128, 3, 1, 1])]) - - block1_1 = OrderedDict([('conv5_1_CPM_L1', [128, 128, 3, 1, 1]), ('conv5_2_CPM_L1', [128, 128, 3, 1, 1]), - ('conv5_3_CPM_L1', [128, 128, 3, 1, 1]), ('conv5_4_CPM_L1', [128, 512, 1, 1, 0]), - ('conv5_5_CPM_L1', [512, 38, 1, 1, 0])]) - - block1_2 = OrderedDict([('conv5_1_CPM_L2', [128, 128, 3, 1, 1]), ('conv5_2_CPM_L2', [128, 128, 3, 1, 1]), - ('conv5_3_CPM_L2', [128, 128, 3, 1, 1]), ('conv5_4_CPM_L2', [128, 512, 1, 1, 0]), - ('conv5_5_CPM_L2', [512, 19, 1, 1, 0])]) - blocks['block1_1'] = block1_1 - blocks['block1_2'] = block1_2 - - self.model0 = self.make_layers(block0, no_relu_layers) - - for i in range(2, 7): - blocks['block%d_1' % i] = OrderedDict([('Mconv1_stage%d_L1' % i, [185, 128, 7, 1, 3]), - ('Mconv2_stage%d_L1' % i, [128, 128, 7, 1, 3]), - ('Mconv3_stage%d_L1' % i, [128, 128, 7, 1, 3]), - ('Mconv4_stage%d_L1' % i, [128, 128, 7, 1, 3]), - ('Mconv5_stage%d_L1' % i, [128, 128, 7, 1, 3]), - ('Mconv6_stage%d_L1' % i, [128, 128, 1, 1, 0]), - ('Mconv7_stage%d_L1' % i, [128, 38, 1, 1, 0])]) - - blocks['block%d_2' % i] = OrderedDict([('Mconv1_stage%d_L2' % i, [185, 128, 7, 1, 3]), - ('Mconv2_stage%d_L2' % i, [128, 128, 7, 1, 3]), - ('Mconv3_stage%d_L2' % i, [128, 128, 7, 1, 3]), - ('Mconv4_stage%d_L2' % i, [128, 128, 7, 1, 3]), - ('Mconv5_stage%d_L2' % i, [128, 128, 7, 1, 3]), - ('Mconv6_stage%d_L2' % i, [128, 128, 1, 1, 0]), - ('Mconv7_stage%d_L2' % i, [128, 19, 1, 1, 0])]) - - for k in blocks.keys(): - blocks[k] = self.make_layers(blocks[k], no_relu_layers) - - self.model1_1 = blocks['block1_1'] - self.model2_1 = blocks['block2_1'] - self.model3_1 = blocks['block3_1'] - self.model4_1 = blocks['block4_1'] - self.model5_1 = blocks['block5_1'] - self.model6_1 = blocks['block6_1'] - - self.model1_2 = blocks['block1_2'] - self.model2_2 = blocks['block2_2'] - self.model3_2 = blocks['block3_2'] - self.model4_2 = blocks['block4_2'] - self.model5_2 = blocks['block5_2'] - self.model6_2 = blocks['block6_2'] - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'openpose_body.pdparams') - if not os.path.exists(checkpoint): - os.system('wget 
https://paddlehub.bj.bcebos.com/dygraph/pose/openpose_body.pdparams -O ' + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def make_layers(self, block: dict, no_relu_layers: list): - layers = [] - for layer_name, v in block.items(): - if 'pool' in layer_name: - layer = nn.MaxPool2d(kernel_size=v[0], stride=v[1], padding=v[2]) - layers.append((layer_name, layer)) - else: - conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride=v[3], padding=v[4]) - layers.append((layer_name, conv2d)) - if layer_name not in no_relu_layers: - layers.append(('relu_' + layer_name, nn.ReLU())) - layers = tuple(layers) - return nn.Sequential(*layers) - - def transform(self, orgimg: np.ndarray, scale_search: float = 0.5): - process = self.resize_func(orgimg, scale_search) - imageToTest_padded, pad = self.pad_func(process) - process = self.norm_func(imageToTest_padded) - process = np.ascontiguousarray(np.transpose(process[:, :, :, np.newaxis], (3, 2, 0, 1))).astype("float32") - - return process, imageToTest_padded, pad - - def forward(self, x: paddle.Tensor): - - out1 = self.model0(x) - - out1_1 = self.model1_1(out1) - out1_2 = self.model1_2(out1) - out2 = paddle.concat([out1_1, out1_2, out1], 1) - - out2_1 = self.model2_1(out2) - out2_2 = self.model2_2(out2) - out3 = paddle.concat([out2_1, out2_2, out1], 1) - - out3_1 = self.model3_1(out3) - out3_2 = self.model3_2(out3) - out4 = paddle.concat([out3_1, out3_2, out1], 1) - - out4_1 = self.model4_1(out4) - out4_2 = self.model4_2(out4) - out5 = paddle.concat([out4_1, out4_2, out1], 1) - - out5_1 = self.model5_1(out5) - out5_2 = self.model5_2(out5) - out6 = paddle.concat([out5_1, out5_2, out1], 1) - - out6_1 = self.model6_1(out6) - out6_2 = self.model6_2(out6) - - return out6_1, out6_2 - - def predict(self, img_path: str, save_path: str = "result"): - self.eval() - orgImg = cv2.imread(img_path) - data, imageToTest_padded, pad = self.transform(orgImg) - Mconv7_stage6_L1, Mconv7_stage6_L2 = self.forward(paddle.to_tensor(data)) - Mconv7_stage6_L1 = Mconv7_stage6_L1.numpy() - Mconv7_stage6_L2 = Mconv7_stage6_L2.numpy() - - heatmap_avg = self.remove_pad(Mconv7_stage6_L2, imageToTest_padded, orgImg, pad) - paf_avg = self.remove_pad(Mconv7_stage6_L1, imageToTest_padded, orgImg, pad) - - all_peaks = self.get_peak(heatmap_avg) - connection_all, special_k = self.get_connection(all_peaks, paf_avg, orgImg) - candidate, subset = self.get_candidate(all_peaks, connection_all, special_k) - - if self.visualization: - canvas = copy.deepcopy(orgImg) - canvas = self.draw_pose(canvas, candidate, subset) - if not os.path.exists(save_path): - os.mkdir(save_path) - save_path = os.path.join(save_path, img_path.rsplit("/", 1)[-1]) - cv2.imwrite(save_path, canvas) - return candidate, subset - - -if __name__ == "__main__": - - paddle.disable_static() - model = BodyPoseModel() - model.eval() - out1, out2 = model.predict("demo.jpg") diff --git a/hub_module/modules/image/keypoint_detection/openpose_body_estimation/processor.py b/hub_module/modules/image/keypoint_detection/openpose_body_estimation/processor.py deleted file mode 100644 index e5dc17ca2dafa1b007840fa03e3985500f283aa7..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/keypoint_detection/openpose_body_estimation/processor.py +++ /dev/null @@ -1,304 +0,0 @@ -import math - -import cv2 -import numpy as np -from scipy.ndimage.filters import gaussian_filter - - -class PadDownRight: - """ - Get padding images. 
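The pad values computed below extend the image on the bottom and right so that height and width become multiples of `stride`. A quick arithmetic check with made-up dimensions:

```python
stride = 8
h, w = 367, 490
pad_down = 0 if h % stride == 0 else stride - (h % stride)   # -> 1
pad_right = 0 if w % stride == 0 else stride - (w % stride)  # -> 6
assert (h + pad_down) % stride == 0 and (w + pad_right) % stride == 0
```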
- - Args: - stride(int): Stride for calculate pad value for edges. - padValue(int): Initialization for new area. - """ - def __init__(self, stride: int = 8, padValue: int = 128): - self.stride = stride - self.padValue = padValue - - def __call__(self, img: np.ndarray): - h, w = img.shape[0:2] - pad = 4 * [0] - pad[2] = 0 if (h % self.stride == 0) else self.stride - (h % self.stride) # down - pad[3] = 0 if (w % self.stride == 0) else self.stride - (w % self.stride) # right - - img_padded = img - pad_up = np.tile(img_padded[0:1, :, :] * 0 + self.padValue, (pad[0], 1, 1)) - img_padded = np.concatenate((pad_up, img_padded), axis=0) - pad_left = np.tile(img_padded[:, 0:1, :] * 0 + self.padValue, (1, pad[1], 1)) - img_padded = np.concatenate((pad_left, img_padded), axis=1) - pad_down = np.tile(img_padded[-2:-1, :, :] * 0 + self.padValue, (pad[2], 1, 1)) - img_padded = np.concatenate((img_padded, pad_down), axis=0) - pad_right = np.tile(img_padded[:, -2:-1, :] * 0 + self.padValue, (1, pad[3], 1)) - img_padded = np.concatenate((img_padded, pad_right), axis=1) - - return img_padded, pad - - -class RemovePadding: - """ - Remove the padding values. - - Args: - stride(int): Scales for resizing the images. - - """ - def __init__(self, stride: int = 8): - self.stride = stride - - def __call__(self, data: np.ndarray, imageToTest_padded: np.ndarray, oriImg: np.ndarray, pad: list) -> np.ndarray: - heatmap = np.transpose(np.squeeze(data), (1, 2, 0)) - heatmap = cv2.resize(heatmap, (0, 0), fx=self.stride, fy=self.stride, interpolation=cv2.INTER_CUBIC) - heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :] - heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC) - - return heatmap - - -class GetPeak: - """ - Get peak values and coordinate from input. - - Args: - thresh(float): Threshold value for selecting peak value, default is 0.1. - """ - def __init__(self, thresh=0.1): - self.thresh = thresh - - def __call__(self, heatmap: np.ndarray): - all_peaks = [] - peak_counter = 0 - for part in range(18): - map_ori = heatmap[:, :, part] - one_heatmap = gaussian_filter(map_ori, sigma=3) - - map_left = np.zeros(one_heatmap.shape) - map_left[1:, :] = one_heatmap[:-1, :] - map_right = np.zeros(one_heatmap.shape) - map_right[:-1, :] = one_heatmap[1:, :] - map_up = np.zeros(one_heatmap.shape) - map_up[:, 1:] = one_heatmap[:, :-1] - map_down = np.zeros(one_heatmap.shape) - map_down[:, :-1] = one_heatmap[:, 1:] - - peaks_binary = np.logical_and.reduce( - (one_heatmap >= map_left, one_heatmap >= map_right, one_heatmap >= map_up, one_heatmap >= map_down, - one_heatmap > self.thresh)) - peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0])) # note reverse - peaks_with_score = [x + (map_ori[x[1], x[0]], ) for x in peaks] - peak_id = range(peak_counter, peak_counter + len(peaks)) - peaks_with_score_and_id = [peaks_with_score[i] + (peak_id[i], ) for i in range(len(peak_id))] - - all_peaks.append(peaks_with_score_and_id) - peak_counter += len(peaks) - - return all_peaks - - -class Connection: - """ - Get connection for selected estimation points. - - Args: - mapIdx(list): Part Affinity Fields map index, default is None. - limbSeq(list): Peak candidate map index, default is None. 
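The matching below is greedy: limb candidates are sorted by score, and each peak is used at most once per limb. A toy sketch of that assignment rule with invented scores:

```python
candidates = [(0, 1, 0.9), (1, 0, 0.7), (0, 0, 0.6)]  # (peak_a, peak_b, score)
used_a, used_b, connection = set(), set(), []
for i, j, s in sorted(candidates, key=lambda t: t[2], reverse=True):
    if i not in used_a and j not in used_b:
        connection.append((i, j, s))
        used_a.add(i)
        used_b.add(j)
# connection == [(0, 1, 0.9), (1, 0, 0.7)]; (0, 0, 0.6) is rejected
```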
- - """ - def __init__(self, mapIdx: list = None, limbSeq: list = None): - if mapIdx and limbSeq: - self.mapIdx = mapIdx - self.limbSeq = limbSeq - else: - self.mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22], \ - [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52], \ - [55, 56], [37, 38], [45, 46]] - - self.limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \ - [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \ - [1, 16], [16, 18], [3, 17], [6, 18]] - self.caculate_vector = CalculateVector() - - def __call__(self, all_peaks: list, paf_avg: np.ndarray, orgimg: np.ndarray): - connection_all = [] - special_k = [] - for k in range(len(self.mapIdx)): - score_mid = paf_avg[:, :, [x - 19 for x in self.mapIdx[k]]] - candA = all_peaks[self.limbSeq[k][0] - 1] - candB = all_peaks[self.limbSeq[k][1] - 1] - nA = len(candA) - nB = len(candB) - if nA != 0 and nB != 0: - connection_candidate = self.caculate_vector(candA, candB, nA, nB, score_mid, orgimg) - connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True) - connection = np.zeros((0, 5)) - for c in range(len(connection_candidate)): - i, j, s = connection_candidate[c][0:3] - if i not in connection[:, 3] and j not in connection[:, 4]: - connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]]) - if len(connection) >= min(nA, nB): - break - - connection_all.append(connection) - else: - special_k.append(k) - connection_all.append([]) - - return connection_all, special_k - - -class CalculateVector: - """ - Vector decomposition and normalization, refer Realtime Multi-Person 2D Pose Estimation using Part Affinity Fields - for more details. - - Args: - thresh(float): Threshold value for selecting candidate vector, default is 0.05. - """ - def __init__(self, thresh: float = 0.05): - self.thresh = thresh - - def __call__(self, candA: list, candB: list, nA: int, nB: int, score_mid: np.ndarray, oriImg: np.ndarray): - connection_candidate = [] - for i in range(nA): - for j in range(nB): - vec = np.subtract(candB[j][:2], candA[i][:2]) - norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1]) + 1e-5 - vec = np.divide(vec, norm) - - startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=10), \ - np.linspace(candA[i][1], candB[j][1], num=10))) - - vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] \ - for I in range(len(startend))]) - vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] \ - for I in range(len(startend))]) - - score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1]) - score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(0.5 * oriImg.shape[0] / norm - 1, 0) - criterion1 = len(np.nonzero(score_midpts > self.thresh)[0]) > 0.8 * len(score_midpts) - criterion2 = score_with_dist_prior > 0 - if criterion1 and criterion2: - connection_candidate.append( - [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]]) - return connection_candidate - - -class DrawPose: - """ - Draw Pose estimation results on canvas. - - Args: - stickwidth(int): Angle value to draw approximate ellipse curve, default is 4. 
- - """ - def __init__(self, stickwidth: int = 4): - self.stickwidth = stickwidth - - self.limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], [10, 11], [2, 12], [12, 13], - [13, 14], [2, 1], [1, 15], [15, 17], [1, 16], [16, 18], [3, 17], [6, 18]] - - self.colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], - [170, 255, 0], [85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255], - [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], [170, 0, 255], [255, 0, 255], - [255, 0, 170], [255, 0, 85]] - - def __call__(self, canvas: np.ndarray, candidate: np.ndarray, subset: np.ndarray): - for i in range(18): - for n in range(len(subset)): - index = int(subset[n][i]) - if index == -1: - continue - x, y = candidate[index][0:2] - cv2.circle(canvas, (int(x), int(y)), 4, self.colors[i], thickness=-1) - for i in range(17): - for n in range(len(subset)): - index = subset[n][np.array(self.limbSeq[i]) - 1] - if -1 in index: - continue - cur_canvas = canvas.copy() - Y = candidate[index.astype(int), 0] - X = candidate[index.astype(int), 1] - mX = np.mean(X) - mY = np.mean(Y) - length = ((X[0] - X[1])**2 + (Y[0] - Y[1])**2)**0.5 - angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1])) - polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), self.stickwidth), \ - int(angle), 0, 360, 1) - cv2.fillConvexPoly(cur_canvas, polygon, self.colors[i]) - canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0) - return canvas - - -class Candidate: - """ - Select candidate for body pose estimation. - - Args: - mapIdx(list): Part Affinity Fields map index, default is None. - limbSeq(list): Peak candidate map index, default is None. - """ - def __init__(self, mapIdx: list = None, limbSeq: list = None): - if mapIdx and limbSeq: - self.mapIdx = mapIdx - self.limbSeq = limbSeq - else: - self.mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22], \ - [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52], \ - [55, 56], [37, 38], [45, 46]] - self.limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \ - [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \ - [1, 16], [16, 18], [3, 17], [6, 18]] - - def __call__(self, all_peaks: list, connection_all: list, special_k: list): - subset = -1 * np.ones((0, 20)) - candidate = np.array([item for sublist in all_peaks for item in sublist]) - for k in range(len(self.mapIdx)): - if k not in special_k: - partAs = connection_all[k][:, 0] - partBs = connection_all[k][:, 1] - indexA, indexB = np.array(self.limbSeq[k]) - 1 - - for i in range(len(connection_all[k])): # = 1:size(temp,1) - found = 0 - subset_idx = [-1, -1] - for j in range(len(subset)): # 1:size(subset,1): - if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]: - subset_idx[found] = j - found += 1 - - if found == 1: - j = subset_idx[0] - if subset[j][indexB] != partBs[i]: - subset[j][indexB] = partBs[i] - subset[j][-1] += 1 - subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2] - elif found == 2: # if found 2 and disjoint, merge them - j1, j2 = subset_idx - membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2] - if len(np.nonzero(membership == 2)[0]) == 0: # merge - subset[j1][:-2] += (subset[j2][:-2] + 1) - subset[j1][-2:] += subset[j2][-2:] - subset[j1][-2] += connection_all[k][i][2] - subset = np.delete(subset, j2, 0) - else: # as like found == 1 - subset[j1][indexB] = partBs[i] - subset[j1][-1] 
+= 1 - subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2] - - # if find no partA in the subset, create a new subset - elif not found and k < 17: - row = -1 * np.ones(20) - row[indexA] = partAs[i] - row[indexB] = partBs[i] - row[-1] = 2 - row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2] - subset = np.vstack([subset, row]) - # delete some rows of subset which has few parts occur - deleteIdx = [] - for i in range(len(subset)): - if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4: - deleteIdx.append(i) - subset = np.delete(subset, deleteIdx, axis=0) - - return candidate, subset diff --git a/hub_module/modules/image/keypoint_detection/openpose_hands_estimation/module.py b/hub_module/modules/image/keypoint_detection/openpose_hands_estimation/module.py deleted file mode 100644 index cc983d8bff9cc6243343d23f0280fe5cffc331fa..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/keypoint_detection/openpose_hands_estimation/module.py +++ /dev/null @@ -1,204 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import copy -from collections import OrderedDict - -import cv2 -import paddle -import numpy as np -import paddle.nn as nn -import paddlehub as hub -from skimage.measure import label -from scipy.ndimage.filters import gaussian_filter -from paddlehub.module.module import moduleinfo -from paddlehub.process.functional import npmax -import paddlehub.process.transforms as T - -import openpose_hands_estimation.processor as P - - -@moduleinfo(name="openpose_hands_estimation", - type="CV/image_editing", - author="paddlepaddle", - author_email="", - summary="Openpose_hands_estimation is a hand pose estimation model based on Hand Keypoint Detection in \ - Single Images using Multiview Bootstrapping.", - version="1.0.0") -class HandPoseModel(nn.Layer): - """ - HandPoseModel - - Args: - load_checkpoint(str): Checkpoint save path, default is None. - visualization (bool): Whether to save the estimation result. Default is True. 
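A usage sketch mirroring the `__main__` block at the bottom of this file; note that `predict` internally loads `openpose_body_estimation` first, and the image path is a placeholder:

```python
import paddle

paddle.disable_static()
model = HandPoseModel()
model.eval()
all_hand_peaks = model.predict("demo_hand.jpg")  # 21 (x, y) peaks per detected hand
```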
- """ - def __init__(self, load_checkpoint: str = None, visualization: bool = True): - super(HandPoseModel, self).__init__() - - self.visualization = visualization - self.resize_func = T.ResizeScaling() - self.norm_func = T.Normalize(std=[1, 1, 1]) - self.hand_detect = P.HandDetect() - self.pad_func = P.PadDownRight() - self.remove_pad = P.RemovePadding() - self.draw_pose = P.DrawPose() - self.draw_hand = P.DrawHandPose() - no_relu_layers = ['conv6_2_CPM', 'Mconv7_stage2', 'Mconv7_stage3', \ - 'Mconv7_stage4', 'Mconv7_stage5', 'Mconv7_stage6'] - - block1_0 = OrderedDict([('conv1_1', [3, 64, 3, 1, 1]), ('conv1_2', [64, 64, 3, 1, 1]), - ('pool1_stage1', [2, 2, 0]), ('conv2_1', [64, 128, 3, 1, 1]), - ('conv2_2', [128, 128, 3, 1, 1]), ('pool2_stage1', [2, 2, 0]), - ('conv3_1', [128, 256, 3, 1, 1]), ('conv3_2', [256, 256, 3, 1, 1]), - ('conv3_3', [256, 256, 3, 1, 1]), ('conv3_4', [256, 256, 3, 1, 1]), - ('pool3_stage1', [2, 2, 0]), ('conv4_1', [256, 512, 3, 1, 1]), - ('conv4_2', [512, 512, 3, 1, 1]), ('conv4_3', [512, 512, 3, 1, 1]), - ('conv4_4', [512, 512, 3, 1, 1]), ('conv5_1', [512, 512, 3, 1, 1]), - ('conv5_2', [512, 512, 3, 1, 1]), ('conv5_3_CPM', [512, 128, 3, 1, 1])]) - - block1_1 = OrderedDict([('conv6_1_CPM', [128, 512, 1, 1, 0]), ('conv6_2_CPM', [512, 22, 1, 1, 0])]) - - blocks = {} - blocks['block1_0'] = block1_0 - blocks['block1_1'] = block1_1 - - for i in range(2, 7): - blocks['block%d' % i] = OrderedDict([('Mconv1_stage%d' % i, [150, 128, 7, 1, 3]), - ('Mconv2_stage%d' % i, [128, 128, 7, 1, 3]), - ('Mconv3_stage%d' % i, [128, 128, 7, 1, 3]), - ('Mconv4_stage%d' % i, [128, 128, 7, 1, 3]), - ('Mconv5_stage%d' % i, [128, 128, 7, 1, 3]), - ('Mconv6_stage%d' % i, [128, 128, 1, 1, 0]), - ('Mconv7_stage%d' % i, [128, 22, 1, 1, 0])]) - - for k in blocks.keys(): - blocks[k] = self.make_layers(blocks[k], no_relu_layers) - - self.model1_0 = blocks['block1_0'] - self.model1_1 = blocks['block1_1'] - self.model2 = blocks['block2'] - self.model3 = blocks['block3'] - self.model4 = blocks['block4'] - self.model5 = blocks['block5'] - self.model6 = blocks['block6'] - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'openpose_hand.pdparams') - if not os.path.exists(checkpoint): - os.system('wget https://paddlehub.bj.bcebos.com/dygraph/pose/openpose_hand.pdparams -O ' + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def make_layers(self, block: dict, no_relu_layers: list): - layers = [] - for layer_name, v in block.items(): - if 'pool' in layer_name: - layer = nn.MaxPool2d(kernel_size=v[0], stride=v[1], padding=v[2]) - layers.append((layer_name, layer)) - else: - conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride=v[3], padding=v[4]) - layers.append((layer_name, conv2d)) - if layer_name not in no_relu_layers: - layers.append(('relu_' + layer_name, nn.ReLU())) - layers = tuple(layers) - return nn.Sequential(*layers) - - def forward(self, x: paddle.Tensor): - out1_0 = self.model1_0(x) - out1_1 = self.model1_1(out1_0) - concat_stage2 = paddle.concat([out1_1, out1_0], 1) - out_stage2 = self.model2(concat_stage2) - concat_stage3 = paddle.concat([out_stage2, out1_0], 1) - out_stage3 = self.model3(concat_stage3) - concat_stage4 = paddle.concat([out_stage3, out1_0], 1) - out_stage4 = self.model4(concat_stage4) - concat_stage5 = 
paddle.concat([out_stage4, out1_0], 1) - out_stage5 = self.model5(concat_stage5) - concat_stage6 = paddle.concat([out_stage5, out1_0], 1) - out_stage6 = self.model6(concat_stage6) - return out_stage6 - - def hand_estimation(self, handimg: np.ndarray, scale_search: list): - heatmap_avg = np.zeros((handimg.shape[0], handimg.shape[1], 22)) - - for scale in scale_search: - process = self.resize_func(handimg, scale) - imageToTest_padded, pad = self.pad_func(process) - process = self.norm_func(imageToTest_padded) - process = np.ascontiguousarray(np.transpose(process[:, :, :, np.newaxis], (3, 2, 0, 1))).astype("float32") - data = self.forward(paddle.to_tensor(process)) - data = data.numpy() - heatmap = self.remove_pad(data, imageToTest_padded, handimg, pad) - heatmap_avg += heatmap / len(scale_search) - - all_peaks = [] - for part in range(21): - map_ori = heatmap_avg[:, :, part] - one_heatmap = gaussian_filter(map_ori, sigma=3) - binary = np.ascontiguousarray(one_heatmap > 0.05, dtype=np.uint8) - if np.sum(binary) == 0: - all_peaks.append([0, 0]) - continue - label_img, label_numbers = label(binary, return_num=True, connectivity=binary.ndim) - max_index = np.argmax([np.sum(map_ori[label_img == i]) for i in range(1, label_numbers + 1)]) + 1 - label_img[label_img != max_index] = 0 - map_ori[label_img == 0] = 0 - - y, x = npmax(map_ori) - all_peaks.append([x, y]) - - return np.array(all_peaks) - - def predict(self, img_path: str, save_path: str = 'result', scale: list = [0.5, 1.0, 1.5, 2.0]): - self.eval() - self.body_model = hub.Module(name='openpose_body_estimation') - self.body_model.eval() - org_img = cv2.imread(img_path) - - candidate, subset = self.body_model.predict(img_path) - hands_list = self.hand_detect(candidate, subset, org_img) - - all_hand_peaks = [] - - for x, y, w, is_left in hands_list: - peaks = self.hand_estimation(org_img[y:y + w, x:x + w, :], scale) - peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + x) - peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + y) - all_hand_peaks.append(peaks) - - if self.visualization: - canvas = copy.deepcopy(org_img) - canvas = self.draw_pose(canvas, candidate, subset) - canvas = self.draw_hand(canvas, all_hand_peaks) - if not os.path.exists(save_path): - os.mkdir(save_path) - save_path = os.path.join(save_path, img_path.rsplit("/", 1)[-1]) - cv2.imwrite(save_path, canvas) - return all_hand_peaks - - -if __name__ == "__main__": - paddle.disable_static() - model = HandPoseModel() - model.eval() - out1 = model.predict("detect_hand4.jpg") diff --git a/hub_module/modules/image/keypoint_detection/openpose_hands_estimation/processor.py b/hub_module/modules/image/keypoint_detection/openpose_hands_estimation/processor.py deleted file mode 100644 index 920d1b07f27ef1f188e6750db79fb720ce604251..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/keypoint_detection/openpose_hands_estimation/processor.py +++ /dev/null @@ -1,207 +0,0 @@ -import math - -import cv2 -import numpy as np -import matplotlib -from matplotlib import pyplot as plt -from matplotlib.figure import Figure -from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas -matplotlib.use('Agg') - - -class HandDetect: - """ - Detect hand pose information from body pose estimation result. - - Args: - ratioWristElbow(float): Ratio to adjust the wrist center, ,default is 0.33. 
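The hand box is seeded by extrapolating past the wrist along the elbow-to-wrist direction, then sized from the limb lengths, as sketched here with made-up coordinates:

```python
import math

ratio = 0.33                   # ratioWristElbow
x1, y1 = 100.0, 100.0          # shoulder
x2, y2 = 140.0, 160.0          # elbow
x3, y3 = 170.0, 210.0          # wrist
cx = x3 + ratio * (x3 - x2)    # box center, pushed beyond the wrist
cy = y3 + ratio * (y3 - y2)
d_we = math.hypot(x3 - x2, y3 - y2)
d_es = math.hypot(x2 - x1, y2 - y1)
width = 1.5 * max(d_we, 0.9 * d_es)  # side length of the square crop
```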
- """ - def __init__(self, ratioWristElbow: float = 0.33): - self.ratioWristElbow = ratioWristElbow - - def __call__(self, candidate: np.ndarray, subset: np.ndarray, oriImg: np.ndarray): - detect_result = [] - image_height, image_width = oriImg.shape[0:2] - for person in subset.astype(int): - has_left = np.sum(person[[5, 6, 7]] == -1) == 0 - has_right = np.sum(person[[2, 3, 4]] == -1) == 0 - if not (has_left or has_right): - continue - hands = [] - # left hand - if has_left: - left_shoulder_index, left_elbow_index, left_wrist_index = person[[5, 6, 7]] - x1, y1 = candidate[left_shoulder_index][:2] - x2, y2 = candidate[left_elbow_index][:2] - x3, y3 = candidate[left_wrist_index][:2] - hands.append([x1, y1, x2, y2, x3, y3, True]) - # right hand - if has_right: - right_shoulder_index, right_elbow_index, right_wrist_index = person[[2, 3, 4]] - x1, y1 = candidate[right_shoulder_index][:2] - x2, y2 = candidate[right_elbow_index][:2] - x3, y3 = candidate[right_wrist_index][:2] - hands.append([x1, y1, x2, y2, x3, y3, False]) - - for x1, y1, x2, y2, x3, y3, is_left in hands: - - x = x3 + self.ratioWristElbow * (x3 - x2) - y = y3 + self.ratioWristElbow * (y3 - y2) - distanceWristElbow = math.sqrt((x3 - x2)**2 + (y3 - y2)**2) - distanceElbowShoulder = math.sqrt((x2 - x1)**2 + (y2 - y1)**2) - width = 1.5 * max(distanceWristElbow, 0.9 * distanceElbowShoulder) - - x -= width / 2 - y -= width / 2 - - if x < 0: x = 0 - if y < 0: y = 0 - width1 = width - width2 = width - if x + width > image_width: width1 = image_width - x - if y + width > image_height: width2 = image_height - y - width = min(width1, width2) - - if width >= 20: - detect_result.append([int(x), int(y), int(width), is_left]) - - return detect_result - - -class PadDownRight: - """ - Get padding images. - - Args: - stride(int): Stride for calculate pad value for edges. - padValue(int): Initialization for new area. - """ - def __init__(self, stride: int = 8, padValue: int = 128): - self.stride = stride - self.padValue = padValue - - def __call__(self, img: np.ndarray): - h, w = img.shape[0:2] - pad = 4 * [0] - pad[2] = 0 if (h % self.stride == 0) else self.stride - (h % self.stride) # down - pad[3] = 0 if (w % self.stride == 0) else self.stride - (w % self.stride) # right - - img_padded = img - pad_up = np.tile(img_padded[0:1, :, :] * 0 + self.padValue, (pad[0], 1, 1)) - img_padded = np.concatenate((pad_up, img_padded), axis=0) - pad_left = np.tile(img_padded[:, 0:1, :] * 0 + self.padValue, (1, pad[1], 1)) - img_padded = np.concatenate((pad_left, img_padded), axis=1) - pad_down = np.tile(img_padded[-2:-1, :, :] * 0 + self.padValue, (pad[2], 1, 1)) - img_padded = np.concatenate((img_padded, pad_down), axis=0) - pad_right = np.tile(img_padded[:, -2:-1, :] * 0 + self.padValue, (1, pad[3], 1)) - img_padded = np.concatenate((img_padded, pad_right), axis=1) - - return img_padded, pad - - -class RemovePadding: - """ - Remove the padding values. - - Args: - stride(int): Scales for resizing the images. 
- - """ - def __init__(self, stride: int = 8): - self.stride = stride - - def __call__(self, data: np.ndarray, imageToTest_padded: np.ndarray, oriImg: np.ndarray, pad: list) -> np.ndarray: - heatmap = np.transpose(np.squeeze(data), (1, 2, 0)) - heatmap = cv2.resize(heatmap, (0, 0), fx=self.stride, fy=self.stride, interpolation=cv2.INTER_CUBIC) - heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :] - heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC) - - return heatmap - - -class DrawPose: - """ - Draw Pose estimation results on canvas. - - Args: - stickwidth(int): Angle value to draw approximate ellipse curve, default is 4. - - """ - def __init__(self, stickwidth: int = 4): - self.stickwidth = stickwidth - - self.limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], [10, 11], [2, 12], [12, 13], - [13, 14], [2, 1], [1, 15], [15, 17], [1, 16], [16, 18], [3, 17], [6, 18]] - - self.colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], - [170, 255, 0], [85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255], - [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], [170, 0, 255], [255, 0, 255], - [255, 0, 170], [255, 0, 85]] - - def __call__(self, canvas: np.ndarray, candidate: np.ndarray, subset: np.ndarray): - for i in range(18): - for n in range(len(subset)): - index = int(subset[n][i]) - if index == -1: - continue - x, y = candidate[index][0:2] - cv2.circle(canvas, (int(x), int(y)), 4, self.colors[i], thickness=-1) - for i in range(17): - for n in range(len(subset)): - index = subset[n][np.array(self.limbSeq[i]) - 1] - if -1 in index: - continue - cur_canvas = canvas.copy() - Y = candidate[index.astype(int), 0] - X = candidate[index.astype(int), 1] - mX = np.mean(X) - mY = np.mean(Y) - length = ((X[0] - X[1])**2 + (Y[0] - Y[1])**2)**0.5 - angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1])) - polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), self.stickwidth), int(angle), 0, 360, - 1) - cv2.fillConvexPoly(cur_canvas, polygon, self.colors[i]) - canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0) - return canvas - - -class DrawHandPose: - """ - Draw hand pose estimation results on canvas. - Args: - show_number(bool): Whether to show estimation ids in canvas, default is False. 
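Edges are colored by sweeping hue across the edge index, which is why each of the 20 finger segments gets a distinct color; a one-liner showing the same palette:

```python
import matplotlib.colors

n_edges = 20
colors = [matplotlib.colors.hsv_to_rgb([i / n_edges, 1.0, 1.0])
          for i in range(n_edges)]
```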
- - """ - def __init__(self, show_number: bool = False): - self.edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], \ - [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]] - self.show_number = show_number - - def __call__(self, canvas: np.ndarray, all_hand_peaks: list): - fig = Figure(figsize=plt.figaspect(canvas)) - - fig.subplots_adjust(0, 0, 1, 1) - fig.subplots_adjust(bottom=0, top=1, left=0, right=1) - bg = FigureCanvas(fig) - ax = fig.subplots() - ax.axis('off') - ax.imshow(canvas) - - width, height = ax.figure.get_size_inches() * ax.figure.get_dpi() - - for peaks in all_hand_peaks: - for ie, e in enumerate(self.edges): - if np.sum(np.all(peaks[e], axis=1) == 0) == 0: - x1, y1 = peaks[e[0]] - x2, y2 = peaks[e[1]] - ax.plot([x1, x2], [y1, y2], - color=matplotlib.colors.hsv_to_rgb([ie / float(len(self.edges)), 1.0, 1.0])) - - for i, keyponit in enumerate(peaks): - x, y = keyponit - ax.plot(x, y, 'r.') - if self.show_number: - ax.text(x, y, str(i)) - bg.draw() - canvas = np.frombuffer(bg.tostring_rgb(), dtype='uint8').reshape(int(height), int(width), 3) - return canvas diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/bbox_head.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/bbox_head.py deleted file mode 100644 index 7f72bb939a2df66db7a7ef0c3776a03df6c1992d..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/bbox_head.py +++ /dev/null @@ -1,269 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Normal, Xavier -from paddle.fluid.regularizer import L2Decay -from paddle.fluid.initializer import MSRA - - -class MultiClassNMS(object): - # __op__ = fluid.layers.multiclass_nms - def __init__(self, - score_threshold=.05, - nms_top_k=-1, - keep_top_k=100, - nms_threshold=.5, - normalized=False, - nms_eta=1.0, - background_label=0): - super(MultiClassNMS, self).__init__() - self.score_threshold = score_threshold - self.nms_top_k = nms_top_k - self.keep_top_k = keep_top_k - self.nms_threshold = nms_threshold - self.normalized = normalized - self.nms_eta = nms_eta - self.background_label = background_label - - -class SmoothL1Loss(object): - ''' - Smooth L1 loss - Args: - sigma (float): hyper param in smooth l1 loss - ''' - - def __init__(self, sigma=1.0): - super(SmoothL1Loss, self).__init__() - self.sigma = sigma - - def __call__(self, x, y, inside_weight=None, outside_weight=None): - return fluid.layers.smooth_l1( - x, - y, - inside_weight=inside_weight, - outside_weight=outside_weight, - sigma=self.sigma) - - -class BoxCoder(object): - def __init__(self, - prior_box_var=[0.1, 0.1, 0.2, 0.2], - code_type='decode_center_size', - box_normalized=False, - axis=1): - super(BoxCoder, self).__init__() - self.prior_box_var = prior_box_var - self.code_type = code_type - self.box_normalized = box_normalized - self.axis = axis - - -class TwoFCHead(object): - """ - RCNN head with two Fully Connected layers - Args: - mlp_dim (int): num of filters for the fc layers - """ - - def __init__(self, mlp_dim=1024): - super(TwoFCHead, self).__init__() - self.mlp_dim = mlp_dim - - def __call__(self, roi_feat): - fan = roi_feat.shape[1] * roi_feat.shape[2] * 
- - -class BoxCoder(object): - def __init__(self, - prior_box_var=[0.1, 0.1, 0.2, 0.2], - code_type='decode_center_size', - box_normalized=False, - axis=1): - super(BoxCoder, self).__init__() - self.prior_box_var = prior_box_var - self.code_type = code_type - self.box_normalized = box_normalized - self.axis = axis - - -class TwoFCHead(object): - """ - RCNN head with two fully connected layers - - Args: - mlp_dim (int): num of filters for the fc layers - """ - - def __init__(self, mlp_dim=1024): - super(TwoFCHead, self).__init__() - self.mlp_dim = mlp_dim - - def __call__(self, roi_feat): - fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3] - - fc6 = fluid.layers.fc( - input=roi_feat, - size=self.mlp_dim, - act='relu', - name='fc6', - param_attr=ParamAttr(name='fc6_w', initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name='fc6_b', learning_rate=2., regularizer=L2Decay(0.))) - head_feat = fluid.layers.fc( - input=fc6, - size=self.mlp_dim, - act='relu', - name='fc7', - param_attr=ParamAttr(name='fc7_w', initializer=Xavier()), - bias_attr=ParamAttr( - name='fc7_b', learning_rate=2., regularizer=L2Decay(0.))) - - return head_feat - - -class BBoxHead(object): - """ - RCNN bbox head - - Args: - head (object): the head module instance, e.g., `ResNetC5`, `TwoFCHead` - box_coder (object): `BoxCoder` instance - nms (object): `MultiClassNMS` instance - num_classes: number of output classes - """ - __inject__ = ['head', 'box_coder', 'nms', 'bbox_loss'] - __shared__ = ['num_classes'] - - def __init__(self, - head, - box_coder=BoxCoder(), - nms=MultiClassNMS(), - bbox_loss=SmoothL1Loss(), - num_classes=81): - super(BBoxHead, self).__init__() - self.head = head - self.num_classes = num_classes - self.box_coder = box_coder - self.nms = nms - self.bbox_loss = bbox_loss - self.head_feat = None - - def get_head_feat(self, input=None): - """ - Get the bbox head feature map. - """ - - if input is not None: - feat = self.head(input) - if isinstance(feat, OrderedDict): - feat = list(feat.values())[0] - self.head_feat = feat - return self.head_feat - - def _get_output(self, roi_feat): - """ - Get bbox head output. - - Args: - roi_feat (Variable): RoI feature from RoIExtractor. - - Returns: - cls_score(Variable): Classification scores of the bbox head with - shape [P, num_classes], P being the number of RoIs. - bbox_pred(Variable): Box regression deltas of the bbox head with - shape [P, 4 * num_classes]. - """ - head_feat = self.get_head_feat(roi_feat) - # when the head (e.g. ResNetC5) outputs a single feature map - if not isinstance(self.head, TwoFCHead): - head_feat = fluid.layers.pool2d( - head_feat, pool_type='avg', global_pooling=True) - cls_score = fluid.layers.fc( - input=head_feat, - size=self.num_classes, - act=None, - name='cls_score', - param_attr=ParamAttr( - name='cls_score_w', initializer=Normal(loc=0.0, scale=0.01)), - bias_attr=ParamAttr( - name='cls_score_b', learning_rate=2., regularizer=L2Decay(0.))) - bbox_pred = fluid.layers.fc( - input=head_feat, - size=4 * self.num_classes, - act=None, - name='bbox_pred', - param_attr=ParamAttr( - name='bbox_pred_w', initializer=Normal(loc=0.0, scale=0.001)), - bias_attr=ParamAttr( - name='bbox_pred_b', learning_rate=2., regularizer=L2Decay(0.))) - return cls_score, bbox_pred
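- - # --- Editor's sketch (not part of the original file): the classification - # term that get_loss below builds with softmax_with_cross_entropy, in plain - # numpy (per-RoI cross-entropy, then a mean over all RoIs): - # - # def softmax_xent_np(logits, labels): - # z = logits - logits.max(axis=1, keepdims=True) - # logp = z - np.log(np.exp(z).sum(axis=1, keepdims=True)) - # return -logp[np.arange(len(labels)), labels].mean()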
- """ - - cls_score, bbox_pred = self._get_output(roi_feat) - - labels_int64 = fluid.layers.cast(x=labels_int32, dtype='int64') - labels_int64.stop_gradient = True - loss_cls = fluid.layers.softmax_with_cross_entropy( - logits=cls_score, label=labels_int64, numeric_stable_mode=True) - loss_cls = fluid.layers.reduce_mean(loss_cls) - loss_bbox = self.bbox_loss( - x=bbox_pred, - y=bbox_targets, - inside_weight=bbox_inside_weights, - outside_weight=bbox_outside_weights) - loss_bbox = fluid.layers.reduce_mean(loss_bbox) - return {'loss_cls': loss_cls, 'loss_bbox': loss_bbox} - - def get_prediction(self, - roi_feat, - rois, - im_info, - im_shape, - return_box_score=False): - """ - Get prediction bounding box in test stage. - - Args: - roi_feat (Variable): RoI feature from RoIExtractor. - rois (Variable): Output of generate_proposals in rpn head. - im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the - number of input images, each element consists of im_height, - im_width, im_scale. - im_shape (Variable): Actual shape of original image with shape - [B, 3]. B is the number of images, each element consists of - original_height, original_width, 1 - - Returns: - pred_result(Variable): Prediction result with shape [N, 6]. Each - row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]. - N is the total number of prediction. - """ - cls_score, bbox_pred = self._get_output(roi_feat) - - im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3]) - im_scale = fluid.layers.sequence_expand(im_scale, rois) - boxes = rois / im_scale - cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False) - bbox_pred = fluid.layers.reshape(bbox_pred, (-1, self.num_classes, 4)) - # self.box_coder - decoded_box = fluid.layers.box_coder( - prior_box=boxes, - target_box=bbox_pred, - prior_box_var=self.box_coder.prior_box_var, - code_type=self.box_coder.code_type, - box_normalized=self.box_coder.box_normalized, - axis=self.box_coder.axis) - cliped_box = fluid.layers.box_clip(input=decoded_box, im_info=im_shape) - if return_box_score: - return {'bbox': cliped_box, 'score': cls_prob} - # self.nms - pred_result = fluid.layers.multiclass_nms( - bboxes=cliped_box, - scores=cls_prob, - score_threshold=self.nms.score_threshold, - nms_top_k=self.nms.nms_top_k, - keep_top_k=self.nms.keep_top_k, - nms_threshold=self.nms.nms_threshold, - normalized=self.nms.normalized, - nms_eta=self.nms.nms_eta, - background_label=self.nms.background_label) - return pred_result diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/data_feed.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/data_feed.py deleted file mode 100644 index e52cce168b1f5ce95404ca20fa12d7d0c0539f63..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/data_feed.py +++ /dev/null @@ -1,117 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import os -from collections import OrderedDict - -import cv2 -import numpy as np -from PIL import Image, ImageEnhance -from paddle import fluid - -__all__ = ['test_reader'] - - -def test_reader(paths=None, images=None): - """ - data generator - - Args: - paths (list[str]): paths to images. 
diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/data_feed.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/data_feed.py deleted file mode 100644 index e52cce168b1f5ce95404ca20fa12d7d0c0539f63..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/data_feed.py +++ /dev/null @@ -1,117 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import os -from collections import OrderedDict - -import cv2 -import numpy as np -from PIL import Image, ImageEnhance -from paddle import fluid - -__all__ = ['test_reader'] - - -def test_reader(paths=None, images=None): - """ - data generator - - Args: - paths (list[str]): paths to images. - images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] - - Yields: - res (dict): keys include 'image' and 'im_info', the corresponding values are: - image (numpy.ndarray): the image to be fed into the network - im_info (numpy.ndarray): the resize info (height, width, scale) of the preprocessed image. - """ - img_list = list() - if paths: - for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file path.".format(img_path) - img = cv2.imread(img_path).astype('float32') - img_list.append(img) - if images is not None: - for img in images: - img_list.append(img) - - for im in img_list: - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - im = im.astype(np.float32, copy=False) - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - mean = np.array(mean)[np.newaxis, np.newaxis, :] - std = np.array(std)[np.newaxis, np.newaxis, :] - im = im / 255.0 - im -= mean - im /= std - - target_size = 800 - max_size = 1333 - - shape = im.shape - # im_shape holds the original shape of the image. - im_shape = np.array([shape[0], shape[1], 1.0]).astype('float32') - im_size_min = np.min(shape[0:2]) - im_size_max = np.max(shape[0:2]) - im_scale = float(target_size) / float(im_size_min) - if np.round(im_scale * im_size_max) > max_size: - im_scale = float(max_size) / float(im_size_max) - - resize_w = np.round(im_scale * float(shape[1])) - resize_h = np.round(im_scale * float(shape[0])) - # im_info holds the resize info of the image. - im_info = np.array([resize_h, resize_w, im_scale]).astype('float32') - - im = cv2.resize( - im, - None, - None, - fx=im_scale, - fy=im_scale, - interpolation=cv2.INTER_LINEAR) - - # HWC --> CHW - im = np.swapaxes(im, 1, 2) - im = np.swapaxes(im, 1, 0) - yield {'image': im, 'im_info': im_info, 'im_shape': im_shape} - - -def padding_minibatch(batch_data, coarsest_stride=0, use_padded_im_info=True): - max_shape_org = np.array( - [data['image'].shape for data in batch_data]).max(axis=0) - if coarsest_stride > 0: - max_shape = np.zeros((3)).astype('int32') - max_shape[1] = int( - np.ceil(max_shape_org[1] / coarsest_stride) * coarsest_stride) - max_shape[2] = int( - np.ceil(max_shape_org[2] / coarsest_stride) * coarsest_stride) - else: - max_shape = max_shape_org.astype('int32') - - padding_image = list() - padding_info = list() - padding_shape = list() - - for data in batch_data: - im_c, im_h, im_w = data['image'].shape - # image - padding_im = np.zeros((im_c, max_shape[1], max_shape[2]), - dtype=np.float32) - padding_im[:, 0:im_h, 0:im_w] = data['image'] - padding_image.append(padding_im) - # im_info - data['im_info'][ - 0] = max_shape[1] if use_padded_im_info else max_shape_org[1] - data['im_info'][ - 1] = max_shape[2] if use_padded_im_info else max_shape_org[2] - padding_info.append(data['im_info']) - padding_shape.append(data['im_shape']) - - padding_image = np.array(padding_image).astype('float32') - padding_info = np.array(padding_info).astype('float32') - padding_shape = np.array(padding_shape).astype('float32') - return padding_image, padding_info, padding_shape
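- - - # --- Editor's sketch (not part of the original file): the resize rule used in - # test_reader above, as a standalone helper — scale the short side to - # target_size unless that would push the long side past max_size. - def rcnn_scale(h, w, target_size=800, max_size=1333): - scale = float(target_size) / min(h, w) - if round(scale * max(h, w)) > max_size: - scale = float(max_size) / max(h, w) - return scale # e.g. rcnn_scale(480, 640) == 800.0 / 480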
diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/module.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/module.py deleted file mode 100644 index c61cc84ab900a7c700993ab7ce5fc5fa8320ac44..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/module.py +++ /dev/null @@ -1,448 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import ast -import argparse -from collections import OrderedDict -from functools import partial -from math import ceil - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.module.module import moduleinfo, runnable, serving -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.io.parser import txt_parser -from paddlehub.common.paddle_helper import add_vars_prefix - -from faster_rcnn_resnet50_coco2017.processor import load_label_info, postprocess, base64_to_cv2 -from faster_rcnn_resnet50_coco2017.data_feed import test_reader, padding_minibatch -from faster_rcnn_resnet50_coco2017.resnet import ResNet, ResNetC5 -from faster_rcnn_resnet50_coco2017.rpn_head import AnchorGenerator, RPNTargetAssign, GenerateProposals, RPNHead -from faster_rcnn_resnet50_coco2017.bbox_head import MultiClassNMS, BBoxHead, SmoothL1Loss -from faster_rcnn_resnet50_coco2017.bbox_assigner import BBoxAssigner -from faster_rcnn_resnet50_coco2017.roi_extractor import RoIAlign - - -@moduleinfo( - name="faster_rcnn_resnet50_coco2017", - version="1.1.0", - type="cv/object_detection", - summary= - "Baidu's Faster R-CNN model for object detection with backbone ResNet50, trained with dataset COCO2017", - author="paddlepaddle", - author_email="paddle-dev@baidu.com") -class FasterRCNNResNet50(hub.Module): - def _initialize(self): - # default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 800, 1333] - self.default_pretrained_model_path = os.path.join( - self.directory, "faster_rcnn_resnet50_model") - self.label_names = load_label_info( - os.path.join(self.directory, "label_file.txt")) - self._set_config() - - def _set_config(self): - """ - predictor config setting - """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, - num_classes=81, - trainable=True, - pretrained=True, - phase='train'): - """ - Extract the head features, so that transfer learning can be performed. - - Args: - num_classes (int): number of categories - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load the default pretrained model. - phase (str): optional choices are 'train' and 'predict'. - - Returns: - inputs (dict): the input variables. - outputs (dict): the output variables. - context_prog (Program): the program to execute transfer learning.
- """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - image = fluid.layers.data( - name='image', shape=[-1, 3, -1, -1], dtype='float32') - # backbone - backbone = ResNet( - norm_type='affine_channel', - depth=50, - feature_maps=4, - freeze_at=2) - body_feats = backbone(image) - - # var_prefix - var_prefix = '@HUB_{}@'.format(self.name) - im_info = fluid.layers.data( - name='im_info', shape=[3], dtype='float32', lod_level=0) - im_shape = fluid.layers.data( - name='im_shape', shape=[3], dtype='float32', lod_level=0) - body_feat_names = list(body_feats.keys()) - # rpn_head: RPNHead - rpn_head = self.rpn_head() - rois = rpn_head.get_proposals(body_feats, im_info, mode=phase) - # train - if phase == 'train': - gt_bbox = fluid.layers.data( - name='gt_bbox', shape=[4], dtype='float32', lod_level=1) - is_crowd = fluid.layers.data( - name='is_crowd', shape=[1], dtype='int32', lod_level=1) - gt_class = fluid.layers.data( - name='gt_class', shape=[1], dtype='int32', lod_level=1) - rpn_loss = rpn_head.get_loss(im_info, gt_bbox, is_crowd) - # bbox_assigner: BBoxAssigner - bbox_assigner = self.bbox_assigner(num_classes) - outs = fluid.layers.generate_proposal_labels( - rpn_rois=rois, - gt_classes=gt_class, - is_crowd=is_crowd, - gt_boxes=gt_bbox, - im_info=im_info, - batch_size_per_im=bbox_assigner.batch_size_per_im, - fg_fraction=bbox_assigner.fg_fraction, - fg_thresh=bbox_assigner.fg_thresh, - bg_thresh_hi=bbox_assigner.bg_thresh_hi, - bg_thresh_lo=bbox_assigner.bg_thresh_lo, - bbox_reg_weights=bbox_assigner.bbox_reg_weights, - class_nums=bbox_assigner.class_nums, - use_random=bbox_assigner.use_random) - rois = outs[0] - - body_feat = body_feats[body_feat_names[-1]] - # roi_extractor: RoIAlign - roi_extractor = self.roi_extractor() - roi_feat = fluid.layers.roi_align( - input=body_feat, - rois=rois, - pooled_height=roi_extractor.pooled_height, - pooled_width=roi_extractor.pooled_width, - spatial_scale=roi_extractor.spatial_scale, - sampling_ratio=roi_extractor.sampling_ratio) - # head_feat - bbox_head = self.bbox_head(num_classes) - head_feat = bbox_head.head(roi_feat) - if isinstance(head_feat, OrderedDict): - head_feat = list(head_feat.values())[0] - if phase == 'train': - inputs = { - 'image': var_prefix + image.name, - 'im_info': var_prefix + im_info.name, - 'im_shape': var_prefix + im_shape.name, - 'gt_class': var_prefix + gt_class.name, - 'gt_bbox': var_prefix + gt_bbox.name, - 'is_crowd': var_prefix + is_crowd.name - } - outputs = { - 'head_features': - var_prefix + head_feat.name, - 'rpn_cls_loss': - var_prefix + rpn_loss['rpn_cls_loss'].name, - 'rpn_reg_loss': - var_prefix + rpn_loss['rpn_reg_loss'].name, - 'generate_proposal_labels': - [var_prefix + var.name for var in outs] - } - elif phase == 'predict': - pred = bbox_head.get_prediction(roi_feat, rois, im_info, - im_shape) - inputs = { - 'image': var_prefix + image.name, - 'im_info': var_prefix + im_info.name, - 'im_shape': var_prefix + im_shape.name - } - outputs = { - 'head_features': var_prefix + head_feat.name, - 'rois': var_prefix + rois.name, - 'bbox_out': var_prefix + pred.name - } - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(startup_program, var_prefix) - - global_vars = context_prog.global_block().vars - inputs = { - key: global_vars[value] - for key, value in inputs.items() - } - outputs = { - key: global_vars[value] if not isinstance(value, list) else - [global_vars[var] for var in value] - for 
key, value in outputs.items() - } - - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(startup_program) - if pretrained: - - def _if_exist(var): - if num_classes != 81: - if 'bbox_pred' in var.name or 'cls_score' in var.name: - return False - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) - return inputs, outputs, context_prog - - def rpn_head(self): - return RPNHead( - anchor_generator=AnchorGenerator( - anchor_sizes=[32, 64, 128, 256, 512], - aspect_ratios=[0.5, 1.0, 2.0], - stride=[16.0, 16.0], - variance=[1.0, 1.0, 1.0, 1.0]), - rpn_target_assign=RPNTargetAssign( - rpn_batch_size_per_im=256, - rpn_fg_fraction=0.5, - rpn_negative_overlap=0.3, - rpn_positive_overlap=0.7, - rpn_straddle_thresh=0.0), - train_proposal=GenerateProposals( - min_size=0.0, - nms_thresh=0.7, - post_nms_top_n=2000, - pre_nms_top_n=12000), - test_proposal=GenerateProposals( - min_size=0.0, - nms_thresh=0.7, - post_nms_top_n=1000, - pre_nms_top_n=6000)) - - def roi_extractor(self): - return RoIAlign(resolution=14, sampling_ratio=0, spatial_scale=0.0625) - - def bbox_head(self, num_classes): - return BBoxHead( - head=ResNetC5(depth=50, norm_type='affine_channel'), - nms=MultiClassNMS( - keep_top_k=100, nms_threshold=0.5, score_threshold=0.05), - bbox_loss=SmoothL1Loss(), - num_classes=num_classes) - - def bbox_assigner(self, num_classes): - return BBoxAssigner( - batch_size_per_im=512, - bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], - bg_thresh_hi=0.5, - bg_thresh_lo=0.0, - fg_fraction=0.25, - fg_thresh=0.5, - class_nums=num_classes) - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename)
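- - # --- Editor's note (not part of the original file): a typical call into the - # API defined below, sketched under the assumption of PaddleHub 1.x-style - # module loading ('test.jpg' is a hypothetical local image): - # - # import paddlehub as hub - # detector = hub.Module(name="faster_rcnn_resnet50_coco2017") - # results = detector.object_detection(paths=['test.jpg'], score_thresh=0.5)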
- def object_detection(self, - paths=None, - images=None, - data=None, - use_gpu=False, - batch_size=1, - output_dir='detection_result', - score_thresh=0.5, - visualization=True): - """API of Object Detection. - - Args: - paths (list[str]): The paths of images. - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - batch_size (int): batch size. - use_gpu (bool): Whether to use GPU. - output_dir (str): The path to store output images. - visualization (bool): Whether to save images or not. - score_thresh (float): threshold for object detection. - - Returns: - res (list[dict]): The result of COCO2017 detection. keys include 'data', 'save_path', the corresponding values are: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding values are: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of the detection result; - confidence (float): The confidence of the detection result. - save_path (str, optional): The path to save output images. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as the id of the CUDA device." - ) - paths = paths if paths else list() - if data and 'image' in data: - paths += data['image'] - - all_images = list() - for yield_return in test_reader(paths, images): - all_images.append(yield_return) - - images_num = len(all_images) - loop_num = ceil(images_num / batch_size) - res = [] - for iter_id in range(loop_num): - batch_data = [] - handle_id = iter_id * batch_size - for image_id in range(batch_size): - try: - batch_data.append(all_images[handle_id + image_id]) - except: - pass - - padding_image, padding_info, padding_shape = padding_minibatch( - batch_data) - padding_image_tensor = PaddleTensor(padding_image.copy()) - padding_info_tensor = PaddleTensor(padding_info.copy()) - padding_shape_tensor = PaddleTensor(padding_shape.copy()) - feed_list = [ - padding_image_tensor, padding_info_tensor, padding_shape_tensor - ] - if use_gpu: - data_out = self.gpu_predictor.run(feed_list) - else: - data_out = self.cpu_predictor.run(feed_list) - output = postprocess( - paths=paths, - images=images, - data_out=data_out, - score_thresh=score_thresh, - label_names=self.label_names, - output_dir=output_dir, - handle_id=handle_id, - visualization=visualization) - res += output - return res - - def add_module_config_arg(self): - """ - Add the command config options - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether to use GPU or not") - - self.arg_config_group.add_argument( - '--batch_size', - type=int, - default=1, - help="batch size for prediction") - - def add_module_input_arg(self): - """ - Add the command input options - """ - self.arg_input_group.add_argument( - '--input_path', type=str, default=None, help="input data") - - self.arg_input_group.add_argument( - '--input_file', - type=str, - default=None, - help="file containing input data") - - def check_input_data(self, args): - input_data = [] - if args.input_path: - input_data = [args.input_path] - elif args.input_file: - if not os.path.exists(args.input_file): - raise RuntimeError("File %s does not exist." % args.input_file) - else: - input_data = txt_parser.parse(args.input_file, use_strip=True) - return input_data
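- - # --- Editor's sketch (not part of the original file): preparing a base64 - # payload for serving_method below (assumption: standard PaddleHub serving, - # which passes base64-encoded images; 'test.jpg' is hypothetical): - # - # import base64, cv2 - # img = cv2.imread('test.jpg') - # b64 = base64.b64encode(cv2.imencode('.jpg', img)[1].tobytes()).decode('utf8')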
- """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.object_detection(images=images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - self.parser = argparse.ArgumentParser( - description="Run the {}".format(self.name), - prog="hub run {}".format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - input_data = self.check_input_data(args) - if len(input_data) == 0: - self.parser.print_help() - exit(1) - else: - for image_path in input_data: - if not os.path.exists(image_path): - raise RuntimeError( - "File %s or %s is not exist." % image_path) - return self.object_detection( - paths=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size) diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/nonlocal_helper.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/nonlocal_helper.py deleted file mode 100644 index 599b8dfa07b6372f25990649212b232c523e19de..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/nonlocal_helper.py +++ /dev/null @@ -1,154 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import paddle.fluid as fluid -from paddle.fluid import ParamAttr - -nonlocal_params = { - "use_zero_init_conv": False, - "conv_init_std": 0.01, - "no_bias": True, - "use_maxpool": False, - "use_softmax": True, - "use_bn": False, - "use_scale": True, # vital for the model prformance!!! 
- "use_affine": False, - "bn_momentum": 0.9, - "bn_epsilon": 1.0000001e-5, - "bn_init_gamma": 0.9, - "weight_decay_bn": 1.e-4, -} - - -def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner, - max_pool_stride=2): - cur = input - theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr=ParamAttr(name = prefix + '_theta' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if not nonlocal_params["no_bias"] else False, \ - name = prefix + '_theta') - theta_shape = theta.shape - theta_shape_op = fluid.layers.shape(theta) - theta_shape_op.stop_gradient = True - - if nonlocal_params["use_maxpool"]: - max_pool = fluid.layers.pool2d(input = cur, \ - pool_size = [max_pool_stride, max_pool_stride], \ - pool_type = 'max', \ - pool_stride = [max_pool_stride, max_pool_stride], \ - pool_padding = [0, 0], \ - name = prefix + '_pool') - else: - max_pool = cur - - phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_phi' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_phi') - phi_shape = phi.shape - - g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_g' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_g' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_g') - g_shape = g.shape - # we have to use explicit batch size (to support arbitrary spacetime size) - # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784) - theta = fluid.layers.reshape(theta, shape=(0, 0, -1)) - theta = fluid.layers.transpose(theta, [0, 2, 1]) - phi = fluid.layers.reshape(phi, [0, 0, -1]) - theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity') - g = fluid.layers.reshape(g, [0, 0, -1]) - - if nonlocal_params["use_softmax"]: - if nonlocal_params["use_scale"]: - theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5) - else: - theta_phi_sc = theta_phi - p = fluid.layers.softmax( - theta_phi_sc, name=prefix + '_affinity' + '_prob') - else: - # not clear about what is doing in xlw's code - p = None # not implemented - raise "Not implemented when not use softmax" - - # note g's axis[2] corresponds to p's axis[2] - # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1) - p = fluid.layers.transpose(p, [0, 2, 1]) - t = fluid.layers.matmul(g, p, name=prefix + '_y') - - # reshape back - # e.g. 
(8, 1024, 784) => (8, 1024, 4, 14, 14) - t_shape = t.shape - t_re = fluid.layers.reshape( - t, shape=list(theta_shape), actual_shape=theta_shape_op) - blob_out = t_re - blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \ - filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_out' + "_w", \ - initializer = fluid.initializer.Constant(value = 0.) \ - if nonlocal_params["use_zero_init_conv"] \ - else fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_out' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_out') - blob_out_shape = blob_out.shape - - if nonlocal_params["use_bn"]: - bn_name = prefix + "_bn" - blob_out = fluid.layers.batch_norm(blob_out, \ - # is_test = test_mode, \ - momentum = nonlocal_params["bn_momentum"], \ - epsilon = nonlocal_params["bn_epsilon"], \ - name = bn_name, \ - param_attr = ParamAttr(name = bn_name + "_s", \ - initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - bias_attr = ParamAttr(name = bn_name + "_b", \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - moving_mean_name = bn_name + "_rm", \ - moving_variance_name = bn_name + "_riv") # add bn - - if nonlocal_params["use_affine"]: - affine_scale = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_s'), \ - default_initializer = fluid.initializer.Constant(value = 1.)) - affine_bias = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_b'), \ - default_initializer = fluid.initializer.Constant(value = 0.)) - blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \ - bias = affine_bias, name = prefix + '_affine') # add affine - - return blob_out - - -def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner): - ''' - add_space_nonlocal: - Non-local Neural Networks: see https://arxiv.org/abs/1711.07971 - ''' - conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner) - output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum') - return output diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/processor.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/processor.py deleted file mode 100644 index a995854e9de8581da3c4a392f22687c358098d6c..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/processor.py +++ /dev/null @@ -1,176 +0,0 @@ -# coding=utf-8 -import base64 -import os - -import cv2 -import numpy as np -from PIL import Image, ImageDraw - -__all__ = [ - 'base64_to_cv2', - 'load_label_info', - 'postprocess', -] - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def get_save_image_name(img, output_dir, image_path): - """Get save image name from source image path. 
- """ - if not os.path.exists(output_dir): - os.makedirs(output_dir) - image_name = os.path.split(image_path)[-1] - name, ext = os.path.splitext(image_name) - - if ext == '': - if img.format == 'PNG': - ext = '.png' - elif img.format == 'JPEG': - ext = '.jpg' - elif img.format == 'BMP': - ext = '.bmp' - else: - if img.mode == "RGB" or img.mode == "L": - ext = ".jpg" - elif img.mode == "RGBA" or img.mode == "P": - ext = '.png' - - return os.path.join(output_dir, "{}".format(name)) + ext - - -def draw_bounding_box_on_image(image_path, data_list, save_dir): - image = Image.open(image_path) - draw = ImageDraw.Draw(image) - for data in data_list: - left, right, top, bottom = data['left'], data['right'], data[ - 'top'], data['bottom'] - - # draw bbox - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), - (left, top)], - width=2, - fill='red') - - # draw label - if image.mode == 'RGB': - text = data['label'] + ": %.2f%%" % (100 * data['confidence']) - textsize_width, textsize_height = draw.textsize(text=text) - draw.rectangle( - xy=(left, top - (textsize_height + 5), - left + textsize_width + 10, top), - fill=(255, 255, 255)) - draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) - - save_name = get_save_image_name(image, save_dir, image_path) - if os.path.exists(save_name): - os.remove(save_name) - - image.save(save_name) - return save_name - - -def clip_bbox(bbox, img_width, img_height): - xmin = max(min(bbox[0], img_width), 0.) - ymin = max(min(bbox[1], img_height), 0.) - xmax = max(min(bbox[2], img_width), 0.) - ymax = max(min(bbox[3], img_height), 0.) - return float(xmin), float(ymin), float(xmax), float(ymax) - - -def load_label_info(file_path): - with open(file_path, 'r') as fr: - text = fr.readlines() - label_names = [] - for info in text: - label_names.append(info.strip()) - return label_names - - -def postprocess(paths, - images, - data_out, - score_thresh, - label_names, - output_dir, - handle_id, - visualization=True): - """ - postprocess the lod_tensor produced by fluid.Executor.run - - Args: - paths (list[str]): the path of images. - images (list(numpy.ndarray)): list of images, shape of each is [H, W, C]. - data_out (lod_tensor): data produced by executor.run. - score_thresh (float): the low limit of bounding box. - label_names (list[str]): label names. - output_dir (str): output directory. - handle_id (int): The number of images that have been handled. - visualization (bool): whether to save as images. - - Returns: - res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of detection result; - confidence (float): The confidence of detection result. - save_path (str): The path to save output images. 
- """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() - - if handle_id < len(paths): - unhandled_paths = paths[handle_id:] - unhandled_paths_num = len(unhandled_paths) - else: - unhandled_paths_num = 0 - - output = [] - for index in range(len(lod) - 1): - output_i = {'data': []} - if index < unhandled_paths_num: - org_img_path = unhandled_paths[index] - org_img = Image.open(org_img_path) - output_i['path'] = org_img_path - else: - org_img = images[index - unhandled_paths_num] - org_img = org_img.astype(np.uint8) - org_img = Image.fromarray(org_img[:, :, ::-1]) - if visualization: - org_img_path = get_save_image_name( - org_img, output_dir, 'image_numpy_{}'.format( - (handle_id + index))) - org_img.save(org_img_path) - org_img_height = org_img.height - org_img_width = org_img.width - result_i = results[lod[index]:lod[index + 1]] - for row in result_i: - if len(row) != 6: - continue - if row[1] < score_thresh: - continue - category_id = int(row[0]) - confidence = row[1] - bbox = row[2:] - dt = {} - dt['label'] = label_names[category_id] - dt['confidence'] = float(confidence) - dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox( - bbox, org_img_width, org_img_height) - output_i['data'].append(dt) - - output.append(output_i) - if visualization: - output_i['save_path'] = draw_bounding_box_on_image( - org_img_path, output_i['data'], output_dir) - - return output diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/resnet.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/resnet.py deleted file mode 100644 index 4bd6fb61e9a51c70b6197b25a7cf788c2ca5b790..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/resnet.py +++ /dev/null @@ -1,447 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -from collections import OrderedDict -from numbers import Integral - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.framework import Variable -from paddle.fluid.regularizer import L2Decay -from paddle.fluid.initializer import Constant - -from .nonlocal_helper import add_space_nonlocal -from .name_adapter import NameAdapter - -__all__ = ['ResNet', 'ResNetC5'] - - -class ResNet(object): - """ - Residual Network, see https://arxiv.org/abs/1512.03385 - Args: - depth (int): ResNet depth, should be 34, 50. 
- freeze_at (int): the stage at which the backbone is frozen - norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel' - freeze_norm (bool): freeze normalization layers - norm_decay (float): weight decay for normalization layer weights - variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently - feature_maps (list): index of stages whose feature maps are returned - dcn_v2_stages (list): index of stages that use deformable conv v2 - nonlocal_stages (list): index of stages that use non-local networks - """ - __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name'] - - def __init__(self, - depth=50, - freeze_at=0, - norm_type='sync_bn', - freeze_norm=False, - norm_decay=0., - variant='b', - feature_maps=[3, 4, 5], - dcn_v2_stages=[], - weight_prefix_name='', - nonlocal_stages=[], - get_prediction=False, - class_dim=1000): - super(ResNet, self).__init__() - - if isinstance(feature_maps, Integral): - feature_maps = [feature_maps] - - assert depth in [34, 50], \ - "depth {} not in [34, 50]".format(depth) - assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant" - assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" - assert len(feature_maps) > 0, "need one or more feature maps" - assert norm_type in ['bn', 'sync_bn', 'affine_channel'] - assert not (len(nonlocal_stages)>0 and depth<50), \ - "non-local is not supported for resnet18 or resnet34" - - self.depth = depth - self.freeze_at = freeze_at - self.norm_type = norm_type - self.norm_decay = norm_decay - self.freeze_norm = freeze_norm - self.variant = variant - self._model_type = 'ResNet' - self.feature_maps = feature_maps - self.dcn_v2_stages = dcn_v2_stages - self.depth_cfg = { - 34: ([3, 4, 6, 3], self.basicblock), - 50: ([3, 4, 6, 3], self.bottleneck), - } - self.stage_filters = [64, 128, 256, 512] - self._c1_out_chan_num = 64 - self.na = NameAdapter(self) - self.prefix_name = weight_prefix_name - - self.nonlocal_stages = nonlocal_stages - self.nonlocal_mod_cfg = { - 50: 2, - 101: 5, - 152: 8, - 200: 12, - } - self.get_prediction = get_prediction - self.class_dim = class_dim - - def _conv_offset(self, - input, - filter_size, - stride, - padding, - act=None, - name=None): - out_channel = filter_size * filter_size * 3 - out = fluid.layers.conv2d( - input, - num_filters=out_channel, - filter_size=filter_size, - stride=stride, - padding=padding, - param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"), - bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"), - act=act, - name=name) - return out - - def _conv_norm(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - dcn_v2=False): - _name = self.prefix_name + name if self.prefix_name != '' else name - if not dcn_v2: - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + '.conv2d.output.1') - else: - # select deformable conv - offset_mask = self._conv_offset( - input=input, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - act=None, - name=_name + "_conv_offset") - offset_channel = filter_size**2 * 2 - mask_channel = filter_size**2 - offset, mask = fluid.layers.split( - input=offset_mask, - num_or_sections=[offset_channel, mask_channel], - dim=1) - mask = fluid.layers.sigmoid(mask) - conv = fluid.layers.deformable_conv( - input=input, - offset=offset, -
mask=mask, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - deformable_groups=1, - im2col_step=1, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + ".conv2d.output.1") - - bn_name = self.na.fix_conv_norm_name(name) - bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name - - norm_lr = 0. if self.freeze_norm else 1. - norm_decay = self.norm_decay - pattr = ParamAttr( - name=bn_name + '_scale', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - battr = ParamAttr( - name=bn_name + '_offset', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - - if self.norm_type in ['bn', 'sync_bn']: - global_stats = True if self.freeze_norm else False - out = fluid.layers.batch_norm( - input=conv, - act=act, - name=bn_name + '.output.1', - param_attr=pattr, - bias_attr=battr, - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', - use_global_stats=global_stats) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif self.norm_type == 'affine_channel': - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=pattr, - default_initializer=fluid.initializer.Constant(1.)) - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=battr, - default_initializer=fluid.initializer.Constant(0.)) - out = fluid.layers.affine_channel( - x=conv, scale=scale, bias=bias, act=act) - if self.freeze_norm: - scale.stop_gradient = True - bias.stop_gradient = True - return out - - def _shortcut(self, input, ch_out, stride, is_first, name): - max_pooling_in_short_cut = self.variant == 'd' - ch_in = input.shape[1] - # the naming rule is same as pretrained weight - name = self.na.fix_shortcut_name(name) - std_senet = getattr(self, 'std_senet', False) - if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first): - if std_senet: - if is_first: - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return self._conv_norm(input, ch_out, 3, stride, name=name) - if max_pooling_in_short_cut and not is_first: - input = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - ceil_mode=True, - pool_type='avg') - return self._conv_norm(input, ch_out, 1, 1, name=name) - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - if self.variant == 'a': - stride1, stride2 = stride, 1 - else: - stride1, stride2 = 1, stride - - # ResNeXt - groups = getattr(self, 'groups', 1) - group_width = getattr(self, 'group_width', -1) - if groups == 1: - expand = 4 - elif (groups * group_width) == 256: - expand = 1 - else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d - num_filters = num_filters // 2 - expand = 2 - - conv_name1, conv_name2, conv_name3, \ - shortcut_name = self.na.fix_bottleneck_name(name) - std_senet = getattr(self, 'std_senet', False) - if std_senet: - conv_def = [[ - int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1 - ], [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - else: - conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], - [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - - residual = input - for i, (c, k, s, act, g, _name) 
in enumerate(conv_def): - residual = self._conv_norm( - input=residual, - num_filters=c, - filter_size=k, - stride=s, - act=act, - groups=g, - name=_name, - dcn_v2=(i == 1 and dcn_v2)) - short = self._shortcut( - input, - num_filters * expand, - stride, - is_first=is_first, - name=shortcut_name) - # Squeeze-and-Excitation - if callable(getattr(self, '_squeeze_excitation', None)): - residual = self._squeeze_excitation( - input=residual, num_channels=num_filters, name='fc' + name) - return fluid.layers.elementwise_add( - x=short, y=residual, act='relu', name=name + ".add.output.5") - - def basicblock(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - assert dcn_v2 is False, "Not implemented yet." - conv0 = self._conv_norm( - input=input, - num_filters=num_filters, - filter_size=3, - act='relu', - stride=stride, - name=name + "_branch2a") - conv1 = self._conv_norm( - input=conv0, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - short = self._shortcut( - input, num_filters, stride, is_first, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - - def layer_warp(self, input, stage_num): - """ - Args: - input (Variable): input variable. - stage_num (int): the stage number, should be 2, 3, 4, 5 - - Returns: - The last variable in endpoint-th stage. - """ - assert stage_num in [2, 3, 4, 5] - - stages, block_func = self.depth_cfg[self.depth] - count = stages[stage_num - 2] - - ch_out = self.stage_filters[stage_num - 2] - is_first = False if stage_num != 2 else True - dcn_v2 = True if stage_num in self.dcn_v2_stages else False - - nonlocal_mod = 1000 - if stage_num in self.nonlocal_stages: - nonlocal_mod = self.nonlocal_mod_cfg[ - self.depth] if stage_num == 4 else 2 - - # Make the layer name and parameter name consistent - # with ImageNet pre-trained model - conv = input - for i in range(count): - conv_name = self.na.fix_layer_warp_name(stage_num, count, i) - if self.depth < 50: - is_first = True if i == 0 and stage_num == 2 else False - conv = block_func( - input=conv, - num_filters=ch_out, - stride=2 if i == 0 and stage_num != 2 else 1, - is_first=is_first, - name=conv_name, - dcn_v2=dcn_v2) - - # add non local model - dim_in = conv.shape[1] - nonlocal_name = "nonlocal_conv{}".format(stage_num) - if i % nonlocal_mod == nonlocal_mod - 1: - conv = add_space_nonlocal(conv, dim_in, dim_in, - nonlocal_name + '_{}'.format(i), - int(dim_in / 2)) - return conv - - def c1_stage(self, input): - out_chan = self._c1_out_chan_num - - conv1_name = self.na.fix_c1_stage_name() - - if self.variant in ['c', 'd']: - conv_def = [ - [out_chan // 2, 3, 2, "conv1_1"], - [out_chan // 2, 3, 1, "conv1_2"], - [out_chan, 3, 1, "conv1_3"], - ] - else: - conv_def = [[out_chan, 7, 2, conv1_name]] - - for (c, k, s, _name) in conv_def: - input = self._conv_norm( - input=input, - num_filters=c, - filter_size=k, - stride=s, - act='relu', - name=_name) - - output = fluid.layers.pool2d( - input=input, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - return output - - def __call__(self, input): - assert isinstance(input, Variable) - assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ - "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) - - res_endpoints = [] - - res = input - feature_maps = self.feature_maps - severed_head = getattr(self, 'severed_head', False) - if not severed_head: - res = self.c1_stage(res) - feature_maps = range(2, max(self.feature_maps) + 1) - - for i in feature_maps: 
- res = self.layer_warp(res, i) - if i in self.feature_maps: - res_endpoints.append(res) - if self.freeze_at >= i: - res.stop_gradient = True - if self.get_prediction: - pool = fluid.layers.pool2d( - input=res, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - - out = fluid.layers.fc( - input=pool, - size=self.class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - out = fluid.layers.softmax(out) - return out - return OrderedDict([('res{}_sum'.format(self.feature_maps[idx]), feat) - for idx, feat in enumerate(res_endpoints)]) - - -class ResNetC5(ResNet): - def __init__(self, - depth=50, - freeze_at=2, - norm_type='affine_channel', - freeze_norm=True, - norm_decay=0., - variant='b', - feature_maps=[5], - weight_prefix_name=''): - super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm, - norm_decay, variant, feature_maps) - self.severed_head = True diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/rpn_head.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/rpn_head.py deleted file mode 100644 index 7acdf083ff80bbe0c430f66fe0b6542563bfcb27..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/rpn_head.py +++ /dev/null @@ -1,302 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Normal -from paddle.fluid.regularizer import L2Decay - -__all__ = ['AnchorGenerator', 'RPNTargetAssign', 'GenerateProposals', 'RPNHead'] - - -class AnchorGenerator(object): - # __op__ = fluid.layers.anchor_generator - def __init__(self, - stride=[16.0, 16.0], - anchor_sizes=[32, 64, 128, 256, 512], - aspect_ratios=[0.5, 1., 2.], - variance=[1., 1., 1., 1.]): - super(AnchorGenerator, self).__init__() - self.anchor_sizes = anchor_sizes - self.aspect_ratios = aspect_ratios - self.variance = variance - self.stride = stride - - -class RPNTargetAssign(object): - # __op__ = fluid.layers.rpn_target_assign - def __init__(self, - rpn_batch_size_per_im=256, - rpn_straddle_thresh=0., - rpn_fg_fraction=0.5, - rpn_positive_overlap=0.7, - rpn_negative_overlap=0.3, - use_random=True): - super(RPNTargetAssign, self).__init__() - self.rpn_batch_size_per_im = rpn_batch_size_per_im - self.rpn_straddle_thresh = rpn_straddle_thresh - self.rpn_fg_fraction = rpn_fg_fraction - self.rpn_positive_overlap = rpn_positive_overlap - self.rpn_negative_overlap = rpn_negative_overlap - self.use_random = use_random - - -class GenerateProposals(object): - # __op__ = fluid.layers.generate_proposals - def __init__(self, - pre_nms_top_n=6000, - post_nms_top_n=1000, - nms_thresh=.5, - min_size=.1, - eta=1.): - super(GenerateProposals, self).__init__() - self.pre_nms_top_n = pre_nms_top_n - self.post_nms_top_n = post_nms_top_n - self.nms_thresh = nms_thresh - self.min_size = min_size - self.eta = eta - - -class RPNHead(object): - """ - RPN Head - - Args: - anchor_generator (object): `AnchorGenerator` instance - rpn_target_assign (object): `RPNTargetAssign` instance - train_proposal (object): `GenerateProposals` instance for training - test_proposal (object): `GenerateProposals` instance for testing - num_classes (int): number of classes in rpn output - """ - __inject__ = [ - 'anchor_generator', 'rpn_target_assign', 'train_proposal', - 
'test_proposal' - ] - - def __init__(self, - anchor_generator, - rpn_target_assign, - train_proposal, - test_proposal, - num_classes=1): - super(RPNHead, self).__init__() - self.anchor_generator = anchor_generator - self.rpn_target_assign = rpn_target_assign - self.train_proposal = train_proposal - self.test_proposal = test_proposal - self.num_classes = num_classes - - def _get_output(self, input): - """ - Get anchor and RPN head output. - - Args: - input(Variable): feature map from backbone with shape of [N, C, H, W] - - Returns: - rpn_cls_score(Variable): Output of rpn head with shape of [N, num_anchors, H, W]. - rpn_bbox_pred(Variable): Output of rpn head with shape of [N, num_anchors * 4, H, W]. - """ - dim_out = input.shape[1] - rpn_conv = fluid.layers.conv2d( - input=input, - num_filters=dim_out, - filter_size=3, - stride=1, - padding=1, - act='relu', - name='conv_rpn', - param_attr=ParamAttr( - name="conv_rpn_w", initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.))) - # Generate anchors self.anchor_generator - self.anchor, self.anchor_var = fluid.layers.anchor_generator( - input=rpn_conv, - anchor_sizes=self.anchor_generator.anchor_sizes, - aspect_ratios=self.anchor_generator.aspect_ratios, - variance=self.anchor_generator.variance, - stride=self.anchor_generator.stride) - - num_anchor = self.anchor.shape[2] - # Proposal classification scores - self.rpn_cls_score = fluid.layers.conv2d( - rpn_conv, - num_filters=num_anchor * self.num_classes, - filter_size=1, - stride=1, - padding=0, - act=None, - name='rpn_cls_score', - param_attr=ParamAttr( - name="rpn_cls_logits_w", initializer=Normal(loc=0., - scale=0.01)), - bias_attr=ParamAttr( - name="rpn_cls_logits_b", - learning_rate=2., - regularizer=L2Decay(0.))) - # Proposal bbox regression deltas - self.rpn_bbox_pred = fluid.layers.conv2d( - rpn_conv, - num_filters=4 * num_anchor, - filter_size=1, - stride=1, - padding=0, - act=None, - name='rpn_bbox_pred', - param_attr=ParamAttr( - name="rpn_bbox_pred_w", initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name="rpn_bbox_pred_b", - learning_rate=2., - regularizer=L2Decay(0.))) - return self.rpn_cls_score, self.rpn_bbox_pred - - def get_proposals(self, body_feats, im_info, mode='train'): - """ - Get proposals according to the output of backbone. - - Args: - body_feats (dict): The dictionary of feature maps from backbone. - im_info(Variable): The information of image with shape [N, 3] with - shape (height, width, scale). - body_feat_names(list): A list of names of feature maps from - backbone. - - Returns: - rpn_rois(Variable): Output proposals with shape of (rois_num, 4). - """ - # In RPN Heads, only the last feature map of backbone is used. - # And body_feat_names[-1] represents the last level name of backbone. 
- body_feat = list(body_feats.values())[-1] - rpn_cls_score, rpn_bbox_pred = self._get_output(body_feat) - - if self.num_classes == 1: - rpn_cls_prob = fluid.layers.sigmoid( - rpn_cls_score, name='rpn_cls_prob') - else: - rpn_cls_score = fluid.layers.transpose( - rpn_cls_score, perm=[0, 2, 3, 1]) - rpn_cls_score = fluid.layers.reshape( - rpn_cls_score, shape=(0, 0, 0, -1, self.num_classes)) - rpn_cls_prob_tmp = fluid.layers.softmax( - rpn_cls_score, use_cudnn=False, name='rpn_cls_prob') - rpn_cls_prob_slice = fluid.layers.slice( - rpn_cls_prob_tmp, axes=[4], starts=[1], ends=[self.num_classes]) - rpn_cls_prob, _ = fluid.layers.topk(rpn_cls_prob_slice, 1) - rpn_cls_prob = fluid.layers.reshape( - rpn_cls_prob, shape=(0, 0, 0, -1)) - rpn_cls_prob = fluid.layers.transpose( - rpn_cls_prob, perm=[0, 3, 1, 2]) - prop_op = self.train_proposal if mode == 'train' else self.test_proposal - # prop_op - rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals( - scores=rpn_cls_prob, - bbox_deltas=rpn_bbox_pred, - im_info=im_info, - anchors=self.anchor, - variances=self.anchor_var, - pre_nms_top_n=prop_op.pre_nms_top_n, - post_nms_top_n=prop_op.post_nms_top_n, - nms_thresh=prop_op.nms_thresh, - min_size=prop_op.min_size, - eta=prop_op.eta) - return rpn_rois - - def _transform_input(self, rpn_cls_score, rpn_bbox_pred, anchor, - anchor_var): - rpn_cls_score = fluid.layers.transpose(rpn_cls_score, perm=[0, 2, 3, 1]) - rpn_bbox_pred = fluid.layers.transpose(rpn_bbox_pred, perm=[0, 2, 3, 1]) - anchor = fluid.layers.reshape(anchor, shape=(-1, 4)) - anchor_var = fluid.layers.reshape(anchor_var, shape=(-1, 4)) - rpn_cls_score = fluid.layers.reshape( - x=rpn_cls_score, shape=(0, -1, self.num_classes)) - rpn_bbox_pred = fluid.layers.reshape(x=rpn_bbox_pred, shape=(0, -1, 4)) - return rpn_cls_score, rpn_bbox_pred, anchor, anchor_var - - def _get_loss_input(self): - for attr in ['rpn_cls_score', 'rpn_bbox_pred', 'anchor', 'anchor_var']: - if not getattr(self, attr, None): - raise ValueError( - "self.{} should not be None; call RPNHead.get_proposals first".format(attr)) - return self._transform_input(self.rpn_cls_score, self.rpn_bbox_pred, - self.anchor, self.anchor_var) - - def get_loss(self, im_info, gt_box, is_crowd, gt_label=None): - """ - Sample proposals and calculate the RPN loss. - - Args: - im_info(Variable): The information of images with shape [N, 3], - each row being (height, width, scale). - gt_box(Variable): The ground-truth bounding boxes with shape [M, 4]. - M is the number of ground-truth boxes. - is_crowd(Variable): Indicates whether a ground-truth box is crowd, - with shape [M, 1]. M is the number of ground-truth boxes. - - Returns: - Type: dict - rpn_cls_loss(Variable): RPN classification loss. - rpn_bbox_loss(Variable): RPN bounding box regression loss.
- - """ - rpn_cls, rpn_bbox, anchor, anchor_var = self._get_loss_input() - if self.num_classes == 1: - # self.rpn_target_assign - score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ - fluid.layers.rpn_target_assign( - bbox_pred=rpn_bbox, - cls_logits=rpn_cls, - anchor_box=anchor, - anchor_var=anchor_var, - gt_boxes=gt_box, - is_crowd=is_crowd, - im_info=im_info, - rpn_batch_size_per_im=self.rpn_target_assign.rpn_batch_size_per_im, - rpn_straddle_thresh=self.rpn_target_assign.rpn_straddle_thresh, - rpn_fg_fraction=self.rpn_target_assign.rpn_fg_fraction, - rpn_positive_overlap=self.rpn_target_assign.rpn_positive_overlap, - rpn_negative_overlap=self.rpn_target_assign.rpn_negative_overlap, - use_random=self.rpn_target_assign.use_random) - score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32') - score_tgt.stop_gradient = True - rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=score_pred, label=score_tgt) - else: - score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ - self.rpn_target_assign( - bbox_pred=rpn_bbox, - cls_logits=rpn_cls, - anchor_box=anchor, - anchor_var=anchor_var, - gt_boxes=gt_box, - gt_labels=gt_label, - is_crowd=is_crowd, - num_classes=self.num_classes, - im_info=im_info) - labels_int64 = fluid.layers.cast(x=score_tgt, dtype='int64') - labels_int64.stop_gradient = True - rpn_cls_loss = fluid.layers.softmax_with_cross_entropy( - logits=score_pred, label=labels_int64, numeric_stable_mode=True) - - rpn_cls_loss = fluid.layers.reduce_mean( - rpn_cls_loss, name='loss_rpn_cls') - - loc_tgt = fluid.layers.cast(x=loc_tgt, dtype='float32') - loc_tgt.stop_gradient = True - rpn_reg_loss = fluid.layers.smooth_l1( - x=loc_pred, - y=loc_tgt, - sigma=3.0, - inside_weight=bbox_weight, - outside_weight=bbox_weight) - rpn_reg_loss = fluid.layers.reduce_sum( - rpn_reg_loss, name='loss_rpn_bbox') - score_shape = fluid.layers.shape(score_tgt) - score_shape = fluid.layers.cast(x=score_shape, dtype='float32') - norm = fluid.layers.reduce_prod(score_shape) - norm.stop_gradient = True - rpn_reg_loss = rpn_reg_loss / norm - return {'rpn_cls_loss': rpn_cls_loss, 'rpn_reg_loss': rpn_reg_loss} diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_head.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_head.py deleted file mode 100644 index 8080ed22f5057ece32f890c6bb2e44564700f9e1..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_head.py +++ /dev/null @@ -1,270 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Normal, Xavier -from paddle.fluid.regularizer import L2Decay -from paddle.fluid.initializer import MSRA - - -class MultiClassNMS(object): - # __op__ = fluid.layers.multiclass_nms - def __init__(self, - score_threshold=.05, - nms_top_k=-1, - keep_top_k=100, - nms_threshold=.5, - normalized=False, - nms_eta=1.0, - background_label=0): - super(MultiClassNMS, self).__init__() - self.score_threshold = score_threshold - self.nms_top_k = nms_top_k - self.keep_top_k = keep_top_k - self.nms_threshold = nms_threshold - self.normalized = normalized - self.nms_eta = nms_eta - self.background_label = background_label - - -class SmoothL1Loss(object): - ''' - Smooth L1 loss - Args: - sigma 
(float): hyper param in smooth l1 loss - ''' - - def __init__(self, sigma=1.0): - super(SmoothL1Loss, self).__init__() - self.sigma = sigma - - def __call__(self, x, y, inside_weight=None, outside_weight=None): - return fluid.layers.smooth_l1( - x, - y, - inside_weight=inside_weight, - outside_weight=outside_weight, - sigma=self.sigma) - - -class BoxCoder(object): - def __init__(self, - prior_box_var=[0.1, 0.1, 0.2, 0.2], - code_type='decode_center_size', - box_normalized=False, - axis=1): - super(BoxCoder, self).__init__() - self.prior_box_var = prior_box_var - self.code_type = code_type - self.box_normalized = box_normalized - self.axis = axis - - -class TwoFCHead(object): - """ - RCNN head with two Fully Connected layers - - Args: - mlp_dim (int): num of filters for the fc layers - """ - - def __init__(self, mlp_dim=1024): - super(TwoFCHead, self).__init__() - self.mlp_dim = mlp_dim - - def __call__(self, roi_feat): - fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3] - - fc6 = fluid.layers.fc( - input=roi_feat, - size=self.mlp_dim, - act='relu', - name='fc6', - param_attr=ParamAttr(name='fc6_w', initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name='fc6_b', learning_rate=2., regularizer=L2Decay(0.))) - head_feat = fluid.layers.fc( - input=fc6, - size=self.mlp_dim, - act='relu', - name='fc7', - param_attr=ParamAttr(name='fc7_w', initializer=Xavier()), - bias_attr=ParamAttr( - name='fc7_b', learning_rate=2., regularizer=L2Decay(0.))) - - return head_feat - - -class BBoxHead(object): - """ - RCNN bbox head - - Args: - head (object): the head module instance, e.g., `ResNetC5`, `TwoFCHead` - box_coder (object): `BoxCoder` instance - nms (object): `MultiClassNMS` instance - num_classes: number of output classes - """ - __inject__ = ['head', 'box_coder', 'nms', 'bbox_loss'] - __shared__ = ['num_classes'] - - def __init__(self, - head, - box_coder=BoxCoder(), - nms=MultiClassNMS(), - bbox_loss=SmoothL1Loss(), - num_classes=81): - super(BBoxHead, self).__init__() - self.head = head - self.num_classes = num_classes - self.box_coder = box_coder - self.nms = nms - self.bbox_loss = bbox_loss - self.head_feat = None - - def get_head_feat(self, input=None): - """ - Get the bbox head feature map. - """ - - if input is not None: - feat = self.head(input) - if isinstance(feat, OrderedDict): - feat = list(feat.values())[0] - self.head_feat = feat - return self.head_feat - - def _get_output(self, roi_feat): - """ - Get bbox head output. - - Args: - roi_feat (Variable): RoI feature from RoIExtractor. - - Returns: - cls_score(Variable): Output of rpn head with shape of - [N, num_anchors, H, W]. - bbox_pred(Variable): Output of rpn head with shape of - [N, num_anchors * 4, H, W]. 
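-
-        Note:
-            cls_score and bbox_pred here are fc outputs computed over pooled
-            RoI features, so their actual shapes are [P, num_classes] and
-            [P, 4 * num_classes] for P input RoIs; the shapes quoted above
-            were carried over from the RPN head's docstring.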
- """ - head_feat = self.get_head_feat(roi_feat) - # when ResNetC5 output a single feature map - if not isinstance(self.head, TwoFCHead): - head_feat = fluid.layers.pool2d( - head_feat, pool_type='avg', global_pooling=True) - cls_score = fluid.layers.fc( - input=head_feat, - size=self.num_classes, - act=None, - name='cls_score', - param_attr=ParamAttr( - name='cls_score_w', initializer=Normal(loc=0.0, scale=0.01)), - bias_attr=ParamAttr( - name='cls_score_b', learning_rate=2., regularizer=L2Decay(0.))) - bbox_pred = fluid.layers.fc( - input=head_feat, - size=4 * self.num_classes, - act=None, - name='bbox_pred', - param_attr=ParamAttr( - name='bbox_pred_w', initializer=Normal(loc=0.0, scale=0.001)), - bias_attr=ParamAttr( - name='bbox_pred_b', learning_rate=2., regularizer=L2Decay(0.))) - return cls_score, bbox_pred - - def get_loss(self, roi_feat, labels_int32, bbox_targets, - bbox_inside_weights, bbox_outside_weights): - """ - Get bbox_head loss. - - Args: - roi_feat (Variable): RoI feature from RoIExtractor. - labels_int32(Variable): Class label of a RoI with shape [P, 1]. - P is the number of RoI. - bbox_targets(Variable): Box label of a RoI with shape - [P, 4 * class_nums]. - bbox_inside_weights(Variable): Indicates whether a box should - contribute to loss. Same shape as bbox_targets. - bbox_outside_weights(Variable): Indicates whether a box should - contribute to loss. Same shape as bbox_targets. - - Return: - Type: Dict - loss_cls(Variable): bbox_head loss. - loss_bbox(Variable): bbox_head loss. - """ - - cls_score, bbox_pred = self._get_output(roi_feat) - - labels_int64 = fluid.layers.cast(x=labels_int32, dtype='int64') - labels_int64.stop_gradient = True - loss_cls = fluid.layers.softmax_with_cross_entropy( - logits=cls_score, label=labels_int64, numeric_stable_mode=True) - loss_cls = fluid.layers.reduce_mean(loss_cls) - loss_bbox = self.bbox_loss( - x=bbox_pred, - y=bbox_targets, - inside_weight=bbox_inside_weights, - outside_weight=bbox_outside_weights) - loss_bbox = fluid.layers.reduce_mean(loss_bbox) - return {'loss_cls': loss_cls, 'loss_bbox': loss_bbox} - - def get_prediction(self, - roi_feat, - rois, - im_info, - im_shape, - return_box_score=False): - """ - Get prediction bounding box in test stage. - - Args: - roi_feat (Variable): RoI feature from RoIExtractor. - rois (Variable): Output of generate_proposals in rpn head. - im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the - number of input images, each element consists of im_height, - im_width, im_scale. - im_shape (Variable): Actual shape of original image with shape - [B, 3]. B is the number of images, each element consists of - original_height, original_width, 1 - - Returns: - pred_result(Variable): Prediction result with shape [N, 6]. Each - row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]. - N is the total number of prediction. 
- """ - cls_score, bbox_pred = self._get_output(roi_feat) - - im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3]) - im_scale = fluid.layers.sequence_expand(im_scale, rois) - boxes = rois / im_scale - cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False) - bbox_pred = fluid.layers.reshape(bbox_pred, (-1, self.num_classes, 4)) - # self.box_coder - decoded_box = fluid.layers.box_coder( - prior_box=boxes, - target_box=bbox_pred, - prior_box_var=self.box_coder.prior_box_var, - code_type=self.box_coder.code_type, - box_normalized=self.box_coder.box_normalized, - axis=self.box_coder.axis) - cliped_box = fluid.layers.box_clip(input=decoded_box, im_info=im_shape) - if return_box_score: - return {'bbox': cliped_box, 'score': cls_prob} - # self.nms - pred_result = fluid.layers.multiclass_nms( - bboxes=cliped_box, - scores=cls_prob, - score_threshold=self.nms.score_threshold, - nms_top_k=self.nms.nms_top_k, - keep_top_k=self.nms.keep_top_k, - nms_threshold=self.nms.nms_threshold, - normalized=self.nms.normalized, - nms_eta=self.nms.nms_eta, - background_label=self.nms.background_label) - return pred_result diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py deleted file mode 100644 index b38501e5bf18c479f4de0de565cba26c84fa0c9d..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py +++ /dev/null @@ -1,118 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import os -from collections import OrderedDict - -import cv2 -import numpy as np -from PIL import Image, ImageEnhance -from paddle import fluid - -__all__ = ['test_reader'] - - -def test_reader(paths=None, images=None): - """ - data generator - - Args: - paths (list[str]): paths to images. - images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] - - Yield: - res (dict): key contains 'image', 'im_info', 'im_shape', the corresponding values is: - image (numpy.ndarray): the image to be fed into network - im_info (numpy.ndarray): the info about the preprocessed. - im_shape (numpy.ndarray): the shape of image. - """ - img_list = list() - if paths: - for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file path.".format(img_path) - img = cv2.imread(img_path).astype('float32') - img_list.append(img) - if images is not None: - for img in images: - img_list.append(img) - - for im in img_list: - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - im = im.astype(np.float32, copy=False) - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - mean = np.array(mean)[np.newaxis, np.newaxis, :] - std = np.array(std)[np.newaxis, np.newaxis, :] - im = im / 255.0 - im -= mean - im /= std - - target_size = 800 - max_size = 1333 - - shape = im.shape - # im_shape holds the original shape of image. - im_shape = np.array([shape[0], shape[1], 1.0]).astype('float32') - im_size_min = np.min(shape[0:2]) - im_size_max = np.max(shape[0:2]) - im_scale = float(target_size) / float(im_size_min) - if np.round(im_scale * im_size_max) > max_size: - im_scale = float(max_size) / float(im_size_max) - - resize_w = np.round(im_scale * float(shape[1])) - resize_h = np.round(im_scale * float(shape[0])) - # im_info holds the resize info of image. 
- im_info = np.array([resize_h, resize_w, im_scale]).astype('float32') - - im = cv2.resize( - im, - None, - None, - fx=im_scale, - fy=im_scale, - interpolation=cv2.INTER_LINEAR) - - # HWC --> CHW - im = np.swapaxes(im, 1, 2) - im = np.swapaxes(im, 1, 0) - yield {'image': im, 'im_info': im_info, 'im_shape': im_shape} - - -def padding_minibatch(batch_data, coarsest_stride=0, use_padded_im_info=True): - max_shape_org = np.array( - [data['image'].shape for data in batch_data]).max(axis=0) - if coarsest_stride > 0: - max_shape = np.zeros((3)).astype('int32') - max_shape[1] = int( - np.ceil(max_shape_org[1] / coarsest_stride) * coarsest_stride) - max_shape[2] = int( - np.ceil(max_shape_org[2] / coarsest_stride) * coarsest_stride) - else: - max_shape = max_shape_org.astype('int32') - - padding_image = list() - padding_info = list() - padding_shape = list() - - for data in batch_data: - im_c, im_h, im_w = data['image'].shape - # image - padding_im = np.zeros((im_c, max_shape[1], max_shape[2]), - dtype=np.float32) - padding_im[:, 0:im_h, 0:im_w] = data['image'] - padding_image.append(padding_im) - # im_info - data['im_info'][ - 0] = max_shape[1] if use_padded_im_info else max_shape_org[1] - data['im_info'][ - 1] = max_shape[2] if use_padded_im_info else max_shape_org[2] - padding_info.append(data['im_info']) - padding_shape.append(data['im_shape']) - - padding_image = np.array(padding_image).astype('float32') - padding_info = np.array(padding_info).astype('float32') - padding_shape = np.array(padding_shape).astype('float32') - return padding_image, padding_info, padding_shape diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/fpn.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/fpn.py deleted file mode 100644 index bd19c712ecc9b0112685c061046812f6ff418e42..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/fpn.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
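The `coarsest_stride=32` padding in `padding_minibatch` above matches the coarsest backbone stride used by the FPN below (spatial scale 1/32), so each level's strided downsampling divides the padded input evenly. A minimal NumPy sketch of the same arithmetic; the helper name `pad_to_stride` and the example shapes are hypothetical:

```python
import numpy as np

def pad_to_stride(images, coarsest_stride=32):
    """Zero-pad a list of CHW arrays to a common stride-aligned size."""
    max_shape = np.array([im.shape for im in images]).max(axis=0)
    # round H and W up to the next multiple of the coarsest stride
    max_h = int(np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride)
    max_w = int(np.ceil(max_shape[2] / coarsest_stride) * coarsest_stride)
    batch = np.zeros((len(images), max_shape[0], max_h, max_w), dtype=np.float32)
    for i, im in enumerate(images):
        c, h, w = im.shape
        batch[i, :, :h, :w] = im  # pad on the bottom/right with zeros
    return batch

# 800x1333 and 640x1024 inputs batch to [2, 3, 800, 1344]: 1333 -> 1344 = 42 * 32
batch = pad_to_stride([np.zeros((3, 800, 1333)), np.zeros((3, 640, 1024))])
assert batch.shape == (2, 3, 800, 1344)
```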
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import copy -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Xavier -from paddle.fluid.regularizer import L2Decay - -__all__ = ['ConvNorm', 'FPN'] - - -def ConvNorm(input, - num_filters, - filter_size, - stride=1, - groups=1, - norm_decay=0., - norm_type='affine_channel', - norm_groups=32, - dilation=1, - lr_scale=1, - freeze_norm=False, - act=None, - norm_name=None, - initializer=None, - name=None): - fan = num_filters - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=((filter_size - 1) // 2) * dilation, - dilation=dilation, - groups=groups, - act=None, - param_attr=ParamAttr( - name=name + "_weights", - initializer=initializer, - learning_rate=lr_scale), - bias_attr=False, - name=name + '.conv2d.output.1') - - norm_lr = 0. if freeze_norm else 1. - pattr = ParamAttr( - name=norm_name + '_scale', - learning_rate=norm_lr * lr_scale, - regularizer=L2Decay(norm_decay)) - battr = ParamAttr( - name=norm_name + '_offset', - learning_rate=norm_lr * lr_scale, - regularizer=L2Decay(norm_decay)) - - if norm_type in ['bn', 'sync_bn']: - global_stats = True if freeze_norm else False - out = fluid.layers.batch_norm( - input=conv, - act=act, - name=norm_name + '.output.1', - param_attr=pattr, - bias_attr=battr, - moving_mean_name=norm_name + '_mean', - moving_variance_name=norm_name + '_variance', - use_global_stats=global_stats) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif norm_type == 'gn': - out = fluid.layers.group_norm( - input=conv, - act=act, - name=norm_name + '.output.1', - groups=norm_groups, - param_attr=pattr, - bias_attr=battr) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif norm_type == 'affine_channel': - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=pattr, - default_initializer=fluid.initializer.Constant(1.)) - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=battr, - default_initializer=fluid.initializer.Constant(0.)) - out = fluid.layers.affine_channel( - x=conv, scale=scale, bias=bias, act=act) - if freeze_norm: - scale.stop_gradient = True - bias.stop_gradient = True - return out - - -class FPN(object): - """ - Feature Pyramid Network, see https://arxiv.org/abs/1612.03144 - - Args: - num_chan (int): number of feature channels - min_level (int): lowest level of the backbone feature map to use - max_level (int): highest level of the backbone feature map to use - spatial_scale (list): feature map scaling factor - has_extra_convs (bool): whether has extral convolutions in higher levels - norm_type (str|None): normalization type, 'bn'/'sync_bn'/'affine_channel' - """ - __shared__ = ['norm_type', 'freeze_norm'] - - def __init__(self, - num_chan=256, - min_level=2, - max_level=6, - spatial_scale=[1. / 32., 1. / 16., 1. / 8., 1. 
/ 4.], - has_extra_convs=False, - norm_type=None, - freeze_norm=False): - self.freeze_norm = freeze_norm - self.num_chan = num_chan - self.min_level = min_level - self.max_level = max_level - self.spatial_scale = spatial_scale - self.has_extra_convs = has_extra_convs - self.norm_type = norm_type - - def _add_topdown_lateral(self, body_name, body_input, upper_output): - lateral_name = 'fpn_inner_' + body_name + '_lateral' - topdown_name = 'fpn_topdown_' + body_name - fan = body_input.shape[1] - if self.norm_type: - initializer = Xavier(fan_out=fan) - lateral = ConvNorm( - body_input, - self.num_chan, - 1, - initializer=initializer, - norm_type=self.norm_type, - freeze_norm=self.freeze_norm, - name=lateral_name, - norm_name=lateral_name) - else: - lateral = fluid.layers.conv2d( - body_input, - self.num_chan, - 1, - param_attr=ParamAttr( - name=lateral_name + "_w", initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name=lateral_name + "_b", - learning_rate=2., - regularizer=L2Decay(0.)), - name=lateral_name) - topdown = fluid.layers.resize_nearest( - upper_output, scale=2., name=topdown_name) - return lateral + topdown - - def get_output(self, body_dict): - """ - Add FPN onto backbone. - - Args: - body_dict(OrderedDict): Dictionary of variables and each element is the - output of backbone. - - Return: - fpn_dict(OrderedDict): A dictionary represents the output of FPN with - their name. - spatial_scale(list): A list of multiplicative spatial scale factor. - """ - spatial_scale = copy.deepcopy(self.spatial_scale) - body_name_list = list(body_dict.keys())[::-1] - num_backbone_stages = len(body_name_list) - self.fpn_inner_output = [[] for _ in range(num_backbone_stages)] - fpn_inner_name = 'fpn_inner_' + body_name_list[0] - body_input = body_dict[body_name_list[0]] - fan = body_input.shape[1] - if self.norm_type: - initializer = Xavier(fan_out=fan) - self.fpn_inner_output[0] = ConvNorm( - body_input, - self.num_chan, - 1, - initializer=initializer, - norm_type=self.norm_type, - freeze_norm=self.freeze_norm, - name=fpn_inner_name, - norm_name=fpn_inner_name) - else: - self.fpn_inner_output[0] = fluid.layers.conv2d( - body_input, - self.num_chan, - 1, - param_attr=ParamAttr( - name=fpn_inner_name + "_w", - initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name=fpn_inner_name + "_b", - learning_rate=2., - regularizer=L2Decay(0.)), - name=fpn_inner_name) - for i in range(1, num_backbone_stages): - body_name = body_name_list[i] - body_input = body_dict[body_name] - top_output = self.fpn_inner_output[i - 1] - fpn_inner_single = self._add_topdown_lateral( - body_name, body_input, top_output) - self.fpn_inner_output[i] = fpn_inner_single - fpn_dict = {} - fpn_name_list = [] - for i in range(num_backbone_stages): - fpn_name = 'fpn_' + body_name_list[i] - fan = self.fpn_inner_output[i].shape[1] * 3 * 3 - if self.norm_type: - initializer = Xavier(fan_out=fan) - fpn_output = ConvNorm( - self.fpn_inner_output[i], - self.num_chan, - 3, - initializer=initializer, - norm_type=self.norm_type, - freeze_norm=self.freeze_norm, - name=fpn_name, - norm_name=fpn_name) - else: - fpn_output = fluid.layers.conv2d( - self.fpn_inner_output[i], - self.num_chan, - filter_size=3, - padding=1, - param_attr=ParamAttr( - name=fpn_name + "_w", initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name=fpn_name + "_b", - learning_rate=2., - regularizer=L2Decay(0.)), - name=fpn_name) - fpn_dict[fpn_name] = fpn_output - fpn_name_list.append(fpn_name) - if not self.has_extra_convs and self.max_level - 
self.min_level == len( - spatial_scale): - body_top_name = fpn_name_list[0] - body_top_extension = fluid.layers.pool2d( - fpn_dict[body_top_name], - 1, - 'max', - pool_stride=2, - name=body_top_name + '_subsampled_2x') - fpn_dict[body_top_name + '_subsampled_2x'] = body_top_extension - fpn_name_list.insert(0, body_top_name + '_subsampled_2x') - spatial_scale.insert(0, spatial_scale[0] * 0.5) - # Coarser FPN levels introduced for RetinaNet - highest_backbone_level = self.min_level + len(spatial_scale) - 1 - if self.has_extra_convs and self.max_level > highest_backbone_level: - fpn_blob = body_dict[body_name_list[0]] - for i in range(highest_backbone_level + 1, self.max_level + 1): - fpn_blob_in = fpn_blob - fpn_name = 'fpn_' + str(i) - if i > highest_backbone_level + 1: - fpn_blob_in = fluid.layers.relu(fpn_blob) - fan = fpn_blob_in.shape[1] * 3 * 3 - fpn_blob = fluid.layers.conv2d( - input=fpn_blob_in, - num_filters=self.num_chan, - filter_size=3, - stride=2, - padding=1, - param_attr=ParamAttr( - name=fpn_name + "_w", initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name=fpn_name + "_b", - learning_rate=2., - regularizer=L2Decay(0.)), - name=fpn_name) - fpn_dict[fpn_name] = fpn_blob - fpn_name_list.insert(0, fpn_name) - spatial_scale.insert(0, spatial_scale[0] * 0.5) - res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list]) - return res_dict, spatial_scale diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py deleted file mode 100644 index f84521ac284742934681fa8ad5e96a11b0990831..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py +++ /dev/null @@ -1,464 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import ast -import argparse -from collections import OrderedDict -from functools import partial -from math import ceil - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.module.module import moduleinfo, runnable, serving -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.io.parser import txt_parser -from paddlehub.common.paddle_helper import add_vars_prefix - -from faster_rcnn_resnet50_fpn_coco2017.processor import load_label_info, postprocess, base64_to_cv2 -from faster_rcnn_resnet50_fpn_coco2017.data_feed import test_reader, padding_minibatch -from faster_rcnn_resnet50_fpn_coco2017.fpn import FPN -from faster_rcnn_resnet50_fpn_coco2017.resnet import ResNet -from faster_rcnn_resnet50_fpn_coco2017.rpn_head import AnchorGenerator, RPNTargetAssign, GenerateProposals, FPNRPNHead -from faster_rcnn_resnet50_fpn_coco2017.bbox_head import MultiClassNMS, BBoxHead, TwoFCHead -from faster_rcnn_resnet50_fpn_coco2017.bbox_assigner import BBoxAssigner -from faster_rcnn_resnet50_fpn_coco2017.roi_extractor import FPNRoIAlign - - -@moduleinfo( - name="faster_rcnn_resnet50_fpn_coco2017", - version="1.0.0", - type="cv/object_detection", - summary= - "Baidu's Faster-RCNN model for object detection, whose backbone is ResNet50, processed with Feature Pyramid Networks", - author="paddlepaddle", - author_email="paddle-dev@baidu.com") -class FasterRCNNResNet50RPN(hub.Module): - def _initialize(self): - # default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 
800, 1333] - self.default_pretrained_model_path = os.path.join( - self.directory, "faster_rcnn_resnet50_fpn_model") - self.label_names = load_label_info( - os.path.join(self.directory, "label_file.txt")) - self._set_config() - - def _set_config(self): - """ - predictor config setting - """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, - num_classes=81, - trainable=True, - pretrained=True, - phase='train'): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - num_classes (int): number of categories - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - phase (str): optional choices are 'train' and 'predict'. - - Returns: - inputs (dict): the input variables. - outputs (dict): the output variables. - context_prog (Program): the program to execute transfer learning. - """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - image = fluid.layers.data( - name='image', shape=[-1, 3, -1, -1], dtype='float32') - # backbone - backbone = ResNet( - norm_type='affine_channel', - depth=50, - feature_maps=[2, 3, 4, 5], - freeze_at=2) - body_feats = backbone(image) - # fpn - fpn = FPN( - max_level=6, - min_level=2, - num_chan=256, - spatial_scale=[0.03125, 0.0625, 0.125, 0.25]) - var_prefix = '@HUB_{}@'.format(self.name) - im_info = fluid.layers.data( - name='im_info', shape=[3], dtype='float32', lod_level=0) - im_shape = fluid.layers.data( - name='im_shape', shape=[3], dtype='float32', lod_level=0) - body_feat_names = list(body_feats.keys()) - body_feats, spatial_scale = fpn.get_output(body_feats) - # rpn_head: RPNHead - rpn_head = self.rpn_head() - rois = rpn_head.get_proposals(body_feats, im_info, mode=phase) - # train - if phase == 'train': - gt_bbox = fluid.layers.data( - name='gt_bbox', shape=[4], dtype='float32', lod_level=1) - is_crowd = fluid.layers.data( - name='is_crowd', shape=[1], dtype='int32', lod_level=1) - gt_class = fluid.layers.data( - name='gt_class', shape=[1], dtype='int32', lod_level=1) - rpn_loss = rpn_head.get_loss(im_info, gt_bbox, is_crowd) - # bbox_assigner: BBoxAssigner - bbox_assigner = self.bbox_assigner(num_classes) - outs = fluid.layers.generate_proposal_labels( - rpn_rois=rois, - gt_classes=gt_class, - is_crowd=is_crowd, - gt_boxes=gt_bbox, - im_info=im_info, - batch_size_per_im=bbox_assigner.batch_size_per_im, - fg_fraction=bbox_assigner.fg_fraction, - fg_thresh=bbox_assigner.fg_thresh, - bg_thresh_hi=bbox_assigner.bg_thresh_hi, - bg_thresh_lo=bbox_assigner.bg_thresh_lo, - bbox_reg_weights=bbox_assigner.bbox_reg_weights, - class_nums=bbox_assigner.class_nums, - use_random=bbox_assigner.use_random) - rois = outs[0] - - roi_extractor = self.roi_extractor() - roi_feat = roi_extractor( - head_inputs=body_feats, - rois=rois, - spatial_scale=spatial_scale) - # head_feat - bbox_head = self.bbox_head(num_classes) - head_feat = bbox_head.head(roi_feat) - if 
isinstance(head_feat, OrderedDict): - head_feat = list(head_feat.values())[0] - if phase == 'train': - inputs = { - 'image': var_prefix + image.name, - 'im_info': var_prefix + im_info.name, - 'im_shape': var_prefix + im_shape.name, - 'gt_class': var_prefix + gt_class.name, - 'gt_bbox': var_prefix + gt_bbox.name, - 'is_crowd': var_prefix + is_crowd.name - } - outputs = { - 'head_features': - var_prefix + head_feat.name, - 'rpn_cls_loss': - var_prefix + rpn_loss['rpn_cls_loss'].name, - 'rpn_reg_loss': - var_prefix + rpn_loss['rpn_reg_loss'].name, - 'generate_proposal_labels': - [var_prefix + var.name for var in outs] - } - elif phase == 'predict': - pred = bbox_head.get_prediction(roi_feat, rois, im_info, - im_shape) - inputs = { - 'image': var_prefix + image.name, - 'im_info': var_prefix + im_info.name, - 'im_shape': var_prefix + im_shape.name - } - outputs = { - 'head_features': var_prefix + head_feat.name, - 'rois': var_prefix + rois.name, - 'bbox_out': var_prefix + pred.name - } - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(startup_program, var_prefix) - - global_vars = context_prog.global_block().vars - inputs = { - key: global_vars[value] - for key, value in inputs.items() - } - outputs = { - key: global_vars[value] if not isinstance(value, list) else - [global_vars[var] for var in value] - for key, value in outputs.items() - } - - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(startup_program) - if pretrained: - - def _if_exist(var): - if num_classes != 81: - if 'bbox_pred' in var.name or 'cls_score' in var.name: - return False - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) - return inputs, outputs, context_prog - - def rpn_head(self): - return FPNRPNHead( - anchor_generator=AnchorGenerator( - anchor_sizes=[32, 64, 128, 256, 512], - aspect_ratios=[0.5, 1.0, 2.0], - stride=[16.0, 16.0], - variance=[1.0, 1.0, 1.0, 1.0]), - rpn_target_assign=RPNTargetAssign( - rpn_batch_size_per_im=256, - rpn_fg_fraction=0.5, - rpn_negative_overlap=0.3, - rpn_positive_overlap=0.7, - rpn_straddle_thresh=0.0), - train_proposal=GenerateProposals( - min_size=0.0, - nms_thresh=0.7, - post_nms_top_n=2000, - pre_nms_top_n=2000), - test_proposal=GenerateProposals( - min_size=0.0, - nms_thresh=0.7, - post_nms_top_n=1000, - pre_nms_top_n=1000), - anchor_start_size=32, - num_chan=256, - min_level=2, - max_level=6) - - def roi_extractor(self): - return FPNRoIAlign( - canconical_level=4, - canonical_size=224, - max_level=5, - min_level=2, - box_resolution=7, - sampling_ratio=2) - - def bbox_head(self, num_classes): - return BBoxHead( - head=TwoFCHead(mlp_dim=1024), - nms=MultiClassNMS( - keep_top_k=100, nms_threshold=0.5, score_threshold=0.05), - num_classes=num_classes) - - def bbox_assigner(self, num_classes): - return BBoxAssigner( - batch_size_per_im=512, - bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], - bg_thresh_hi=0.5, - bg_thresh_lo=0.0, - fg_fraction=0.25, - fg_thresh=0.5, - class_nums=num_classes) - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, 
target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - def object_detection(self, - paths=None, - images=None, - data=None, - use_gpu=False, - batch_size=1, - output_dir='detection_result', - score_thresh=0.5, - visualization=True): - """API of Object Detection. - - Args: - paths (list[str]): The paths of images. - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - output_dir (str): The path to store output images. - visualization (bool): Whether to save image or not. - score_thresh (float): threshold for object detecion. - - Returns: - res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of detection result; - confidence (float): The confidence of detection result. - save_path (str, optional): The path to save output images. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
-                )
-
-        paths = paths if paths else list()
-        if data and 'image' in data:
-            paths += data['image']
-
-        all_images = list()
-        for yield_data in test_reader(paths, images):
-            all_images.append(yield_data)
-
-        images_num = len(all_images)
-        loop_num = ceil(images_num / batch_size)
-        res = []
-
-        for iter_id in range(loop_num):
-            batch_data = []
-            handle_id = iter_id * batch_size
-
-            for image_id in range(batch_size):
-                try:
-                    batch_data.append(all_images[handle_id + image_id])
-                except IndexError:
-                    # the last batch may hold fewer than batch_size images
-                    pass
-
-            padding_image, padding_info, padding_shape = padding_minibatch(
-                batch_data, coarsest_stride=32, use_padded_im_info=True)
-            padding_image_tensor = PaddleTensor(padding_image.copy())
-            padding_info_tensor = PaddleTensor(padding_info.copy())
-            padding_shape_tensor = PaddleTensor(padding_shape.copy())
-            feed_list = [
-                padding_image_tensor, padding_info_tensor, padding_shape_tensor
-            ]
-
-            if use_gpu:
-                data_out = self.gpu_predictor.run(feed_list)
-            else:
-                data_out = self.cpu_predictor.run(feed_list)
-
-            output = postprocess(
-                paths=paths,
-                images=images,
-                data_out=data_out,
-                score_thresh=score_thresh,
-                label_names=self.label_names,
-                output_dir=output_dir,
-                handle_id=handle_id,
-                visualization=visualization)
-            res += output
-
-        return res
-
-    def add_module_config_arg(self):
-        """
-        Add the command config options.
-        """
-        self.arg_config_group.add_argument(
-            '--use_gpu',
-            type=ast.literal_eval,
-            default=False,
-            help="whether to use GPU or not")
-
-        self.arg_config_group.add_argument(
-            '--batch_size',
-            type=int,
-            default=1,
-            help="batch size for prediction")
-
-    def add_module_input_arg(self):
-        """
-        Add the command input options.
-        """
-        self.arg_input_group.add_argument(
-            '--input_path', type=str, default=None, help="input data")
-
-        self.arg_input_group.add_argument(
-            '--input_file',
-            type=str,
-            default=None,
-            help="file containing input data")
-
-    def check_input_data(self, args):
-        input_data = []
-        if args.input_path:
-            input_data = [args.input_path]
-        elif args.input_file:
-            if not os.path.exists(args.input_file):
-                raise RuntimeError("File %s does not exist." % args.input_file)
-            else:
-                input_data = txt_parser.parse(args.input_file, use_strip=True)
-        return input_data
-
-    @serving
-    def serving_method(self, images, **kwargs):
-        """
-        Run as a service.
-        """
-        images_decode = [base64_to_cv2(image) for image in images]
-        results = self.object_detection(images=images_decode, **kwargs)
-        return results
-
-    @runnable
-    def run_cmd(self, argvs):
-        self.parser = argparse.ArgumentParser(
-            description="Run the {}".format(self.name),
-            prog="hub run {}".format(self.name),
-            usage='%(prog)s',
-            add_help=True)
-        self.arg_input_group = self.parser.add_argument_group(
-            title="Input options", description="Input data. Required")
-        self.arg_config_group = self.parser.add_argument_group(
-            title="Config options",
-            description=
-            "Run configuration for controlling module behavior, not required.")
-        self.add_module_config_arg()
-
-        self.add_module_input_arg()
-        args = self.parser.parse_args(argvs)
-        input_data = self.check_input_data(args)
-        if len(input_data) == 0:
-            self.parser.print_help()
-            exit(1)
-        else:
-            for image_path in input_data:
-                if not os.path.exists(image_path):
-                    raise RuntimeError(
-                        "File %s does not exist."
% image_path) - return self.object_detection( - paths=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size) diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/nonlocal_helper.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/nonlocal_helper.py deleted file mode 100644 index 599b8dfa07b6372f25990649212b232c523e19de..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/nonlocal_helper.py +++ /dev/null @@ -1,154 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import paddle.fluid as fluid -from paddle.fluid import ParamAttr - -nonlocal_params = { - "use_zero_init_conv": False, - "conv_init_std": 0.01, - "no_bias": True, - "use_maxpool": False, - "use_softmax": True, - "use_bn": False, - "use_scale": True, # vital for the model prformance!!! - "use_affine": False, - "bn_momentum": 0.9, - "bn_epsilon": 1.0000001e-5, - "bn_init_gamma": 0.9, - "weight_decay_bn": 1.e-4, -} - - -def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner, - max_pool_stride=2): - cur = input - theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr=ParamAttr(name = prefix + '_theta' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if not nonlocal_params["no_bias"] else False, \ - name = prefix + '_theta') - theta_shape = theta.shape - theta_shape_op = fluid.layers.shape(theta) - theta_shape_op.stop_gradient = True - - if nonlocal_params["use_maxpool"]: - max_pool = fluid.layers.pool2d(input = cur, \ - pool_size = [max_pool_stride, max_pool_stride], \ - pool_type = 'max', \ - pool_stride = [max_pool_stride, max_pool_stride], \ - pool_padding = [0, 0], \ - name = prefix + '_pool') - else: - max_pool = cur - - phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_phi' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_phi') - phi_shape = phi.shape - - g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_g' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_g' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_g') - g_shape = g.shape - # we have to use explicit batch size (to support arbitrary spacetime size) - # e.g. 
(8, 1024, 4, 14, 14) => (8, 1024, 784) - theta = fluid.layers.reshape(theta, shape=(0, 0, -1)) - theta = fluid.layers.transpose(theta, [0, 2, 1]) - phi = fluid.layers.reshape(phi, [0, 0, -1]) - theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity') - g = fluid.layers.reshape(g, [0, 0, -1]) - - if nonlocal_params["use_softmax"]: - if nonlocal_params["use_scale"]: - theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5) - else: - theta_phi_sc = theta_phi - p = fluid.layers.softmax( - theta_phi_sc, name=prefix + '_affinity' + '_prob') - else: - # not clear about what is doing in xlw's code - p = None # not implemented - raise "Not implemented when not use softmax" - - # note g's axis[2] corresponds to p's axis[2] - # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1) - p = fluid.layers.transpose(p, [0, 2, 1]) - t = fluid.layers.matmul(g, p, name=prefix + '_y') - - # reshape back - # e.g. (8, 1024, 784) => (8, 1024, 4, 14, 14) - t_shape = t.shape - t_re = fluid.layers.reshape( - t, shape=list(theta_shape), actual_shape=theta_shape_op) - blob_out = t_re - blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \ - filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_out' + "_w", \ - initializer = fluid.initializer.Constant(value = 0.) \ - if nonlocal_params["use_zero_init_conv"] \ - else fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_out' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_out') - blob_out_shape = blob_out.shape - - if nonlocal_params["use_bn"]: - bn_name = prefix + "_bn" - blob_out = fluid.layers.batch_norm(blob_out, \ - # is_test = test_mode, \ - momentum = nonlocal_params["bn_momentum"], \ - epsilon = nonlocal_params["bn_epsilon"], \ - name = bn_name, \ - param_attr = ParamAttr(name = bn_name + "_s", \ - initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - bias_attr = ParamAttr(name = bn_name + "_b", \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - moving_mean_name = bn_name + "_rm", \ - moving_variance_name = bn_name + "_riv") # add bn - - if nonlocal_params["use_affine"]: - affine_scale = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_s'), \ - default_initializer = fluid.initializer.Constant(value = 1.)) - affine_bias = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_b'), \ - default_initializer = fluid.initializer.Constant(value = 0.)) - blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \ - bias = affine_bias, name = prefix + '_affine') # add affine - - return blob_out - - -def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner): - ''' - add_space_nonlocal: - Non-local Neural Networks: see https://arxiv.org/abs/1711.07971 - ''' - conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner) - output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum') - return output diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/processor.py 
b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/processor.py deleted file mode 100644 index a995854e9de8581da3c4a392f22687c358098d6c..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/processor.py +++ /dev/null @@ -1,176 +0,0 @@ -# coding=utf-8 -import base64 -import os - -import cv2 -import numpy as np -from PIL import Image, ImageDraw - -__all__ = [ - 'base64_to_cv2', - 'load_label_info', - 'postprocess', -] - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def get_save_image_name(img, output_dir, image_path): - """Get save image name from source image path. - """ - if not os.path.exists(output_dir): - os.makedirs(output_dir) - image_name = os.path.split(image_path)[-1] - name, ext = os.path.splitext(image_name) - - if ext == '': - if img.format == 'PNG': - ext = '.png' - elif img.format == 'JPEG': - ext = '.jpg' - elif img.format == 'BMP': - ext = '.bmp' - else: - if img.mode == "RGB" or img.mode == "L": - ext = ".jpg" - elif img.mode == "RGBA" or img.mode == "P": - ext = '.png' - - return os.path.join(output_dir, "{}".format(name)) + ext - - -def draw_bounding_box_on_image(image_path, data_list, save_dir): - image = Image.open(image_path) - draw = ImageDraw.Draw(image) - for data in data_list: - left, right, top, bottom = data['left'], data['right'], data[ - 'top'], data['bottom'] - - # draw bbox - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), - (left, top)], - width=2, - fill='red') - - # draw label - if image.mode == 'RGB': - text = data['label'] + ": %.2f%%" % (100 * data['confidence']) - textsize_width, textsize_height = draw.textsize(text=text) - draw.rectangle( - xy=(left, top - (textsize_height + 5), - left + textsize_width + 10, top), - fill=(255, 255, 255)) - draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) - - save_name = get_save_image_name(image, save_dir, image_path) - if os.path.exists(save_name): - os.remove(save_name) - - image.save(save_name) - return save_name - - -def clip_bbox(bbox, img_width, img_height): - xmin = max(min(bbox[0], img_width), 0.) - ymin = max(min(bbox[1], img_height), 0.) - xmax = max(min(bbox[2], img_width), 0.) - ymax = max(min(bbox[3], img_height), 0.) - return float(xmin), float(ymin), float(xmax), float(ymax) - - -def load_label_info(file_path): - with open(file_path, 'r') as fr: - text = fr.readlines() - label_names = [] - for info in text: - label_names.append(info.strip()) - return label_names - - -def postprocess(paths, - images, - data_out, - score_thresh, - label_names, - output_dir, - handle_id, - visualization=True): - """ - postprocess the lod_tensor produced by fluid.Executor.run - - Args: - paths (list[str]): the path of images. - images (list(numpy.ndarray)): list of images, shape of each is [H, W, C]. - data_out (lod_tensor): data produced by executor.run. - score_thresh (float): the low limit of bounding box. - label_names (list[str]): label names. - output_dir (str): output directory. - handle_id (int): The number of images that have been handled. - visualization (bool): whether to save as images. - - Returns: - res (list[dict]): The result of vehicles detecion. 
keys include 'data', 'save_path', the corresponding value is: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of detection result; - confidence (float): The confidence of detection result. - save_path (str): The path to save output images. - """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() - - if handle_id < len(paths): - unhandled_paths = paths[handle_id:] - unhandled_paths_num = len(unhandled_paths) - else: - unhandled_paths_num = 0 - - output = [] - for index in range(len(lod) - 1): - output_i = {'data': []} - if index < unhandled_paths_num: - org_img_path = unhandled_paths[index] - org_img = Image.open(org_img_path) - output_i['path'] = org_img_path - else: - org_img = images[index - unhandled_paths_num] - org_img = org_img.astype(np.uint8) - org_img = Image.fromarray(org_img[:, :, ::-1]) - if visualization: - org_img_path = get_save_image_name( - org_img, output_dir, 'image_numpy_{}'.format( - (handle_id + index))) - org_img.save(org_img_path) - org_img_height = org_img.height - org_img_width = org_img.width - result_i = results[lod[index]:lod[index + 1]] - for row in result_i: - if len(row) != 6: - continue - if row[1] < score_thresh: - continue - category_id = int(row[0]) - confidence = row[1] - bbox = row[2:] - dt = {} - dt['label'] = label_names[category_id] - dt['confidence'] = float(confidence) - dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox( - bbox, org_img_width, org_img_height) - output_i['data'].append(dt) - - output.append(output_i) - if visualization: - output_i['save_path'] = draw_bounding_box_on_image( - org_img_path, output_i['data'], output_dir) - - return output diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/resnet.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/resnet.py deleted file mode 100644 index 4bd6fb61e9a51c70b6197b25a7cf788c2ca5b790..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/resnet.py +++ /dev/null @@ -1,447 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -from collections import OrderedDict -from numbers import Integral - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.framework import Variable -from paddle.fluid.regularizer import L2Decay -from paddle.fluid.initializer import Constant - -from .nonlocal_helper import add_space_nonlocal -from .name_adapter import NameAdapter - -__all__ = ['ResNet', 'ResNetC5'] - - -class ResNet(object): - """ - Residual Network, see https://arxiv.org/abs/1512.03385 - Args: - depth (int): ResNet depth, should be 34, 50. 
- freeze_at (int): freeze the backbone at which stage - norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel' - freeze_norm (bool): freeze normalization layers - norm_decay (float): weight decay for normalization layer weights - variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently - feature_maps (list): index of stages whose feature maps are returned - dcn_v2_stages (list): index of stages who select deformable conv v2 - nonlocal_stages (list): index of stages who select nonlocal networks - """ - __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name'] - - def __init__(self, - depth=50, - freeze_at=0, - norm_type='sync_bn', - freeze_norm=False, - norm_decay=0., - variant='b', - feature_maps=[3, 4, 5], - dcn_v2_stages=[], - weight_prefix_name='', - nonlocal_stages=[], - get_prediction=False, - class_dim=1000): - super(ResNet, self).__init__() - - if isinstance(feature_maps, Integral): - feature_maps = [feature_maps] - - assert depth in [34, 50], \ - "depth {} not in [34, 50]" - assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant" - assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" - assert len(feature_maps) > 0, "need one or more feature maps" - assert norm_type in ['bn', 'sync_bn', 'affine_channel'] - assert not (len(nonlocal_stages)>0 and depth<50), \ - "non-local is not supported for resnet18 or resnet34" - - self.depth = depth - self.freeze_at = freeze_at - self.norm_type = norm_type - self.norm_decay = norm_decay - self.freeze_norm = freeze_norm - self.variant = variant - self._model_type = 'ResNet' - self.feature_maps = feature_maps - self.dcn_v2_stages = dcn_v2_stages - self.depth_cfg = { - 34: ([3, 4, 6, 3], self.basicblock), - 50: ([3, 4, 6, 3], self.bottleneck), - } - self.stage_filters = [64, 128, 256, 512] - self._c1_out_chan_num = 64 - self.na = NameAdapter(self) - self.prefix_name = weight_prefix_name - - self.nonlocal_stages = nonlocal_stages - self.nonlocal_mod_cfg = { - 50: 2, - 101: 5, - 152: 8, - 200: 12, - } - self.get_prediction = get_prediction - self.class_dim = class_dim - - def _conv_offset(self, - input, - filter_size, - stride, - padding, - act=None, - name=None): - out_channel = filter_size * filter_size * 3 - out = fluid.layers.conv2d( - input, - num_filters=out_channel, - filter_size=filter_size, - stride=stride, - padding=padding, - param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"), - bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"), - act=act, - name=name) - return out - - def _conv_norm(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - dcn_v2=False): - _name = self.prefix_name + name if self.prefix_name != '' else name - if not dcn_v2: - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + '.conv2d.output.1') - else: - # select deformable conv" - offset_mask = self._conv_offset( - input=input, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - act=None, - name=_name + "_conv_offset") - offset_channel = filter_size**2 * 2 - mask_channel = filter_size**2 - offset, mask = fluid.layers.split( - input=offset_mask, - num_or_sections=[offset_channel, mask_channel], - dim=1) - mask = fluid.layers.sigmoid(mask) - conv = fluid.layers.deformable_conv( - input=input, - offset=offset, - 
mask=mask, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - deformable_groups=1, - im2col_step=1, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + ".conv2d.output.1") - - bn_name = self.na.fix_conv_norm_name(name) - bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name - - norm_lr = 0. if self.freeze_norm else 1. - norm_decay = self.norm_decay - pattr = ParamAttr( - name=bn_name + '_scale', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - battr = ParamAttr( - name=bn_name + '_offset', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - - if self.norm_type in ['bn', 'sync_bn']: - global_stats = True if self.freeze_norm else False - out = fluid.layers.batch_norm( - input=conv, - act=act, - name=bn_name + '.output.1', - param_attr=pattr, - bias_attr=battr, - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', - use_global_stats=global_stats) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif self.norm_type == 'affine_channel': - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=pattr, - default_initializer=fluid.initializer.Constant(1.)) - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=battr, - default_initializer=fluid.initializer.Constant(0.)) - out = fluid.layers.affine_channel( - x=conv, scale=scale, bias=bias, act=act) - if self.freeze_norm: - scale.stop_gradient = True - bias.stop_gradient = True - return out - - def _shortcut(self, input, ch_out, stride, is_first, name): - max_pooling_in_short_cut = self.variant == 'd' - ch_in = input.shape[1] - # the naming rule is same as pretrained weight - name = self.na.fix_shortcut_name(name) - std_senet = getattr(self, 'std_senet', False) - if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first): - if std_senet: - if is_first: - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return self._conv_norm(input, ch_out, 3, stride, name=name) - if max_pooling_in_short_cut and not is_first: - input = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - ceil_mode=True, - pool_type='avg') - return self._conv_norm(input, ch_out, 1, 1, name=name) - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - if self.variant == 'a': - stride1, stride2 = stride, 1 - else: - stride1, stride2 = 1, stride - - # ResNeXt - groups = getattr(self, 'groups', 1) - group_width = getattr(self, 'group_width', -1) - if groups == 1: - expand = 4 - elif (groups * group_width) == 256: - expand = 1 - else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d - num_filters = num_filters // 2 - expand = 2 - - conv_name1, conv_name2, conv_name3, \ - shortcut_name = self.na.fix_bottleneck_name(name) - std_senet = getattr(self, 'std_senet', False) - if std_senet: - conv_def = [[ - int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1 - ], [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - else: - conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], - [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - - residual = input - for i, (c, k, s, act, g, _name) 
in enumerate(conv_def): - residual = self._conv_norm( - input=residual, - num_filters=c, - filter_size=k, - stride=s, - act=act, - groups=g, - name=_name, - dcn_v2=(i == 1 and dcn_v2)) - short = self._shortcut( - input, - num_filters * expand, - stride, - is_first=is_first, - name=shortcut_name) - # Squeeze-and-Excitation - if callable(getattr(self, '_squeeze_excitation', None)): - residual = self._squeeze_excitation( - input=residual, num_channels=num_filters, name='fc' + name) - return fluid.layers.elementwise_add( - x=short, y=residual, act='relu', name=name + ".add.output.5") - - def basicblock(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - assert dcn_v2 is False, "Not implemented yet." - conv0 = self._conv_norm( - input=input, - num_filters=num_filters, - filter_size=3, - act='relu', - stride=stride, - name=name + "_branch2a") - conv1 = self._conv_norm( - input=conv0, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - short = self._shortcut( - input, num_filters, stride, is_first, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - - def layer_warp(self, input, stage_num): - """ - Args: - input (Variable): input variable. - stage_num (int): the stage number, should be 2, 3, 4, 5 - - Returns: - The last variable in endpoint-th stage. - """ - assert stage_num in [2, 3, 4, 5] - - stages, block_func = self.depth_cfg[self.depth] - count = stages[stage_num - 2] - - ch_out = self.stage_filters[stage_num - 2] - is_first = False if stage_num != 2 else True - dcn_v2 = True if stage_num in self.dcn_v2_stages else False - - nonlocal_mod = 1000 - if stage_num in self.nonlocal_stages: - nonlocal_mod = self.nonlocal_mod_cfg[ - self.depth] if stage_num == 4 else 2 - - # Make the layer name and parameter name consistent - # with ImageNet pre-trained model - conv = input - for i in range(count): - conv_name = self.na.fix_layer_warp_name(stage_num, count, i) - if self.depth < 50: - is_first = True if i == 0 and stage_num == 2 else False - conv = block_func( - input=conv, - num_filters=ch_out, - stride=2 if i == 0 and stage_num != 2 else 1, - is_first=is_first, - name=conv_name, - dcn_v2=dcn_v2) - - # add non local model - dim_in = conv.shape[1] - nonlocal_name = "nonlocal_conv{}".format(stage_num) - if i % nonlocal_mod == nonlocal_mod - 1: - conv = add_space_nonlocal(conv, dim_in, dim_in, - nonlocal_name + '_{}'.format(i), - int(dim_in / 2)) - return conv - - def c1_stage(self, input): - out_chan = self._c1_out_chan_num - - conv1_name = self.na.fix_c1_stage_name() - - if self.variant in ['c', 'd']: - conv_def = [ - [out_chan // 2, 3, 2, "conv1_1"], - [out_chan // 2, 3, 1, "conv1_2"], - [out_chan, 3, 1, "conv1_3"], - ] - else: - conv_def = [[out_chan, 7, 2, conv1_name]] - - for (c, k, s, _name) in conv_def: - input = self._conv_norm( - input=input, - num_filters=c, - filter_size=k, - stride=s, - act='relu', - name=_name) - - output = fluid.layers.pool2d( - input=input, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - return output - - def __call__(self, input): - assert isinstance(input, Variable) - assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ - "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) - - res_endpoints = [] - - res = input - feature_maps = self.feature_maps - severed_head = getattr(self, 'severed_head', False) - if not severed_head: - res = self.c1_stage(res) - feature_maps = range(2, max(self.feature_maps) + 1) - - for i in feature_maps: 
- res = self.layer_warp(res, i)
- if i in self.feature_maps:
- res_endpoints.append(res)
- if self.freeze_at >= i:
- res.stop_gradient = True
- if self.get_prediction:
- pool = fluid.layers.pool2d(
- input=res, pool_type='avg', global_pooling=True)
- stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
-
- out = fluid.layers.fc(
- input=pool,
- size=self.class_dim,
- param_attr=fluid.param_attr.ParamAttr(
- initializer=fluid.initializer.Uniform(-stdv, stdv)))
- out = fluid.layers.softmax(out)
- return out
- return OrderedDict([('res{}_sum'.format(self.feature_maps[idx]), feat)
- for idx, feat in enumerate(res_endpoints)])
-
-
-class ResNetC5(ResNet):
- def __init__(self,
- depth=50,
- freeze_at=2,
- norm_type='affine_channel',
- freeze_norm=True,
- norm_decay=0.,
- variant='b',
- feature_maps=[5],
- weight_prefix_name=''):
- super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm,
- norm_decay, variant, feature_maps)
- self.severed_head = True
diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/roi_extractor.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/roi_extractor.py
deleted file mode 100644
index 6e3398d8c4ceb4e78802f86de515c26d0a41e34b..0000000000000000000000000000000000000000
--- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/roi_extractor.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# coding=utf-8
-import paddle.fluid as fluid
-
-__all__ = ['FPNRoIAlign']
-
-
-class FPNRoIAlign(object):
- """
- RoI align pooling for FPN feature maps
- Args:
- sampling_ratio (int): number of sampling points
- min_level (int): lowest level of FPN layer
- max_level (int): highest level of FPN layer
- canconical_level (int): the canonical FPN feature map level
- canonical_size (int): the canonical FPN feature map size
- box_resolution (int): box RoI resolution
- mask_resolution (int): mask RoI resolution
- """
-
- def __init__(self,
- sampling_ratio=0,
- min_level=2,
- max_level=5,
- canconical_level=4,
- canonical_size=224,
- box_resolution=7,
- mask_resolution=14):
- super(FPNRoIAlign, self).__init__()
- self.sampling_ratio = sampling_ratio
- self.min_level = min_level
- self.max_level = max_level
- self.canconical_level = canconical_level
- self.canonical_size = canonical_size
- self.box_resolution = box_resolution
- self.mask_resolution = mask_resolution
-
- def __call__(self, head_inputs, rois, spatial_scale, is_mask=False):
- """
- Apply RoI align to several levels of feature maps to get RoI features.
- Distribute RoIs to different levels by area and get a list of RoI
- features by distributed RoIs and their corresponding feature maps.
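
The area-based routing performed by `fluid.layers.distribute_fpn_proposals` follows the standard FPN assignment rule driven by the `canconical_level`/`canonical_size` arguments above. A minimal NumPy sketch of that rule (the helper name `assign_fpn_level` is ours, purely for illustration):

```python
import numpy as np

def assign_fpn_level(rois, min_level=2, max_level=5,
                     canonical_level=4, canonical_size=224):
    """rois: [N, 4] boxes as (x1, y1, x2, y2) in image coordinates."""
    w = rois[:, 2] - rois[:, 0]
    h = rois[:, 3] - rois[:, 1]
    scale = np.sqrt(np.clip(w * h, 1e-6, None))
    # a canonical_size RoI maps to canonical_level; each doubling or
    # halving of scale moves it one pyramid level up or down
    lvl = np.floor(canonical_level + np.log2(scale / canonical_size))
    return np.clip(lvl, min_level, max_level).astype(np.int64)

rois = np.array([[0, 0, 224, 224], [0, 0, 32, 32], [0, 0, 640, 640]],
                dtype=np.float32)
print(assign_fpn_level(rois))  # [4 2 5]
```
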
- - Returns: - roi_feat(Variable): RoI features with shape of [M, C, R, R], - where M is the number of RoIs and R is RoI resolution - - """ - k_min = self.min_level - k_max = self.max_level - num_roi_lvls = k_max - k_min + 1 - name_list = list(head_inputs.keys()) - input_name_list = name_list[-num_roi_lvls:] - spatial_scale = spatial_scale[-num_roi_lvls:] - rois_dist, restore_index = fluid.layers.distribute_fpn_proposals( - rois, k_min, k_max, self.canconical_level, self.canonical_size) - # rois_dist is in ascend order - roi_out_list = [] - resolution = is_mask and self.mask_resolution or self.box_resolution - for lvl in range(num_roi_lvls): - name_index = num_roi_lvls - lvl - 1 - rois_input = rois_dist[lvl] - head_input = head_inputs[input_name_list[name_index]] - sc = spatial_scale[name_index] - roi_out = fluid.layers.roi_align( - input=head_input, - rois=rois_input, - pooled_height=resolution, - pooled_width=resolution, - spatial_scale=sc, - sampling_ratio=self.sampling_ratio) - roi_out_list.append(roi_out) - roi_feat_shuffle = fluid.layers.concat(roi_out_list) - roi_feat_ = fluid.layers.gather(roi_feat_shuffle, restore_index) - roi_feat = fluid.layers.lod_reset(roi_feat_, rois) - - return roi_feat diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/rpn_head.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/rpn_head.py deleted file mode 100644 index e1b69866d3938764669e63aebda321eb1e4c5439..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/rpn_head.py +++ /dev/null @@ -1,533 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Normal -from paddle.fluid.regularizer import L2Decay - -__all__ = [ - 'AnchorGenerator', 'RPNTargetAssign', 'GenerateProposals', 'RPNHead', - 'FPNRPNHead' -] - - -class AnchorGenerator(object): - # __op__ = fluid.layers.anchor_generator - def __init__(self, - stride=[16.0, 16.0], - anchor_sizes=[32, 64, 128, 256, 512], - aspect_ratios=[0.5, 1., 2.], - variance=[1., 1., 1., 1.]): - super(AnchorGenerator, self).__init__() - self.anchor_sizes = anchor_sizes - self.aspect_ratios = aspect_ratios - self.variance = variance - self.stride = stride - - -class RPNTargetAssign(object): - # __op__ = fluid.layers.rpn_target_assign - def __init__(self, - rpn_batch_size_per_im=256, - rpn_straddle_thresh=0., - rpn_fg_fraction=0.5, - rpn_positive_overlap=0.7, - rpn_negative_overlap=0.3, - use_random=True): - super(RPNTargetAssign, self).__init__() - self.rpn_batch_size_per_im = rpn_batch_size_per_im - self.rpn_straddle_thresh = rpn_straddle_thresh - self.rpn_fg_fraction = rpn_fg_fraction - self.rpn_positive_overlap = rpn_positive_overlap - self.rpn_negative_overlap = rpn_negative_overlap - self.use_random = use_random - - -class GenerateProposals(object): - # __op__ = fluid.layers.generate_proposals - def __init__(self, - pre_nms_top_n=6000, - post_nms_top_n=1000, - nms_thresh=.5, - min_size=.1, - eta=1.): - super(GenerateProposals, self).__init__() - self.pre_nms_top_n = pre_nms_top_n - self.post_nms_top_n = post_nms_top_n - self.nms_thresh = nms_thresh - self.min_size = min_size - self.eta = eta - - -class RPNHead(object): - """ - RPN Head - - Args: - anchor_generator (object): `AnchorGenerator` instance - rpn_target_assign (object): 
`RPNTargetAssign` instance - train_proposal (object): `GenerateProposals` instance for training - test_proposal (object): `GenerateProposals` instance for testing - num_classes (int): number of classes in rpn output - """ - __inject__ = [ - 'anchor_generator', 'rpn_target_assign', 'train_proposal', - 'test_proposal' - ] - - def __init__(self, - anchor_generator, - rpn_target_assign, - train_proposal, - test_proposal, - num_classes=1): - super(RPNHead, self).__init__() - self.anchor_generator = anchor_generator - self.rpn_target_assign = rpn_target_assign - self.train_proposal = train_proposal - self.test_proposal = test_proposal - self.num_classes = num_classes - - def _get_output(self, input): - """ - Get anchor and RPN head output. - - Args: - input(Variable): feature map from backbone with shape of [N, C, H, W] - - Returns: - rpn_cls_score(Variable): Output of rpn head with shape of [N, num_anchors, H, W]. - rpn_bbox_pred(Variable): Output of rpn head with shape of [N, num_anchors * 4, H, W]. - """ - dim_out = input.shape[1] - rpn_conv = fluid.layers.conv2d( - input=input, - num_filters=dim_out, - filter_size=3, - stride=1, - padding=1, - act='relu', - name='conv_rpn', - param_attr=ParamAttr( - name="conv_rpn_w", initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.))) - # Generate anchors self.anchor_generator - self.anchor, self.anchor_var = fluid.layers.anchor_generator( - input=rpn_conv, - anchor_sizes=self.anchor_generator.anchor_sizes, - aspect_ratios=self.anchor_generator.aspect_ratios, - variance=self.anchor_generator.variance, - stride=self.anchor_generator.stride) - - num_anchor = self.anchor.shape[2] - # Proposal classification scores - self.rpn_cls_score = fluid.layers.conv2d( - rpn_conv, - num_filters=num_anchor * self.num_classes, - filter_size=1, - stride=1, - padding=0, - act=None, - name='rpn_cls_score', - param_attr=ParamAttr( - name="rpn_cls_logits_w", initializer=Normal(loc=0., - scale=0.01)), - bias_attr=ParamAttr( - name="rpn_cls_logits_b", - learning_rate=2., - regularizer=L2Decay(0.))) - # Proposal bbox regression deltas - self.rpn_bbox_pred = fluid.layers.conv2d( - rpn_conv, - num_filters=4 * num_anchor, - filter_size=1, - stride=1, - padding=0, - act=None, - name='rpn_bbox_pred', - param_attr=ParamAttr( - name="rpn_bbox_pred_w", initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name="rpn_bbox_pred_b", - learning_rate=2., - regularizer=L2Decay(0.))) - return self.rpn_cls_score, self.rpn_bbox_pred - - def get_proposals(self, body_feats, im_info, mode='train'): - """ - Get proposals according to the output of backbone. - - Args: - body_feats (dict): The dictionary of feature maps from backbone. - im_info(Variable): The information of image with shape [N, 3] with - shape (height, width, scale). - body_feat_names(list): A list of names of feature maps from - backbone. - - Returns: - rpn_rois(Variable): Output proposals with shape of (rois_num, 4). - """ - # In RPN Heads, only the last feature map of backbone is used. - # And body_feat_names[-1] represents the last level name of backbone. 
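
As an aside, the `fluid.layers.anchor_generator` call in `_get_output` above tiles `len(anchor_sizes) * len(aspect_ratios)` anchors over every feature-map cell. A small NumPy sketch of that layout, under the usual equal-area aspect-ratio convention (`make_anchors` is a hypothetical helper, not the fluid op):

```python
import numpy as np

def make_anchors(feat_h, feat_w, stride=16.0,
                 sizes=(32, 64, 128, 256, 512), ratios=(0.5, 1.0, 2.0)):
    base = []
    for s in sizes:
        for r in ratios:
            # keep area s*s while setting the h/w aspect ratio to r
            w, h = s / np.sqrt(r), s * np.sqrt(r)
            base.append([-w / 2, -h / 2, w / 2, h / 2])
    base = np.array(base)                                   # [A, 4]
    ys, xs = np.meshgrid(np.arange(feat_h), np.arange(feat_w), indexing='ij')
    centers = np.stack([xs, ys, xs, ys], axis=-1) * stride + stride / 2
    return centers[:, :, None, :] + base                    # [H, W, A, 4]

print(make_anchors(50, 84).shape)  # (50, 84, 15, 4): 5 sizes x 3 ratios
```
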
- body_feat = list(body_feats.values())[-1] - rpn_cls_score, rpn_bbox_pred = self._get_output(body_feat) - - if self.num_classes == 1: - rpn_cls_prob = fluid.layers.sigmoid( - rpn_cls_score, name='rpn_cls_prob') - else: - rpn_cls_score = fluid.layers.transpose( - rpn_cls_score, perm=[0, 2, 3, 1]) - rpn_cls_score = fluid.layers.reshape( - rpn_cls_score, shape=(0, 0, 0, -1, self.num_classes)) - rpn_cls_prob_tmp = fluid.layers.softmax( - rpn_cls_score, use_cudnn=False, name='rpn_cls_prob') - rpn_cls_prob_slice = fluid.layers.slice( - rpn_cls_prob_tmp, axes=[4], starts=[1], ends=[self.num_classes]) - rpn_cls_prob, _ = fluid.layers.topk(rpn_cls_prob_slice, 1) - rpn_cls_prob = fluid.layers.reshape( - rpn_cls_prob, shape=(0, 0, 0, -1)) - rpn_cls_prob = fluid.layers.transpose( - rpn_cls_prob, perm=[0, 3, 1, 2]) - prop_op = self.train_proposal if mode == 'train' else self.test_proposal - # prop_op - rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals( - scores=rpn_cls_prob, - bbox_deltas=rpn_bbox_pred, - im_info=im_info, - anchors=self.anchor, - variances=self.anchor_var, - pre_nms_top_n=prop_op.pre_nms_top_n, - post_nms_top_n=prop_op.post_nms_top_n, - nms_thresh=prop_op.nms_thresh, - min_size=prop_op.min_size, - eta=prop_op.eta) - return rpn_rois - - def _transform_input(self, rpn_cls_score, rpn_bbox_pred, anchor, - anchor_var): - rpn_cls_score = fluid.layers.transpose(rpn_cls_score, perm=[0, 2, 3, 1]) - rpn_bbox_pred = fluid.layers.transpose(rpn_bbox_pred, perm=[0, 2, 3, 1]) - anchor = fluid.layers.reshape(anchor, shape=(-1, 4)) - anchor_var = fluid.layers.reshape(anchor_var, shape=(-1, 4)) - rpn_cls_score = fluid.layers.reshape( - x=rpn_cls_score, shape=(0, -1, self.num_classes)) - rpn_bbox_pred = fluid.layers.reshape(x=rpn_bbox_pred, shape=(0, -1, 4)) - return rpn_cls_score, rpn_bbox_pred, anchor, anchor_var - - def _get_loss_input(self): - for attr in ['rpn_cls_score', 'rpn_bbox_pred', 'anchor', 'anchor_var']: - if not getattr(self, attr, None): - raise ValueError("self.{} should not be None,".format(attr), - "call RPNHead.get_proposals first") - return self._transform_input(self.rpn_cls_score, self.rpn_bbox_pred, - self.anchor, self.anchor_var) - - def get_loss(self, im_info, gt_box, is_crowd, gt_label=None): - """ - Sample proposals and Calculate rpn loss. - - Args: - im_info(Variable): The information of image with shape [N, 3] with - shape (height, width, scale). - gt_box(Variable): The ground-truth bounding boxes with shape [M, 4]. - M is the number of groundtruth. - is_crowd(Variable): Indicates groud-truth is crowd or not with - shape [M, 1]. M is the number of groundtruth. - - Returns: - Type: dict - rpn_cls_loss(Variable): RPN classification loss. - rpn_bbox_loss(Variable): RPN bounding box regression loss. 
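
For reference, `generate_proposals` above turns each anchor plus its predicted `(dx, dy, dw, dh)` into a box before clipping and NMS; with the variances fixed at 1.0, as they are in this head, the decoding reduces to the standard center-size transform. A NumPy sketch (`decode_deltas` is illustrative, not the fluid kernel):

```python
import numpy as np

def decode_deltas(anchors, deltas):
    """anchors, deltas: [N, 4]; anchors as (x1, y1, x2, y2)."""
    aw = anchors[:, 2] - anchors[:, 0]
    ah = anchors[:, 3] - anchors[:, 1]
    ax = anchors[:, 0] + 0.5 * aw
    ay = anchors[:, 1] + 0.5 * ah
    cx = ax + deltas[:, 0] * aw      # shift center by a fraction of anchor size
    cy = ay + deltas[:, 1] * ah
    w = aw * np.exp(deltas[:, 2])    # scale width/height exponentially
    h = ah * np.exp(deltas[:, 3])
    return np.stack([cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2], axis=1)

a = np.array([[0., 0., 16., 16.]])
d = np.array([[0.5, 0., 0., 0.]])
print(decode_deltas(a, d))  # [[ 8.  0. 24. 16.]]
```
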
- - """ - rpn_cls, rpn_bbox, anchor, anchor_var = self._get_loss_input() - if self.num_classes == 1: - # self.rpn_target_assign - score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ - fluid.layers.rpn_target_assign( - bbox_pred=rpn_bbox, - cls_logits=rpn_cls, - anchor_box=anchor, - anchor_var=anchor_var, - gt_boxes=gt_box, - is_crowd=is_crowd, - im_info=im_info, - rpn_batch_size_per_im=self.rpn_target_assign.rpn_batch_size_per_im, - rpn_straddle_thresh=self.rpn_target_assign.rpn_straddle_thresh, - rpn_fg_fraction=self.rpn_target_assign.rpn_fg_fraction, - rpn_positive_overlap=self.rpn_target_assign.rpn_positive_overlap, - rpn_negative_overlap=self.rpn_target_assign.rpn_negative_overlap, - use_random=self.rpn_target_assign.use_random) - score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32') - score_tgt.stop_gradient = True - rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=score_pred, label=score_tgt) - else: - score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ - self.rpn_target_assign( - bbox_pred=rpn_bbox, - cls_logits=rpn_cls, - anchor_box=anchor, - anchor_var=anchor_var, - gt_boxes=gt_box, - gt_labels=gt_label, - is_crowd=is_crowd, - num_classes=self.num_classes, - im_info=im_info) - labels_int64 = fluid.layers.cast(x=score_tgt, dtype='int64') - labels_int64.stop_gradient = True - rpn_cls_loss = fluid.layers.softmax_with_cross_entropy( - logits=score_pred, label=labels_int64, numeric_stable_mode=True) - - rpn_cls_loss = fluid.layers.reduce_mean( - rpn_cls_loss, name='loss_rpn_cls') - - loc_tgt = fluid.layers.cast(x=loc_tgt, dtype='float32') - loc_tgt.stop_gradient = True - rpn_reg_loss = fluid.layers.smooth_l1( - x=loc_pred, - y=loc_tgt, - sigma=3.0, - inside_weight=bbox_weight, - outside_weight=bbox_weight) - rpn_reg_loss = fluid.layers.reduce_sum( - rpn_reg_loss, name='loss_rpn_bbox') - score_shape = fluid.layers.shape(score_tgt) - score_shape = fluid.layers.cast(x=score_shape, dtype='float32') - norm = fluid.layers.reduce_prod(score_shape) - norm.stop_gradient = True - rpn_reg_loss = rpn_reg_loss / norm - return {'rpn_cls_loss': rpn_cls_loss, 'rpn_reg_loss': rpn_reg_loss} - - -class FPNRPNHead(RPNHead): - """ - RPN Head that supports FPN input - - Args: - anchor_generator (object): `AnchorGenerator` instance - rpn_target_assign (object): `RPNTargetAssign` instance - train_proposal (object): `GenerateProposals` instance for training - test_proposal (object): `GenerateProposals` instance for testing - anchor_start_size (int): size of anchor at the first scale - num_chan (int): number of FPN output channels - min_level (int): lowest level of FPN output - max_level (int): highest level of FPN output - num_classes (int): number of classes in rpn output - """ - - def __init__(self, - anchor_generator, - rpn_target_assign, - train_proposal, - test_proposal, - anchor_start_size=32, - num_chan=256, - min_level=2, - max_level=6, - num_classes=1): - super(FPNRPNHead, self).__init__(anchor_generator, rpn_target_assign, - train_proposal, test_proposal) - self.anchor_start_size = anchor_start_size - self.num_chan = num_chan - self.min_level = min_level - self.max_level = max_level - self.num_classes = num_classes - - self.fpn_rpn_list = [] - self.anchors_list = [] - self.anchor_var_list = [] - - def _get_output(self, input, feat_lvl): - """ - Get anchor and FPN RPN head output at one level. - - Args: - input(Variable): Body feature from backbone. - feat_lvl(int): Indicate the level of rpn output corresponding - to the level of feature map. 
- - Return: - rpn_cls_score(Variable): Output of one level of fpn rpn head with - shape of [N, num_anchors, H, W]. - rpn_bbox_pred(Variable): Output of one level of fpn rpn head with - shape of [N, num_anchors * 4, H, W]. - """ - slvl = str(feat_lvl) - conv_name = 'conv_rpn_fpn' + slvl - cls_name = 'rpn_cls_logits_fpn' + slvl - bbox_name = 'rpn_bbox_pred_fpn' + slvl - conv_share_name = 'conv_rpn_fpn' + str(self.min_level) - cls_share_name = 'rpn_cls_logits_fpn' + str(self.min_level) - bbox_share_name = 'rpn_bbox_pred_fpn' + str(self.min_level) - - num_anchors = len(self.anchor_generator.aspect_ratios) - conv_rpn_fpn = fluid.layers.conv2d( - input=input, - num_filters=self.num_chan, - filter_size=3, - padding=1, - act='relu', - name=conv_name, - param_attr=ParamAttr( - name=conv_share_name + '_w', - initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=conv_share_name + '_b', - learning_rate=2., - regularizer=L2Decay(0.))) - - # self.anchor_generator - self.anchors, self.anchor_var = fluid.layers.anchor_generator( - input=conv_rpn_fpn, - anchor_sizes=(self.anchor_start_size * 2.** - (feat_lvl - self.min_level), ), - stride=(2.**feat_lvl, 2.**feat_lvl), - aspect_ratios=self.anchor_generator.aspect_ratios, - variance=self.anchor_generator.variance) - - cls_num_filters = num_anchors * self.num_classes - self.rpn_cls_score = fluid.layers.conv2d( - input=conv_rpn_fpn, - num_filters=cls_num_filters, - filter_size=1, - act=None, - name=cls_name, - param_attr=ParamAttr( - name=cls_share_name + '_w', - initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=cls_share_name + '_b', - learning_rate=2., - regularizer=L2Decay(0.))) - self.rpn_bbox_pred = fluid.layers.conv2d( - input=conv_rpn_fpn, - num_filters=num_anchors * 4, - filter_size=1, - act=None, - name=bbox_name, - param_attr=ParamAttr( - name=bbox_share_name + '_w', - initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=bbox_share_name + '_b', - learning_rate=2., - regularizer=L2Decay(0.))) - return self.rpn_cls_score, self.rpn_bbox_pred - - def _get_single_proposals(self, body_feat, im_info, feat_lvl, mode='train'): - """ - Get proposals in one level according to the output of fpn rpn head - - Args: - body_feat(Variable): the feature map from backone. - im_info(Variable): The information of image with shape [N, 3] with - format (height, width, scale). - feat_lvl(int): Indicate the level of proposals corresponding to - the feature maps. - - Returns: - rpn_rois_fpn(Variable): Output proposals with shape of (rois_num, 4). - rpn_roi_probs_fpn(Variable): Scores of proposals with - shape of (rois_num, 1). 
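
`_get_output` above grows the anchor size with the pyramid level while the stride doubles alongside it, so every level sees anchors of the same size relative to its feature map. Written out with this module's defaults:

```python
anchor_start_size, min_level, max_level = 32, 2, 6
for lvl in range(min_level, max_level + 1):
    size = anchor_start_size * 2 ** (lvl - min_level)
    stride = 2 ** lvl
    print("P{}: anchor size {:>3}, stride {:>2}".format(lvl, size, stride))
# P2..P6 -> sizes 32, 64, 128, 256, 512 at strides 4, 8, 16, 32, 64
```
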
- """ - - rpn_cls_score_fpn, rpn_bbox_pred_fpn = self._get_output( - body_feat, feat_lvl) - - prop_op = self.train_proposal if mode == 'train' else self.test_proposal - if self.num_classes == 1: - rpn_cls_prob_fpn = fluid.layers.sigmoid( - rpn_cls_score_fpn, name='rpn_cls_prob_fpn' + str(feat_lvl)) - else: - rpn_cls_score_fpn = fluid.layers.transpose( - rpn_cls_score_fpn, perm=[0, 2, 3, 1]) - rpn_cls_score_fpn = fluid.layers.reshape( - rpn_cls_score_fpn, shape=(0, 0, 0, -1, self.num_classes)) - rpn_cls_prob_fpn = fluid.layers.softmax( - rpn_cls_score_fpn, - use_cudnn=False, - name='rpn_cls_prob_fpn' + str(feat_lvl)) - rpn_cls_prob_fpn = fluid.layers.slice( - rpn_cls_prob_fpn, axes=[4], starts=[1], ends=[self.num_classes]) - rpn_cls_prob_fpn, _ = fluid.layers.topk(rpn_cls_prob_fpn, 1) - rpn_cls_prob_fpn = fluid.layers.reshape( - rpn_cls_prob_fpn, shape=(0, 0, 0, -1)) - rpn_cls_prob_fpn = fluid.layers.transpose( - rpn_cls_prob_fpn, perm=[0, 3, 1, 2]) - # prop_op - rpn_rois_fpn, rpn_roi_prob_fpn = fluid.layers.generate_proposals( - scores=rpn_cls_prob_fpn, - bbox_deltas=rpn_bbox_pred_fpn, - im_info=im_info, - anchors=self.anchors, - variances=self.anchor_var, - pre_nms_top_n=prop_op.pre_nms_top_n, - post_nms_top_n=prop_op.post_nms_top_n, - nms_thresh=prop_op.nms_thresh, - min_size=prop_op.min_size, - eta=prop_op.eta) - return rpn_rois_fpn, rpn_roi_prob_fpn - - def get_proposals(self, fpn_feats, im_info, mode='train'): - """ - Get proposals in multiple levels according to the output of fpn - rpn head - - Args: - fpn_feats(dict): A dictionary represents the output feature map - of FPN with their name. - im_info(Variable): The information of image with shape [N, 3] with - format (height, width, scale). - - Return: - rois_list(Variable): Output proposals in shape of [rois_num, 4] - """ - rois_list = [] - roi_probs_list = [] - fpn_feat_names = list(fpn_feats.keys()) - for lvl in range(self.min_level, self.max_level + 1): - fpn_feat_name = fpn_feat_names[self.max_level - lvl] - fpn_feat = fpn_feats[fpn_feat_name] - rois_fpn, roi_probs_fpn = self._get_single_proposals( - fpn_feat, im_info, lvl, mode) - self.fpn_rpn_list.append((self.rpn_cls_score, self.rpn_bbox_pred)) - rois_list.append(rois_fpn) - roi_probs_list.append(roi_probs_fpn) - self.anchors_list.append(self.anchors) - self.anchor_var_list.append(self.anchor_var) - prop_op = self.train_proposal if mode == 'train' else self.test_proposal - post_nms_top_n = prop_op.post_nms_top_n - rois_collect = fluid.layers.collect_fpn_proposals( - rois_list, - roi_probs_list, - self.min_level, - self.max_level, - post_nms_top_n, - name='collect') - return rois_collect - - def _get_loss_input(self): - rpn_clses = [] - rpn_bboxes = [] - anchors = [] - anchor_vars = [] - for i in range(len(self.fpn_rpn_list)): - single_input = self._transform_input( - self.fpn_rpn_list[i][0], self.fpn_rpn_list[i][1], - self.anchors_list[i], self.anchor_var_list[i]) - rpn_clses.append(single_input[0]) - rpn_bboxes.append(single_input[1]) - anchors.append(single_input[2]) - anchor_vars.append(single_input[3]) - - rpn_cls = fluid.layers.concat(rpn_clses, axis=1) - rpn_bbox = fluid.layers.concat(rpn_bboxes, axis=1) - anchors = fluid.layers.concat(anchors) - anchor_var = fluid.layers.concat(anchor_vars) - return rpn_cls, rpn_bbox, anchors, anchor_var diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/bbox_head.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/bbox_head.py deleted file mode 100644 index 
8080ed22f5057ece32f890c6bb2e44564700f9e1..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/bbox_head.py +++ /dev/null @@ -1,270 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Normal, Xavier -from paddle.fluid.regularizer import L2Decay -from paddle.fluid.initializer import MSRA - - -class MultiClassNMS(object): - # __op__ = fluid.layers.multiclass_nms - def __init__(self, - score_threshold=.05, - nms_top_k=-1, - keep_top_k=100, - nms_threshold=.5, - normalized=False, - nms_eta=1.0, - background_label=0): - super(MultiClassNMS, self).__init__() - self.score_threshold = score_threshold - self.nms_top_k = nms_top_k - self.keep_top_k = keep_top_k - self.nms_threshold = nms_threshold - self.normalized = normalized - self.nms_eta = nms_eta - self.background_label = background_label - - -class SmoothL1Loss(object): - ''' - Smooth L1 loss - Args: - sigma (float): hyper param in smooth l1 loss - ''' - - def __init__(self, sigma=1.0): - super(SmoothL1Loss, self).__init__() - self.sigma = sigma - - def __call__(self, x, y, inside_weight=None, outside_weight=None): - return fluid.layers.smooth_l1( - x, - y, - inside_weight=inside_weight, - outside_weight=outside_weight, - sigma=self.sigma) - - -class BoxCoder(object): - def __init__(self, - prior_box_var=[0.1, 0.1, 0.2, 0.2], - code_type='decode_center_size', - box_normalized=False, - axis=1): - super(BoxCoder, self).__init__() - self.prior_box_var = prior_box_var - self.code_type = code_type - self.box_normalized = box_normalized - self.axis = axis - - -class TwoFCHead(object): - """ - RCNN head with two Fully Connected layers - - Args: - mlp_dim (int): num of filters for the fc layers - """ - - def __init__(self, mlp_dim=1024): - super(TwoFCHead, self).__init__() - self.mlp_dim = mlp_dim - - def __call__(self, roi_feat): - fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3] - - fc6 = fluid.layers.fc( - input=roi_feat, - size=self.mlp_dim, - act='relu', - name='fc6', - param_attr=ParamAttr(name='fc6_w', initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name='fc6_b', learning_rate=2., regularizer=L2Decay(0.))) - head_feat = fluid.layers.fc( - input=fc6, - size=self.mlp_dim, - act='relu', - name='fc7', - param_attr=ParamAttr(name='fc7_w', initializer=Xavier()), - bias_attr=ParamAttr( - name='fc7_b', learning_rate=2., regularizer=L2Decay(0.))) - - return head_feat - - -class BBoxHead(object): - """ - RCNN bbox head - - Args: - head (object): the head module instance, e.g., `ResNetC5`, `TwoFCHead` - box_coder (object): `BoxCoder` instance - nms (object): `MultiClassNMS` instance - num_classes: number of output classes - """ - __inject__ = ['head', 'box_coder', 'nms', 'bbox_loss'] - __shared__ = ['num_classes'] - - def __init__(self, - head, - box_coder=BoxCoder(), - nms=MultiClassNMS(), - bbox_loss=SmoothL1Loss(), - num_classes=81): - super(BBoxHead, self).__init__() - self.head = head - self.num_classes = num_classes - self.box_coder = box_coder - self.nms = nms - self.bbox_loss = bbox_loss - self.head_feat = None - - def get_head_feat(self, input=None): - """ - Get the bbox head feature map. 
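
To make the `MultiClassNMS` knobs above concrete, here is a single-class greedy NMS in plain NumPy. This is illustrative only: the actual `fluid.layers.multiclass_nms` op runs this per class, treats `background_label` specially, and keeps only the `keep_top_k` highest-scoring boxes overall.

```python
import numpy as np

def nms_single_class(boxes, scores, nms_threshold=0.5,
                     score_threshold=0.05, nms_top_k=-1):
    keep_idx = np.where(scores > score_threshold)[0]
    order = keep_idx[np.argsort(-scores[keep_idx])]
    if nms_top_k > 0:
        order = order[:nms_top_k]
    area = lambda b: (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(int(i))
        rest = order[1:]
        xx1 = np.maximum(boxes[i, 0], boxes[rest, 0])
        yy1 = np.maximum(boxes[i, 1], boxes[rest, 1])
        xx2 = np.minimum(boxes[i, 2], boxes[rest, 2])
        yy2 = np.minimum(boxes[i, 3], boxes[rest, 3])
        inter = np.clip(xx2 - xx1, 0, None) * np.clip(yy2 - yy1, 0, None)
        iou = inter / (area(boxes[i:i + 1])[0] + area(boxes[rest]) - inter)
        order = rest[iou <= nms_threshold]  # drop boxes overlapping the winner
    return keep

boxes = np.array([[0, 0, 10, 10], [1, 1, 11, 11], [20, 20, 30, 30]], float)
print(nms_single_class(boxes, np.array([0.9, 0.8, 0.7])))  # [0, 2]
```
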
- """ - - if input is not None: - feat = self.head(input) - if isinstance(feat, OrderedDict): - feat = list(feat.values())[0] - self.head_feat = feat - return self.head_feat - - def _get_output(self, roi_feat): - """ - Get bbox head output. - - Args: - roi_feat (Variable): RoI feature from RoIExtractor. - - Returns: - cls_score(Variable): Output of rpn head with shape of - [N, num_anchors, H, W]. - bbox_pred(Variable): Output of rpn head with shape of - [N, num_anchors * 4, H, W]. - """ - head_feat = self.get_head_feat(roi_feat) - # when ResNetC5 output a single feature map - if not isinstance(self.head, TwoFCHead): - head_feat = fluid.layers.pool2d( - head_feat, pool_type='avg', global_pooling=True) - cls_score = fluid.layers.fc( - input=head_feat, - size=self.num_classes, - act=None, - name='cls_score', - param_attr=ParamAttr( - name='cls_score_w', initializer=Normal(loc=0.0, scale=0.01)), - bias_attr=ParamAttr( - name='cls_score_b', learning_rate=2., regularizer=L2Decay(0.))) - bbox_pred = fluid.layers.fc( - input=head_feat, - size=4 * self.num_classes, - act=None, - name='bbox_pred', - param_attr=ParamAttr( - name='bbox_pred_w', initializer=Normal(loc=0.0, scale=0.001)), - bias_attr=ParamAttr( - name='bbox_pred_b', learning_rate=2., regularizer=L2Decay(0.))) - return cls_score, bbox_pred - - def get_loss(self, roi_feat, labels_int32, bbox_targets, - bbox_inside_weights, bbox_outside_weights): - """ - Get bbox_head loss. - - Args: - roi_feat (Variable): RoI feature from RoIExtractor. - labels_int32(Variable): Class label of a RoI with shape [P, 1]. - P is the number of RoI. - bbox_targets(Variable): Box label of a RoI with shape - [P, 4 * class_nums]. - bbox_inside_weights(Variable): Indicates whether a box should - contribute to loss. Same shape as bbox_targets. - bbox_outside_weights(Variable): Indicates whether a box should - contribute to loss. Same shape as bbox_targets. - - Return: - Type: Dict - loss_cls(Variable): bbox_head loss. - loss_bbox(Variable): bbox_head loss. - """ - - cls_score, bbox_pred = self._get_output(roi_feat) - - labels_int64 = fluid.layers.cast(x=labels_int32, dtype='int64') - labels_int64.stop_gradient = True - loss_cls = fluid.layers.softmax_with_cross_entropy( - logits=cls_score, label=labels_int64, numeric_stable_mode=True) - loss_cls = fluid.layers.reduce_mean(loss_cls) - loss_bbox = self.bbox_loss( - x=bbox_pred, - y=bbox_targets, - inside_weight=bbox_inside_weights, - outside_weight=bbox_outside_weights) - loss_bbox = fluid.layers.reduce_mean(loss_bbox) - return {'loss_cls': loss_cls, 'loss_bbox': loss_bbox} - - def get_prediction(self, - roi_feat, - rois, - im_info, - im_shape, - return_box_score=False): - """ - Get prediction bounding box in test stage. - - Args: - roi_feat (Variable): RoI feature from RoIExtractor. - rois (Variable): Output of generate_proposals in rpn head. - im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the - number of input images, each element consists of im_height, - im_width, im_scale. - im_shape (Variable): Actual shape of original image with shape - [B, 3]. B is the number of images, each element consists of - original_height, original_width, 1 - - Returns: - pred_result(Variable): Prediction result with shape [N, 6]. Each - row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]. - N is the total number of prediction. 
- """ - cls_score, bbox_pred = self._get_output(roi_feat) - - im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3]) - im_scale = fluid.layers.sequence_expand(im_scale, rois) - boxes = rois / im_scale - cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False) - bbox_pred = fluid.layers.reshape(bbox_pred, (-1, self.num_classes, 4)) - # self.box_coder - decoded_box = fluid.layers.box_coder( - prior_box=boxes, - target_box=bbox_pred, - prior_box_var=self.box_coder.prior_box_var, - code_type=self.box_coder.code_type, - box_normalized=self.box_coder.box_normalized, - axis=self.box_coder.axis) - cliped_box = fluid.layers.box_clip(input=decoded_box, im_info=im_shape) - if return_box_score: - return {'bbox': cliped_box, 'score': cls_prob} - # self.nms - pred_result = fluid.layers.multiclass_nms( - bboxes=cliped_box, - scores=cls_prob, - score_threshold=self.nms.score_threshold, - nms_top_k=self.nms.nms_top_k, - keep_top_k=self.nms.keep_top_k, - nms_threshold=self.nms.nms_threshold, - normalized=self.nms.normalized, - nms_eta=self.nms.nms_eta, - background_label=self.nms.background_label) - return pred_result diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/data_feed.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/data_feed.py deleted file mode 100644 index b38501e5bf18c479f4de0de565cba26c84fa0c9d..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/data_feed.py +++ /dev/null @@ -1,118 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import os -from collections import OrderedDict - -import cv2 -import numpy as np -from PIL import Image, ImageEnhance -from paddle import fluid - -__all__ = ['test_reader'] - - -def test_reader(paths=None, images=None): - """ - data generator - - Args: - paths (list[str]): paths to images. - images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] - - Yield: - res (dict): key contains 'image', 'im_info', 'im_shape', the corresponding values is: - image (numpy.ndarray): the image to be fed into network - im_info (numpy.ndarray): the info about the preprocessed. - im_shape (numpy.ndarray): the shape of image. - """ - img_list = list() - if paths: - for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file path.".format(img_path) - img = cv2.imread(img_path).astype('float32') - img_list.append(img) - if images is not None: - for img in images: - img_list.append(img) - - for im in img_list: - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - im = im.astype(np.float32, copy=False) - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - mean = np.array(mean)[np.newaxis, np.newaxis, :] - std = np.array(std)[np.newaxis, np.newaxis, :] - im = im / 255.0 - im -= mean - im /= std - - target_size = 800 - max_size = 1333 - - shape = im.shape - # im_shape holds the original shape of image. - im_shape = np.array([shape[0], shape[1], 1.0]).astype('float32') - im_size_min = np.min(shape[0:2]) - im_size_max = np.max(shape[0:2]) - im_scale = float(target_size) / float(im_size_min) - if np.round(im_scale * im_size_max) > max_size: - im_scale = float(max_size) / float(im_size_max) - - resize_w = np.round(im_scale * float(shape[1])) - resize_h = np.round(im_scale * float(shape[0])) - # im_info holds the resize info of image. 
- im_info = np.array([resize_h, resize_w, im_scale]).astype('float32') - - im = cv2.resize( - im, - None, - None, - fx=im_scale, - fy=im_scale, - interpolation=cv2.INTER_LINEAR) - - # HWC --> CHW - im = np.swapaxes(im, 1, 2) - im = np.swapaxes(im, 1, 0) - yield {'image': im, 'im_info': im_info, 'im_shape': im_shape} - - -def padding_minibatch(batch_data, coarsest_stride=0, use_padded_im_info=True): - max_shape_org = np.array( - [data['image'].shape for data in batch_data]).max(axis=0) - if coarsest_stride > 0: - max_shape = np.zeros((3)).astype('int32') - max_shape[1] = int( - np.ceil(max_shape_org[1] / coarsest_stride) * coarsest_stride) - max_shape[2] = int( - np.ceil(max_shape_org[2] / coarsest_stride) * coarsest_stride) - else: - max_shape = max_shape_org.astype('int32') - - padding_image = list() - padding_info = list() - padding_shape = list() - - for data in batch_data: - im_c, im_h, im_w = data['image'].shape - # image - padding_im = np.zeros((im_c, max_shape[1], max_shape[2]), - dtype=np.float32) - padding_im[:, 0:im_h, 0:im_w] = data['image'] - padding_image.append(padding_im) - # im_info - data['im_info'][ - 0] = max_shape[1] if use_padded_im_info else max_shape_org[1] - data['im_info'][ - 1] = max_shape[2] if use_padded_im_info else max_shape_org[2] - padding_info.append(data['im_info']) - padding_shape.append(data['im_shape']) - - padding_image = np.array(padding_image).astype('float32') - padding_info = np.array(padding_info).astype('float32') - padding_shape = np.array(padding_shape).astype('float32') - return padding_image, padding_info, padding_shape diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/fpn.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/fpn.py deleted file mode 100644 index bd19c712ecc9b0112685c061046812f6ff418e42..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/fpn.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
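
Before moving on to `fpn.py`: the `padding_minibatch` helper above zero-pads every image in a batch to a common height and width, rounded up to a multiple of `coarsest_stride` so the deepest FPN stride divides the input evenly. The rounding in isolation:

```python
import numpy as np

coarsest_stride = 32
shapes = np.array([[3, 800, 1066], [3, 750, 1333]])   # CHW per image
max_hw = shapes.max(axis=0)[1:]
padded = np.ceil(max_hw / coarsest_stride).astype(int) * coarsest_stride
print(padded)  # [ 800 1344]: both images are zero-padded to this size
```
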
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import copy -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Xavier -from paddle.fluid.regularizer import L2Decay - -__all__ = ['ConvNorm', 'FPN'] - - -def ConvNorm(input, - num_filters, - filter_size, - stride=1, - groups=1, - norm_decay=0., - norm_type='affine_channel', - norm_groups=32, - dilation=1, - lr_scale=1, - freeze_norm=False, - act=None, - norm_name=None, - initializer=None, - name=None): - fan = num_filters - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=((filter_size - 1) // 2) * dilation, - dilation=dilation, - groups=groups, - act=None, - param_attr=ParamAttr( - name=name + "_weights", - initializer=initializer, - learning_rate=lr_scale), - bias_attr=False, - name=name + '.conv2d.output.1') - - norm_lr = 0. if freeze_norm else 1. - pattr = ParamAttr( - name=norm_name + '_scale', - learning_rate=norm_lr * lr_scale, - regularizer=L2Decay(norm_decay)) - battr = ParamAttr( - name=norm_name + '_offset', - learning_rate=norm_lr * lr_scale, - regularizer=L2Decay(norm_decay)) - - if norm_type in ['bn', 'sync_bn']: - global_stats = True if freeze_norm else False - out = fluid.layers.batch_norm( - input=conv, - act=act, - name=norm_name + '.output.1', - param_attr=pattr, - bias_attr=battr, - moving_mean_name=norm_name + '_mean', - moving_variance_name=norm_name + '_variance', - use_global_stats=global_stats) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif norm_type == 'gn': - out = fluid.layers.group_norm( - input=conv, - act=act, - name=norm_name + '.output.1', - groups=norm_groups, - param_attr=pattr, - bias_attr=battr) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif norm_type == 'affine_channel': - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=pattr, - default_initializer=fluid.initializer.Constant(1.)) - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=battr, - default_initializer=fluid.initializer.Constant(0.)) - out = fluid.layers.affine_channel( - x=conv, scale=scale, bias=bias, act=act) - if freeze_norm: - scale.stop_gradient = True - bias.stop_gradient = True - return out - - -class FPN(object): - """ - Feature Pyramid Network, see https://arxiv.org/abs/1612.03144 - - Args: - num_chan (int): number of feature channels - min_level (int): lowest level of the backbone feature map to use - max_level (int): highest level of the backbone feature map to use - spatial_scale (list): feature map scaling factor - has_extra_convs (bool): whether has extral convolutions in higher levels - norm_type (str|None): normalization type, 'bn'/'sync_bn'/'affine_channel' - """ - __shared__ = ['norm_type', 'freeze_norm'] - - def __init__(self, - num_chan=256, - min_level=2, - max_level=6, - spatial_scale=[1. / 32., 1. / 16., 1. / 8., 1. 
/ 4.], - has_extra_convs=False, - norm_type=None, - freeze_norm=False): - self.freeze_norm = freeze_norm - self.num_chan = num_chan - self.min_level = min_level - self.max_level = max_level - self.spatial_scale = spatial_scale - self.has_extra_convs = has_extra_convs - self.norm_type = norm_type - - def _add_topdown_lateral(self, body_name, body_input, upper_output): - lateral_name = 'fpn_inner_' + body_name + '_lateral' - topdown_name = 'fpn_topdown_' + body_name - fan = body_input.shape[1] - if self.norm_type: - initializer = Xavier(fan_out=fan) - lateral = ConvNorm( - body_input, - self.num_chan, - 1, - initializer=initializer, - norm_type=self.norm_type, - freeze_norm=self.freeze_norm, - name=lateral_name, - norm_name=lateral_name) - else: - lateral = fluid.layers.conv2d( - body_input, - self.num_chan, - 1, - param_attr=ParamAttr( - name=lateral_name + "_w", initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name=lateral_name + "_b", - learning_rate=2., - regularizer=L2Decay(0.)), - name=lateral_name) - topdown = fluid.layers.resize_nearest( - upper_output, scale=2., name=topdown_name) - return lateral + topdown - - def get_output(self, body_dict): - """ - Add FPN onto backbone. - - Args: - body_dict(OrderedDict): Dictionary of variables and each element is the - output of backbone. - - Return: - fpn_dict(OrderedDict): A dictionary represents the output of FPN with - their name. - spatial_scale(list): A list of multiplicative spatial scale factor. - """ - spatial_scale = copy.deepcopy(self.spatial_scale) - body_name_list = list(body_dict.keys())[::-1] - num_backbone_stages = len(body_name_list) - self.fpn_inner_output = [[] for _ in range(num_backbone_stages)] - fpn_inner_name = 'fpn_inner_' + body_name_list[0] - body_input = body_dict[body_name_list[0]] - fan = body_input.shape[1] - if self.norm_type: - initializer = Xavier(fan_out=fan) - self.fpn_inner_output[0] = ConvNorm( - body_input, - self.num_chan, - 1, - initializer=initializer, - norm_type=self.norm_type, - freeze_norm=self.freeze_norm, - name=fpn_inner_name, - norm_name=fpn_inner_name) - else: - self.fpn_inner_output[0] = fluid.layers.conv2d( - body_input, - self.num_chan, - 1, - param_attr=ParamAttr( - name=fpn_inner_name + "_w", - initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name=fpn_inner_name + "_b", - learning_rate=2., - regularizer=L2Decay(0.)), - name=fpn_inner_name) - for i in range(1, num_backbone_stages): - body_name = body_name_list[i] - body_input = body_dict[body_name] - top_output = self.fpn_inner_output[i - 1] - fpn_inner_single = self._add_topdown_lateral( - body_name, body_input, top_output) - self.fpn_inner_output[i] = fpn_inner_single - fpn_dict = {} - fpn_name_list = [] - for i in range(num_backbone_stages): - fpn_name = 'fpn_' + body_name_list[i] - fan = self.fpn_inner_output[i].shape[1] * 3 * 3 - if self.norm_type: - initializer = Xavier(fan_out=fan) - fpn_output = ConvNorm( - self.fpn_inner_output[i], - self.num_chan, - 3, - initializer=initializer, - norm_type=self.norm_type, - freeze_norm=self.freeze_norm, - name=fpn_name, - norm_name=fpn_name) - else: - fpn_output = fluid.layers.conv2d( - self.fpn_inner_output[i], - self.num_chan, - filter_size=3, - padding=1, - param_attr=ParamAttr( - name=fpn_name + "_w", initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name=fpn_name + "_b", - learning_rate=2., - regularizer=L2Decay(0.)), - name=fpn_name) - fpn_dict[fpn_name] = fpn_output - fpn_name_list.append(fpn_name) - if not self.has_extra_convs and self.max_level - 
self.min_level == len( - spatial_scale): - body_top_name = fpn_name_list[0] - body_top_extension = fluid.layers.pool2d( - fpn_dict[body_top_name], - 1, - 'max', - pool_stride=2, - name=body_top_name + '_subsampled_2x') - fpn_dict[body_top_name + '_subsampled_2x'] = body_top_extension - fpn_name_list.insert(0, body_top_name + '_subsampled_2x') - spatial_scale.insert(0, spatial_scale[0] * 0.5) - # Coarser FPN levels introduced for RetinaNet - highest_backbone_level = self.min_level + len(spatial_scale) - 1 - if self.has_extra_convs and self.max_level > highest_backbone_level: - fpn_blob = body_dict[body_name_list[0]] - for i in range(highest_backbone_level + 1, self.max_level + 1): - fpn_blob_in = fpn_blob - fpn_name = 'fpn_' + str(i) - if i > highest_backbone_level + 1: - fpn_blob_in = fluid.layers.relu(fpn_blob) - fan = fpn_blob_in.shape[1] * 3 * 3 - fpn_blob = fluid.layers.conv2d( - input=fpn_blob_in, - num_filters=self.num_chan, - filter_size=3, - stride=2, - padding=1, - param_attr=ParamAttr( - name=fpn_name + "_w", initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name=fpn_name + "_b", - learning_rate=2., - regularizer=L2Decay(0.)), - name=fpn_name) - fpn_dict[fpn_name] = fpn_blob - fpn_name_list.insert(0, fpn_name) - spatial_scale.insert(0, spatial_scale[0] * 0.5) - res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list]) - return res_dict, spatial_scale diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/module.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/module.py deleted file mode 100644 index 40335669605281c5dec07c974b34b3324abc2671..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/module.py +++ /dev/null @@ -1,250 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import ast -import argparse -from collections import OrderedDict -from functools import partial -from math import ceil - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.module.module import moduleinfo, runnable, serving -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.io.parser import txt_parser -from paddlehub.common.paddle_helper import add_vars_prefix - -from faster_rcnn_resnet50_fpn_venus.processor import load_label_info, postprocess, base64_to_cv2 -from faster_rcnn_resnet50_fpn_venus.data_feed import test_reader, padding_minibatch -from faster_rcnn_resnet50_fpn_venus.fpn import FPN -from faster_rcnn_resnet50_fpn_venus.resnet import ResNet -from faster_rcnn_resnet50_fpn_venus.rpn_head import AnchorGenerator, RPNTargetAssign, GenerateProposals, FPNRPNHead -from faster_rcnn_resnet50_fpn_venus.bbox_head import MultiClassNMS, BBoxHead, TwoFCHead -from faster_rcnn_resnet50_fpn_venus.bbox_assigner import BBoxAssigner -from faster_rcnn_resnet50_fpn_venus.roi_extractor import FPNRoIAlign - - -@moduleinfo( - name="faster_rcnn_resnet50_fpn_venus", - version="1.0.0", - type="cv/object_detection", - summary= - "Baidu's Faster-RCNN model for object detection, whose backbone is ResNet50, processed with Feature Pyramid Networks", - author="paddlepaddle", - author_email="paddle-dev@baidu.com") -class FasterRCNNResNet50RPN(hub.Module): - def _initialize(self): - # default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 800, 1333] - 
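
Stepping back to the FPN code above: whenever an extra level is appended, whether the max-pooled P6 or the RetinaNet-style conv levels, `spatial_scale` gains a 2x-coarser entry at the front. The bookkeeping in miniature:

```python
spatial_scale = [1. / 32., 1. / 16., 1. / 8., 1. / 4.]  # P5..P2, coarse first
spatial_scale.insert(0, spatial_scale[0] * 0.5)         # new P6 at stride 64
print(spatial_scale)  # [0.015625, 0.03125, 0.0625, 0.125, 0.25]
```
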
self.default_pretrained_model_path = os.path.join( - self.directory, "faster_rcnn_resnet50_fpn_model") - - def context(self, - num_classes=708, - trainable=True, - pretrained=True, - phase='train'): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - get_prediction (bool): whether to get prediction. - phase (str): optional choices are 'train' and 'predict'. - - Returns: - inputs (dict): the input variables. - outputs (dict): the output variables. - context_prog (Program): the program to execute transfer learning. - """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - image = fluid.layers.data( - name='image', shape=[-1, 3, -1, -1], dtype='float32') - # backbone - backbone = ResNet( - norm_type='affine_channel', - depth=50, - feature_maps=[2, 3, 4, 5], - freeze_at=2) - body_feats = backbone(image) - # fpn - fpn = FPN( - max_level=6, - min_level=2, - num_chan=256, - spatial_scale=[0.03125, 0.0625, 0.125, 0.25]) - var_prefix = '@HUB_{}@'.format(self.name) - im_info = fluid.layers.data( - name='im_info', shape=[3], dtype='float32', lod_level=0) - im_shape = fluid.layers.data( - name='im_shape', shape=[3], dtype='float32', lod_level=0) - body_feat_names = list(body_feats.keys()) - body_feats, spatial_scale = fpn.get_output(body_feats) - # rpn_head: RPNHead - rpn_head = self.rpn_head() - rois = rpn_head.get_proposals(body_feats, im_info, mode=phase) - # train - if phase == 'train': - gt_bbox = fluid.layers.data( - name='gt_bbox', shape=[4], dtype='float32', lod_level=1) - is_crowd = fluid.layers.data( - name='is_crowd', shape=[1], dtype='int32', lod_level=1) - gt_class = fluid.layers.data( - name='gt_class', shape=[1], dtype='int32', lod_level=1) - rpn_loss = rpn_head.get_loss(im_info, gt_bbox, is_crowd) - # bbox_assigner: BBoxAssigner - bbox_assigner = self.bbox_assigner(num_classes) - outs = fluid.layers.generate_proposal_labels( - rpn_rois=rois, - gt_classes=gt_class, - is_crowd=is_crowd, - gt_boxes=gt_bbox, - im_info=im_info, - batch_size_per_im=bbox_assigner.batch_size_per_im, - fg_fraction=bbox_assigner.fg_fraction, - fg_thresh=bbox_assigner.fg_thresh, - bg_thresh_hi=bbox_assigner.bg_thresh_hi, - bg_thresh_lo=bbox_assigner.bg_thresh_lo, - bbox_reg_weights=bbox_assigner.bbox_reg_weights, - class_nums=bbox_assigner.class_nums, - use_random=bbox_assigner.use_random) - rois = outs[0] - - roi_extractor = self.roi_extractor() - roi_feat = roi_extractor( - head_inputs=body_feats, - rois=rois, - spatial_scale=spatial_scale) - # head_feat - bbox_head = self.bbox_head(num_classes) - head_feat = bbox_head.head(roi_feat) - if isinstance(head_feat, OrderedDict): - head_feat = list(head_feat.values())[0] - if phase == 'train': - inputs = { - 'image': var_prefix + image.name, - 'im_info': var_prefix + im_info.name, - 'im_shape': var_prefix + im_shape.name, - 'gt_class': var_prefix + gt_class.name, - 'gt_bbox': var_prefix + gt_bbox.name, - 'is_crowd': var_prefix + is_crowd.name - } - outputs = { - 'head_features': - var_prefix + head_feat.name, - 'rpn_cls_loss': - var_prefix + rpn_loss['rpn_cls_loss'].name, - 'rpn_reg_loss': - var_prefix + rpn_loss['rpn_reg_loss'].name, - 'generate_proposal_labels': - [var_prefix + var.name for var in outs] - } - elif phase == 'predict': - pred = bbox_head.get_prediction(roi_feat, rois, im_info, - im_shape) - 
inputs = { - 'image': var_prefix + image.name, - 'im_info': var_prefix + im_info.name, - 'im_shape': var_prefix + im_shape.name - } - outputs = { - 'head_features': var_prefix + head_feat.name, - 'rois': var_prefix + rois.name, - 'bbox_out': var_prefix + pred.name - } - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(startup_program, var_prefix) - - global_vars = context_prog.global_block().vars - inputs = { - key: global_vars[value] - for key, value in inputs.items() - } - outputs = { - key: global_vars[value] if not isinstance(value, list) else - [global_vars[var] for var in value] - for key, value in outputs.items() - } - - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(startup_program) - if pretrained: - - def _if_exist(var): - if num_classes != 81: - if 'bbox_pred' in var.name or 'cls_score' in var.name: - return False - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) - return inputs, outputs, context_prog - - def rpn_head(self): - return FPNRPNHead( - anchor_generator=AnchorGenerator( - anchor_sizes=[32, 64, 128, 256, 512], - aspect_ratios=[0.5, 1.0, 2.0], - stride=[16.0, 16.0], - variance=[1.0, 1.0, 1.0, 1.0]), - rpn_target_assign=RPNTargetAssign( - rpn_batch_size_per_im=256, - rpn_fg_fraction=0.5, - rpn_negative_overlap=0.3, - rpn_positive_overlap=0.7, - rpn_straddle_thresh=0.0), - train_proposal=GenerateProposals( - min_size=0.0, - nms_thresh=0.7, - post_nms_top_n=2000, - pre_nms_top_n=2000), - test_proposal=GenerateProposals( - min_size=0.0, - nms_thresh=0.7, - post_nms_top_n=1000, - pre_nms_top_n=1000), - anchor_start_size=32, - num_chan=256, - min_level=2, - max_level=6) - - def roi_extractor(self): - return FPNRoIAlign( - canconical_level=4, - canonical_size=224, - max_level=5, - min_level=2, - box_resolution=7, - sampling_ratio=2) - - def bbox_head(self, num_classes): - return BBoxHead( - head=TwoFCHead(mlp_dim=1024), - nms=MultiClassNMS( - keep_top_k=100, nms_threshold=0.5, score_threshold=0.05), - num_classes=num_classes) - - def bbox_assigner(self, num_classes): - return BBoxAssigner( - batch_size_per_im=512, - bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], - bg_thresh_hi=0.5, - bg_thresh_lo=0.0, - fg_fraction=0.25, - fg_thresh=0.5, - class_nums=num_classes) diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/nonlocal_helper.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/nonlocal_helper.py deleted file mode 100644 index 599b8dfa07b6372f25990649212b232c523e19de..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/nonlocal_helper.py +++ /dev/null @@ -1,154 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import paddle.fluid as fluid -from paddle.fluid import ParamAttr - -nonlocal_params = { - "use_zero_init_conv": False, - "conv_init_std": 0.01, - "no_bias": True, - "use_maxpool": False, - "use_softmax": True, - "use_bn": False, - "use_scale": True, # vital for the model prformance!!! 
- "use_affine": False, - "bn_momentum": 0.9, - "bn_epsilon": 1.0000001e-5, - "bn_init_gamma": 0.9, - "weight_decay_bn": 1.e-4, -} - - -def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner, - max_pool_stride=2): - cur = input - theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr=ParamAttr(name = prefix + '_theta' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if not nonlocal_params["no_bias"] else False, \ - name = prefix + '_theta') - theta_shape = theta.shape - theta_shape_op = fluid.layers.shape(theta) - theta_shape_op.stop_gradient = True - - if nonlocal_params["use_maxpool"]: - max_pool = fluid.layers.pool2d(input = cur, \ - pool_size = [max_pool_stride, max_pool_stride], \ - pool_type = 'max', \ - pool_stride = [max_pool_stride, max_pool_stride], \ - pool_padding = [0, 0], \ - name = prefix + '_pool') - else: - max_pool = cur - - phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_phi' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_phi') - phi_shape = phi.shape - - g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_g' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_g' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_g') - g_shape = g.shape - # we have to use explicit batch size (to support arbitrary spacetime size) - # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784) - theta = fluid.layers.reshape(theta, shape=(0, 0, -1)) - theta = fluid.layers.transpose(theta, [0, 2, 1]) - phi = fluid.layers.reshape(phi, [0, 0, -1]) - theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity') - g = fluid.layers.reshape(g, [0, 0, -1]) - - if nonlocal_params["use_softmax"]: - if nonlocal_params["use_scale"]: - theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5) - else: - theta_phi_sc = theta_phi - p = fluid.layers.softmax( - theta_phi_sc, name=prefix + '_affinity' + '_prob') - else: - # not clear about what is doing in xlw's code - p = None # not implemented - raise "Not implemented when not use softmax" - - # note g's axis[2] corresponds to p's axis[2] - # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1) - p = fluid.layers.transpose(p, [0, 2, 1]) - t = fluid.layers.matmul(g, p, name=prefix + '_y') - - # reshape back - # e.g. 
(8, 1024, 784) => (8, 1024, 4, 14, 14) - t_shape = t.shape - t_re = fluid.layers.reshape( - t, shape=list(theta_shape), actual_shape=theta_shape_op) - blob_out = t_re - blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \ - filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_out' + "_w", \ - initializer = fluid.initializer.Constant(value = 0.) \ - if nonlocal_params["use_zero_init_conv"] \ - else fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_out' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_out') - blob_out_shape = blob_out.shape - - if nonlocal_params["use_bn"]: - bn_name = prefix + "_bn" - blob_out = fluid.layers.batch_norm(blob_out, \ - # is_test = test_mode, \ - momentum = nonlocal_params["bn_momentum"], \ - epsilon = nonlocal_params["bn_epsilon"], \ - name = bn_name, \ - param_attr = ParamAttr(name = bn_name + "_s", \ - initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - bias_attr = ParamAttr(name = bn_name + "_b", \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - moving_mean_name = bn_name + "_rm", \ - moving_variance_name = bn_name + "_riv") # add bn - - if nonlocal_params["use_affine"]: - affine_scale = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_s'), \ - default_initializer = fluid.initializer.Constant(value = 1.)) - affine_bias = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_b'), \ - default_initializer = fluid.initializer.Constant(value = 0.)) - blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \ - bias = affine_bias, name = prefix + '_affine') # add affine - - return blob_out - - -def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner): - ''' - add_space_nonlocal: - Non-local Neural Networks: see https://arxiv.org/abs/1711.07971 - ''' - conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner) - output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum') - return output diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/processor.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/processor.py deleted file mode 100644 index 5fd35de2e7db2d524972c6f2bfd028cb77f81c2d..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/processor.py +++ /dev/null @@ -1,176 +0,0 @@ -# coding=utf-8 -import base64 -import os - -import cv2 -import numpy as np -from PIL import Image, ImageDraw - -__all__ = [ - 'base64_to_cv2', - 'load_label_info', - 'postprocess', -] - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def get_save_image_name(img, output_dir, image_path): - """Get save image name from source image path. 
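The `space_nonlocal`/`add_space_nonlocal` pair above implements the embedded-Gaussian attention from Non-local Neural Networks (arXiv:1711.07971). Below is a minimal numpy sketch of the same affinity/aggregation math, assuming `use_softmax=True` and `use_scale=True` as configured in `nonlocal_params`; the function name and shapes are illustrative only, not part of the module:

```python
import numpy as np

def nonlocal_attention(x, w_theta, w_phi, w_g):
    # x: [C, H, W] feature map; w_*: [C_inner, C] weights of the 1x1 convs.
    c, h, w = x.shape
    flat = x.reshape(c, h * w)                    # [C, HW]
    theta, phi, g = w_theta @ flat, w_phi @ flat, w_g @ flat
    dim_inner = w_theta.shape[0]
    affinity = theta.T @ phi * dim_inner ** -0.5  # scaled dot product (use_scale=True)
    p = np.exp(affinity - affinity.max(axis=1, keepdims=True))
    p /= p.sum(axis=1, keepdims=True)             # softmax over key positions
    y = g @ p.T                                   # aggregate, matches matmul(g, transpose(p))
    return y.reshape(dim_inner, h, w)

x = np.random.rand(8, 4, 4)
w_theta, w_phi, w_g = (np.random.rand(4, 8) for _ in range(3))
print(nonlocal_attention(x, w_theta, w_phi, w_g).shape)  # (4, 4, 4)
```

The 1x1 output conv and the residual add in `add_space_nonlocal` then project `y` back to `dim_out` channels and sum it with the input.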
- """ - if not os.path.exists(output_dir): - os.makedirs(output_dir) - image_name = os.path.split(image_path)[-1] - name, ext = os.path.splitext(image_name) - - if ext == '': - if img.format == 'PNG': - ext = '.png' - elif img.format == 'JPEG': - ext = '.jpg' - elif img.format == 'BMP': - ext = '.bmp' - else: - if img.mode == "RGB" or img.mode == "L": - ext = ".jpg" - elif img.mode == "RGBA" or img.mode == "P": - ext = '.png' - - return os.path.join(output_dir, "{}".format(name)) + ext - - -def draw_bounding_box_on_image(image_path, data_list, save_dir): - image = Image.open(image_path) - draw = ImageDraw.Draw(image) - for data in data_list: - left, right, top, bottom = data['left'], data['right'], data[ - 'top'], data['bottom'] - - # draw bbox - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), - (left, top)], - width=2, - fill='red') - - # draw label - if image.mode == 'RGB': - text = data['label'] + ": %.2f%%" % (100 * data['confidence']) - textsize_width, textsize_height = draw.textsize(text=text) - draw.rectangle( - xy=(left, top - (textsize_height + 5), - left + textsize_width + 10, top), - fill=(255, 255, 255)) - draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) - - save_name = get_save_image_name(image, save_dir, image_path) - if os.path.exists(save_name): - os.remove(save_name) - - image.save(save_name) - return save_name - - -def clip_bbox(bbox, img_width, img_height): - xmin = max(min(bbox[0], img_width), 0.) - ymin = max(min(bbox[1], img_height), 0.) - xmax = max(min(bbox[2], img_width), 0.) - ymax = max(min(bbox[3], img_height), 0.) - return xmin, ymin, xmax, ymax - - -def load_label_info(file_path): - with open(file_path, 'r') as fr: - text = fr.readlines() - label_names = [] - for info in text: - label_names.append(info.strip()) - return label_names - - -def postprocess(paths, - images, - data_out, - score_thresh, - label_names, - output_dir, - handle_id, - visualization=True): - """ - postprocess the lod_tensor produced by fluid.Executor.run - - Args: - paths (list[str]): the path of images. - images (list(numpy.ndarray)): list of images, shape of each is [H, W, C]. - data_out (lod_tensor): data produced by executor.run. - score_thresh (float): the low limit of bounding box. - label_names (list[str]): label names. - output_dir (str): output directory. - handle_id (int): The number of images that have been handled. - visualization (bool): whether to save as images. - - Returns: - res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of detection result; - confidence (float): The confidence of detection result. - save_path (str): The path to save output images. 
- """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() - - if handle_id < len(paths): - unhandled_paths = paths[handle_id:] - unhandled_paths_num = len(unhandled_paths) - else: - unhandled_paths_num = 0 - - output = [] - for index in range(len(lod) - 1): - output_i = {'data': []} - if index < unhandled_paths_num: - org_img_path = unhandled_paths[index] - org_img = Image.open(org_img_path) - output_i['path'] = org_img_path - else: - org_img = images[index - unhandled_paths_num] - org_img = org_img.astype(np.uint8) - org_img = Image.fromarray(org_img[:, :, ::-1]) - if visualization: - org_img_path = get_save_image_name( - org_img, output_dir, 'image_numpy_{}'.format( - (handle_id + index))) - org_img.save(org_img_path) - org_img_height = org_img.height - org_img_width = org_img.width - result_i = results[lod[index]:lod[index + 1]] - for row in result_i: - if len(row) != 6: - continue - if row[1] < score_thresh: - continue - category_id = int(row[0]) - confidence = row[1] - bbox = row[2:] - dt = {} - dt['label'] = label_names[category_id] - dt['confidence'] = confidence - dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox( - bbox, org_img_width, org_img_height) - output_i['data'].append(dt) - - output.append(output_i) - if visualization: - output_i['save_path'] = draw_bounding_box_on_image( - org_img_path, output_i['data'], output_dir) - - return output diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/resnet.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/resnet.py deleted file mode 100644 index 4bd6fb61e9a51c70b6197b25a7cf788c2ca5b790..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/resnet.py +++ /dev/null @@ -1,447 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -from collections import OrderedDict -from numbers import Integral - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.framework import Variable -from paddle.fluid.regularizer import L2Decay -from paddle.fluid.initializer import Constant - -from .nonlocal_helper import add_space_nonlocal -from .name_adapter import NameAdapter - -__all__ = ['ResNet', 'ResNetC5'] - - -class ResNet(object): - """ - Residual Network, see https://arxiv.org/abs/1512.03385 - Args: - depth (int): ResNet depth, should be 34, 50. 
- freeze_at (int): freeze the backbone at which stage - norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel' - freeze_norm (bool): freeze normalization layers - norm_decay (float): weight decay for normalization layer weights - variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently - feature_maps (list): index of stages whose feature maps are returned - dcn_v2_stages (list): index of stages who select deformable conv v2 - nonlocal_stages (list): index of stages who select nonlocal networks - """ - __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name'] - - def __init__(self, - depth=50, - freeze_at=0, - norm_type='sync_bn', - freeze_norm=False, - norm_decay=0., - variant='b', - feature_maps=[3, 4, 5], - dcn_v2_stages=[], - weight_prefix_name='', - nonlocal_stages=[], - get_prediction=False, - class_dim=1000): - super(ResNet, self).__init__() - - if isinstance(feature_maps, Integral): - feature_maps = [feature_maps] - - assert depth in [34, 50], \ - "depth {} not in [34, 50]" - assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant" - assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" - assert len(feature_maps) > 0, "need one or more feature maps" - assert norm_type in ['bn', 'sync_bn', 'affine_channel'] - assert not (len(nonlocal_stages)>0 and depth<50), \ - "non-local is not supported for resnet18 or resnet34" - - self.depth = depth - self.freeze_at = freeze_at - self.norm_type = norm_type - self.norm_decay = norm_decay - self.freeze_norm = freeze_norm - self.variant = variant - self._model_type = 'ResNet' - self.feature_maps = feature_maps - self.dcn_v2_stages = dcn_v2_stages - self.depth_cfg = { - 34: ([3, 4, 6, 3], self.basicblock), - 50: ([3, 4, 6, 3], self.bottleneck), - } - self.stage_filters = [64, 128, 256, 512] - self._c1_out_chan_num = 64 - self.na = NameAdapter(self) - self.prefix_name = weight_prefix_name - - self.nonlocal_stages = nonlocal_stages - self.nonlocal_mod_cfg = { - 50: 2, - 101: 5, - 152: 8, - 200: 12, - } - self.get_prediction = get_prediction - self.class_dim = class_dim - - def _conv_offset(self, - input, - filter_size, - stride, - padding, - act=None, - name=None): - out_channel = filter_size * filter_size * 3 - out = fluid.layers.conv2d( - input, - num_filters=out_channel, - filter_size=filter_size, - stride=stride, - padding=padding, - param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"), - bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"), - act=act, - name=name) - return out - - def _conv_norm(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - dcn_v2=False): - _name = self.prefix_name + name if self.prefix_name != '' else name - if not dcn_v2: - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + '.conv2d.output.1') - else: - # select deformable conv" - offset_mask = self._conv_offset( - input=input, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - act=None, - name=_name + "_conv_offset") - offset_channel = filter_size**2 * 2 - mask_channel = filter_size**2 - offset, mask = fluid.layers.split( - input=offset_mask, - num_or_sections=[offset_channel, mask_channel], - dim=1) - mask = fluid.layers.sigmoid(mask) - conv = fluid.layers.deformable_conv( - input=input, - offset=offset, - 
mask=mask, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - deformable_groups=1, - im2col_step=1, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + ".conv2d.output.1") - - bn_name = self.na.fix_conv_norm_name(name) - bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name - - norm_lr = 0. if self.freeze_norm else 1. - norm_decay = self.norm_decay - pattr = ParamAttr( - name=bn_name + '_scale', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - battr = ParamAttr( - name=bn_name + '_offset', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - - if self.norm_type in ['bn', 'sync_bn']: - global_stats = True if self.freeze_norm else False - out = fluid.layers.batch_norm( - input=conv, - act=act, - name=bn_name + '.output.1', - param_attr=pattr, - bias_attr=battr, - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', - use_global_stats=global_stats) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif self.norm_type == 'affine_channel': - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=pattr, - default_initializer=fluid.initializer.Constant(1.)) - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=battr, - default_initializer=fluid.initializer.Constant(0.)) - out = fluid.layers.affine_channel( - x=conv, scale=scale, bias=bias, act=act) - if self.freeze_norm: - scale.stop_gradient = True - bias.stop_gradient = True - return out - - def _shortcut(self, input, ch_out, stride, is_first, name): - max_pooling_in_short_cut = self.variant == 'd' - ch_in = input.shape[1] - # the naming rule is same as pretrained weight - name = self.na.fix_shortcut_name(name) - std_senet = getattr(self, 'std_senet', False) - if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first): - if std_senet: - if is_first: - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return self._conv_norm(input, ch_out, 3, stride, name=name) - if max_pooling_in_short_cut and not is_first: - input = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - ceil_mode=True, - pool_type='avg') - return self._conv_norm(input, ch_out, 1, 1, name=name) - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - if self.variant == 'a': - stride1, stride2 = stride, 1 - else: - stride1, stride2 = 1, stride - - # ResNeXt - groups = getattr(self, 'groups', 1) - group_width = getattr(self, 'group_width', -1) - if groups == 1: - expand = 4 - elif (groups * group_width) == 256: - expand = 1 - else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d - num_filters = num_filters // 2 - expand = 2 - - conv_name1, conv_name2, conv_name3, \ - shortcut_name = self.na.fix_bottleneck_name(name) - std_senet = getattr(self, 'std_senet', False) - if std_senet: - conv_def = [[ - int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1 - ], [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - else: - conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], - [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - - residual = input - for i, (c, k, s, act, g, _name) 
in enumerate(conv_def): - residual = self._conv_norm( - input=residual, - num_filters=c, - filter_size=k, - stride=s, - act=act, - groups=g, - name=_name, - dcn_v2=(i == 1 and dcn_v2)) - short = self._shortcut( - input, - num_filters * expand, - stride, - is_first=is_first, - name=shortcut_name) - # Squeeze-and-Excitation - if callable(getattr(self, '_squeeze_excitation', None)): - residual = self._squeeze_excitation( - input=residual, num_channels=num_filters, name='fc' + name) - return fluid.layers.elementwise_add( - x=short, y=residual, act='relu', name=name + ".add.output.5") - - def basicblock(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - assert dcn_v2 is False, "Not implemented yet." - conv0 = self._conv_norm( - input=input, - num_filters=num_filters, - filter_size=3, - act='relu', - stride=stride, - name=name + "_branch2a") - conv1 = self._conv_norm( - input=conv0, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - short = self._shortcut( - input, num_filters, stride, is_first, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - - def layer_warp(self, input, stage_num): - """ - Args: - input (Variable): input variable. - stage_num (int): the stage number, should be 2, 3, 4, 5 - - Returns: - The last variable in endpoint-th stage. - """ - assert stage_num in [2, 3, 4, 5] - - stages, block_func = self.depth_cfg[self.depth] - count = stages[stage_num - 2] - - ch_out = self.stage_filters[stage_num - 2] - is_first = False if stage_num != 2 else True - dcn_v2 = True if stage_num in self.dcn_v2_stages else False - - nonlocal_mod = 1000 - if stage_num in self.nonlocal_stages: - nonlocal_mod = self.nonlocal_mod_cfg[ - self.depth] if stage_num == 4 else 2 - - # Make the layer name and parameter name consistent - # with ImageNet pre-trained model - conv = input - for i in range(count): - conv_name = self.na.fix_layer_warp_name(stage_num, count, i) - if self.depth < 50: - is_first = True if i == 0 and stage_num == 2 else False - conv = block_func( - input=conv, - num_filters=ch_out, - stride=2 if i == 0 and stage_num != 2 else 1, - is_first=is_first, - name=conv_name, - dcn_v2=dcn_v2) - - # add non local model - dim_in = conv.shape[1] - nonlocal_name = "nonlocal_conv{}".format(stage_num) - if i % nonlocal_mod == nonlocal_mod - 1: - conv = add_space_nonlocal(conv, dim_in, dim_in, - nonlocal_name + '_{}'.format(i), - int(dim_in / 2)) - return conv - - def c1_stage(self, input): - out_chan = self._c1_out_chan_num - - conv1_name = self.na.fix_c1_stage_name() - - if self.variant in ['c', 'd']: - conv_def = [ - [out_chan // 2, 3, 2, "conv1_1"], - [out_chan // 2, 3, 1, "conv1_2"], - [out_chan, 3, 1, "conv1_3"], - ] - else: - conv_def = [[out_chan, 7, 2, conv1_name]] - - for (c, k, s, _name) in conv_def: - input = self._conv_norm( - input=input, - num_filters=c, - filter_size=k, - stride=s, - act='relu', - name=_name) - - output = fluid.layers.pool2d( - input=input, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - return output - - def __call__(self, input): - assert isinstance(input, Variable) - assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ - "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) - - res_endpoints = [] - - res = input - feature_maps = self.feature_maps - severed_head = getattr(self, 'severed_head', False) - if not severed_head: - res = self.c1_stage(res) - feature_maps = range(2, max(self.feature_maps) + 1) - - for i in feature_maps: 
- res = self.layer_warp(res, i) - if i in self.feature_maps: - res_endpoints.append(res) - if self.freeze_at >= i: - res.stop_gradient = True - if self.get_prediction: - pool = fluid.layers.pool2d( - input=res, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - - out = fluid.layers.fc( - input=pool, - size=self.class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - out = fluid.layers.softmax(out) - return out - return OrderedDict([('res{}_sum'.format(self.feature_maps[idx]), feat) - for idx, feat in enumerate(res_endpoints)]) - - -class ResNetC5(ResNet): - def __init__(self, - depth=50, - freeze_at=2, - norm_type='affine_channel', - freeze_norm=True, - norm_decay=0., - variant='b', - feature_maps=[5], - weight_prefix_name=''): - super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm, - norm_decay, variant, feature_maps) - self.severed_head = True diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/roi_extractor.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/roi_extractor.py deleted file mode 100644 index 6e3398d8c4ceb4e78802f86de515c26d0a41e34b..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/roi_extractor.py +++ /dev/null @@ -1,76 +0,0 @@ -# coding=utf-8 -import paddle.fluid as fluid - -__all__ = ['FPNRoIAlign'] - - -class FPNRoIAlign(object): - """ - RoI align pooling for FPN feature maps - Args: - sampling_ratio (int): number of sampling points - min_level (int): lowest level of FPN layer - max_level (int): highest level of FPN layer - canconical_level (int): the canconical FPN feature map level - canonical_size (int): the canconical FPN feature map size - box_resolution (int): box resolution - mask_resolution (int): mask roi resolution - """ - - def __init__(self, - sampling_ratio=0, - min_level=2, - max_level=5, - canconical_level=4, - canonical_size=224, - box_resolution=7, - mask_resolution=14): - super(FPNRoIAlign, self).__init__() - self.sampling_ratio = sampling_ratio - self.min_level = min_level - self.max_level = max_level - self.canconical_level = canconical_level - self.canonical_size = canonical_size - self.box_resolution = box_resolution - self.mask_resolution = mask_resolution - - def __call__(self, head_inputs, rois, spatial_scale, is_mask=False): - """ - Adopt RoI align onto several level of feature maps to get RoI features. - Distribute RoIs to different levels by area and get a list of RoI - features by distributed RoIs and their corresponding feature maps. 
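The distribution step is delegated to `fluid.layers.distribute_fpn_proposals` with `canconical_level=4` and `canonical_size=224`. The sketch below shows the standard FPN level-assignment heuristic (Lin et al., 2017) that this op follows; `roi_level` is an illustrative name, and levels are clipped to the `[min_level, max_level] = [2, 5]` range configured above:

```python
import math

def roi_level(xmin, ymin, xmax, ymax, k0=4, s0=224.0, k_min=2, k_max=5):
    scale = math.sqrt(max(xmax - xmin, 0.0) * max(ymax - ymin, 0.0))
    k = math.floor(k0 + math.log2(scale / s0 + 1e-6))
    return min(max(int(k), k_min), k_max)

print(roi_level(0, 0, 224, 224))   # canonical box -> level 4
print(roi_level(0, 0, 56, 56))     # small box     -> level 2
print(roi_level(0, 0, 900, 900))   # large box     -> level 5 (clipped)
```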
- - Returns: - roi_feat(Variable): RoI features with shape of [M, C, R, R], - where M is the number of RoIs and R is RoI resolution - - """ - k_min = self.min_level - k_max = self.max_level - num_roi_lvls = k_max - k_min + 1 - name_list = list(head_inputs.keys()) - input_name_list = name_list[-num_roi_lvls:] - spatial_scale = spatial_scale[-num_roi_lvls:] - rois_dist, restore_index = fluid.layers.distribute_fpn_proposals( - rois, k_min, k_max, self.canconical_level, self.canonical_size) - # rois_dist is in ascend order - roi_out_list = [] - resolution = is_mask and self.mask_resolution or self.box_resolution - for lvl in range(num_roi_lvls): - name_index = num_roi_lvls - lvl - 1 - rois_input = rois_dist[lvl] - head_input = head_inputs[input_name_list[name_index]] - sc = spatial_scale[name_index] - roi_out = fluid.layers.roi_align( - input=head_input, - rois=rois_input, - pooled_height=resolution, - pooled_width=resolution, - spatial_scale=sc, - sampling_ratio=self.sampling_ratio) - roi_out_list.append(roi_out) - roi_feat_shuffle = fluid.layers.concat(roi_out_list) - roi_feat_ = fluid.layers.gather(roi_feat_shuffle, restore_index) - roi_feat = fluid.layers.lod_reset(roi_feat_, rois) - - return roi_feat diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/rpn_head.py b/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/rpn_head.py deleted file mode 100644 index e1b69866d3938764669e63aebda321eb1e4c5439..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/rpn_head.py +++ /dev/null @@ -1,533 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Normal -from paddle.fluid.regularizer import L2Decay - -__all__ = [ - 'AnchorGenerator', 'RPNTargetAssign', 'GenerateProposals', 'RPNHead', - 'FPNRPNHead' -] - - -class AnchorGenerator(object): - # __op__ = fluid.layers.anchor_generator - def __init__(self, - stride=[16.0, 16.0], - anchor_sizes=[32, 64, 128, 256, 512], - aspect_ratios=[0.5, 1., 2.], - variance=[1., 1., 1., 1.]): - super(AnchorGenerator, self).__init__() - self.anchor_sizes = anchor_sizes - self.aspect_ratios = aspect_ratios - self.variance = variance - self.stride = stride - - -class RPNTargetAssign(object): - # __op__ = fluid.layers.rpn_target_assign - def __init__(self, - rpn_batch_size_per_im=256, - rpn_straddle_thresh=0., - rpn_fg_fraction=0.5, - rpn_positive_overlap=0.7, - rpn_negative_overlap=0.3, - use_random=True): - super(RPNTargetAssign, self).__init__() - self.rpn_batch_size_per_im = rpn_batch_size_per_im - self.rpn_straddle_thresh = rpn_straddle_thresh - self.rpn_fg_fraction = rpn_fg_fraction - self.rpn_positive_overlap = rpn_positive_overlap - self.rpn_negative_overlap = rpn_negative_overlap - self.use_random = use_random - - -class GenerateProposals(object): - # __op__ = fluid.layers.generate_proposals - def __init__(self, - pre_nms_top_n=6000, - post_nms_top_n=1000, - nms_thresh=.5, - min_size=.1, - eta=1.): - super(GenerateProposals, self).__init__() - self.pre_nms_top_n = pre_nms_top_n - self.post_nms_top_n = post_nms_top_n - self.nms_thresh = nms_thresh - self.min_size = min_size - self.eta = eta - - -class RPNHead(object): - """ - RPN Head - - Args: - anchor_generator (object): `AnchorGenerator` instance - rpn_target_assign (object): `RPNTargetAssign` 
instance - train_proposal (object): `GenerateProposals` instance for training - test_proposal (object): `GenerateProposals` instance for testing - num_classes (int): number of classes in rpn output - """ - __inject__ = [ - 'anchor_generator', 'rpn_target_assign', 'train_proposal', - 'test_proposal' - ] - - def __init__(self, - anchor_generator, - rpn_target_assign, - train_proposal, - test_proposal, - num_classes=1): - super(RPNHead, self).__init__() - self.anchor_generator = anchor_generator - self.rpn_target_assign = rpn_target_assign - self.train_proposal = train_proposal - self.test_proposal = test_proposal - self.num_classes = num_classes - - def _get_output(self, input): - """ - Get anchor and RPN head output. - - Args: - input(Variable): feature map from backbone with shape of [N, C, H, W] - - Returns: - rpn_cls_score(Variable): Output of rpn head with shape of [N, num_anchors, H, W]. - rpn_bbox_pred(Variable): Output of rpn head with shape of [N, num_anchors * 4, H, W]. - """ - dim_out = input.shape[1] - rpn_conv = fluid.layers.conv2d( - input=input, - num_filters=dim_out, - filter_size=3, - stride=1, - padding=1, - act='relu', - name='conv_rpn', - param_attr=ParamAttr( - name="conv_rpn_w", initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.))) - # Generate anchors self.anchor_generator - self.anchor, self.anchor_var = fluid.layers.anchor_generator( - input=rpn_conv, - anchor_sizes=self.anchor_generator.anchor_sizes, - aspect_ratios=self.anchor_generator.aspect_ratios, - variance=self.anchor_generator.variance, - stride=self.anchor_generator.stride) - - num_anchor = self.anchor.shape[2] - # Proposal classification scores - self.rpn_cls_score = fluid.layers.conv2d( - rpn_conv, - num_filters=num_anchor * self.num_classes, - filter_size=1, - stride=1, - padding=0, - act=None, - name='rpn_cls_score', - param_attr=ParamAttr( - name="rpn_cls_logits_w", initializer=Normal(loc=0., - scale=0.01)), - bias_attr=ParamAttr( - name="rpn_cls_logits_b", - learning_rate=2., - regularizer=L2Decay(0.))) - # Proposal bbox regression deltas - self.rpn_bbox_pred = fluid.layers.conv2d( - rpn_conv, - num_filters=4 * num_anchor, - filter_size=1, - stride=1, - padding=0, - act=None, - name='rpn_bbox_pred', - param_attr=ParamAttr( - name="rpn_bbox_pred_w", initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name="rpn_bbox_pred_b", - learning_rate=2., - regularizer=L2Decay(0.))) - return self.rpn_cls_score, self.rpn_bbox_pred - - def get_proposals(self, body_feats, im_info, mode='train'): - """ - Get proposals according to the output of backbone. - - Args: - body_feats (dict): The dictionary of feature maps from backbone. - im_info(Variable): The information of image with shape [N, 3] with - shape (height, width, scale). - body_feat_names(list): A list of names of feature maps from - backbone. - - Returns: - rpn_rois(Variable): Output proposals with shape of (rois_num, 4). - """ - # In RPN Heads, only the last feature map of backbone is used. - # And body_feat_names[-1] represents the last level name of backbone. 
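A tiny illustration of that selection; the dict values are placeholder strings standing in for feature-map variables:

```python
from collections import OrderedDict

# body_feats is an ordered name -> tensor dict; the plain RPN head only
# consumes the final (deepest) entry, matching feature_maps=[3, 4, 5].
body_feats = OrderedDict([('res3_sum', 'C3'), ('res4_sum', 'C4'), ('res5_sum', 'C5')])
body_feat = list(body_feats.values())[-1]
print(body_feat)  # 'C5'
```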
- body_feat = list(body_feats.values())[-1] - rpn_cls_score, rpn_bbox_pred = self._get_output(body_feat) - - if self.num_classes == 1: - rpn_cls_prob = fluid.layers.sigmoid( - rpn_cls_score, name='rpn_cls_prob') - else: - rpn_cls_score = fluid.layers.transpose( - rpn_cls_score, perm=[0, 2, 3, 1]) - rpn_cls_score = fluid.layers.reshape( - rpn_cls_score, shape=(0, 0, 0, -1, self.num_classes)) - rpn_cls_prob_tmp = fluid.layers.softmax( - rpn_cls_score, use_cudnn=False, name='rpn_cls_prob') - rpn_cls_prob_slice = fluid.layers.slice( - rpn_cls_prob_tmp, axes=[4], starts=[1], ends=[self.num_classes]) - rpn_cls_prob, _ = fluid.layers.topk(rpn_cls_prob_slice, 1) - rpn_cls_prob = fluid.layers.reshape( - rpn_cls_prob, shape=(0, 0, 0, -1)) - rpn_cls_prob = fluid.layers.transpose( - rpn_cls_prob, perm=[0, 3, 1, 2]) - prop_op = self.train_proposal if mode == 'train' else self.test_proposal - # prop_op - rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals( - scores=rpn_cls_prob, - bbox_deltas=rpn_bbox_pred, - im_info=im_info, - anchors=self.anchor, - variances=self.anchor_var, - pre_nms_top_n=prop_op.pre_nms_top_n, - post_nms_top_n=prop_op.post_nms_top_n, - nms_thresh=prop_op.nms_thresh, - min_size=prop_op.min_size, - eta=prop_op.eta) - return rpn_rois - - def _transform_input(self, rpn_cls_score, rpn_bbox_pred, anchor, - anchor_var): - rpn_cls_score = fluid.layers.transpose(rpn_cls_score, perm=[0, 2, 3, 1]) - rpn_bbox_pred = fluid.layers.transpose(rpn_bbox_pred, perm=[0, 2, 3, 1]) - anchor = fluid.layers.reshape(anchor, shape=(-1, 4)) - anchor_var = fluid.layers.reshape(anchor_var, shape=(-1, 4)) - rpn_cls_score = fluid.layers.reshape( - x=rpn_cls_score, shape=(0, -1, self.num_classes)) - rpn_bbox_pred = fluid.layers.reshape(x=rpn_bbox_pred, shape=(0, -1, 4)) - return rpn_cls_score, rpn_bbox_pred, anchor, anchor_var - - def _get_loss_input(self): - for attr in ['rpn_cls_score', 'rpn_bbox_pred', 'anchor', 'anchor_var']: - if not getattr(self, attr, None): - raise ValueError("self.{} should not be None,".format(attr), - "call RPNHead.get_proposals first") - return self._transform_input(self.rpn_cls_score, self.rpn_bbox_pred, - self.anchor, self.anchor_var) - - def get_loss(self, im_info, gt_box, is_crowd, gt_label=None): - """ - Sample proposals and Calculate rpn loss. - - Args: - im_info(Variable): The information of image with shape [N, 3] with - shape (height, width, scale). - gt_box(Variable): The ground-truth bounding boxes with shape [M, 4]. - M is the number of groundtruth. - is_crowd(Variable): Indicates groud-truth is crowd or not with - shape [M, 1]. M is the number of groundtruth. - - Returns: - Type: dict - rpn_cls_loss(Variable): RPN classification loss. - rpn_bbox_loss(Variable): RPN bounding box regression loss. 
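The regression term below is `fluid.layers.smooth_l1` with `sigma=3.0`, normalized by the number of anchor targets. A numpy sketch of the sigma-parameterized smooth-L1, assuming the standard definition (with `sigma=3.0` the quadratic zone shrinks to `|x| < 1/9`):

```python
import numpy as np

def smooth_l1(x, sigma=3.0):
    beta = 1.0 / (sigma ** 2)
    ax = np.abs(x)
    # quadratic near zero, linear in the tails
    return np.where(ax < beta, 0.5 * (sigma * x) ** 2, ax - 0.5 * beta)

diff = np.array([-0.5, -0.05, 0.0, 0.05, 0.5])
print(smooth_l1(diff))
```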
- - """ - rpn_cls, rpn_bbox, anchor, anchor_var = self._get_loss_input() - if self.num_classes == 1: - # self.rpn_target_assign - score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ - fluid.layers.rpn_target_assign( - bbox_pred=rpn_bbox, - cls_logits=rpn_cls, - anchor_box=anchor, - anchor_var=anchor_var, - gt_boxes=gt_box, - is_crowd=is_crowd, - im_info=im_info, - rpn_batch_size_per_im=self.rpn_target_assign.rpn_batch_size_per_im, - rpn_straddle_thresh=self.rpn_target_assign.rpn_straddle_thresh, - rpn_fg_fraction=self.rpn_target_assign.rpn_fg_fraction, - rpn_positive_overlap=self.rpn_target_assign.rpn_positive_overlap, - rpn_negative_overlap=self.rpn_target_assign.rpn_negative_overlap, - use_random=self.rpn_target_assign.use_random) - score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32') - score_tgt.stop_gradient = True - rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=score_pred, label=score_tgt) - else: - score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ - self.rpn_target_assign( - bbox_pred=rpn_bbox, - cls_logits=rpn_cls, - anchor_box=anchor, - anchor_var=anchor_var, - gt_boxes=gt_box, - gt_labels=gt_label, - is_crowd=is_crowd, - num_classes=self.num_classes, - im_info=im_info) - labels_int64 = fluid.layers.cast(x=score_tgt, dtype='int64') - labels_int64.stop_gradient = True - rpn_cls_loss = fluid.layers.softmax_with_cross_entropy( - logits=score_pred, label=labels_int64, numeric_stable_mode=True) - - rpn_cls_loss = fluid.layers.reduce_mean( - rpn_cls_loss, name='loss_rpn_cls') - - loc_tgt = fluid.layers.cast(x=loc_tgt, dtype='float32') - loc_tgt.stop_gradient = True - rpn_reg_loss = fluid.layers.smooth_l1( - x=loc_pred, - y=loc_tgt, - sigma=3.0, - inside_weight=bbox_weight, - outside_weight=bbox_weight) - rpn_reg_loss = fluid.layers.reduce_sum( - rpn_reg_loss, name='loss_rpn_bbox') - score_shape = fluid.layers.shape(score_tgt) - score_shape = fluid.layers.cast(x=score_shape, dtype='float32') - norm = fluid.layers.reduce_prod(score_shape) - norm.stop_gradient = True - rpn_reg_loss = rpn_reg_loss / norm - return {'rpn_cls_loss': rpn_cls_loss, 'rpn_reg_loss': rpn_reg_loss} - - -class FPNRPNHead(RPNHead): - """ - RPN Head that supports FPN input - - Args: - anchor_generator (object): `AnchorGenerator` instance - rpn_target_assign (object): `RPNTargetAssign` instance - train_proposal (object): `GenerateProposals` instance for training - test_proposal (object): `GenerateProposals` instance for testing - anchor_start_size (int): size of anchor at the first scale - num_chan (int): number of FPN output channels - min_level (int): lowest level of FPN output - max_level (int): highest level of FPN output - num_classes (int): number of classes in rpn output - """ - - def __init__(self, - anchor_generator, - rpn_target_assign, - train_proposal, - test_proposal, - anchor_start_size=32, - num_chan=256, - min_level=2, - max_level=6, - num_classes=1): - super(FPNRPNHead, self).__init__(anchor_generator, rpn_target_assign, - train_proposal, test_proposal) - self.anchor_start_size = anchor_start_size - self.num_chan = num_chan - self.min_level = min_level - self.max_level = max_level - self.num_classes = num_classes - - self.fpn_rpn_list = [] - self.anchors_list = [] - self.anchor_var_list = [] - - def _get_output(self, input, feat_lvl): - """ - Get anchor and FPN RPN head output at one level. - - Args: - input(Variable): Body feature from backbone. - feat_lvl(int): Indicate the level of rpn output corresponding - to the level of feature map. 
- - Return: - rpn_cls_score(Variable): Output of one level of fpn rpn head with - shape of [N, num_anchors, H, W]. - rpn_bbox_pred(Variable): Output of one level of fpn rpn head with - shape of [N, num_anchors * 4, H, W]. - """ - slvl = str(feat_lvl) - conv_name = 'conv_rpn_fpn' + slvl - cls_name = 'rpn_cls_logits_fpn' + slvl - bbox_name = 'rpn_bbox_pred_fpn' + slvl - conv_share_name = 'conv_rpn_fpn' + str(self.min_level) - cls_share_name = 'rpn_cls_logits_fpn' + str(self.min_level) - bbox_share_name = 'rpn_bbox_pred_fpn' + str(self.min_level) - - num_anchors = len(self.anchor_generator.aspect_ratios) - conv_rpn_fpn = fluid.layers.conv2d( - input=input, - num_filters=self.num_chan, - filter_size=3, - padding=1, - act='relu', - name=conv_name, - param_attr=ParamAttr( - name=conv_share_name + '_w', - initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=conv_share_name + '_b', - learning_rate=2., - regularizer=L2Decay(0.))) - - # self.anchor_generator - self.anchors, self.anchor_var = fluid.layers.anchor_generator( - input=conv_rpn_fpn, - anchor_sizes=(self.anchor_start_size * 2.** - (feat_lvl - self.min_level), ), - stride=(2.**feat_lvl, 2.**feat_lvl), - aspect_ratios=self.anchor_generator.aspect_ratios, - variance=self.anchor_generator.variance) - - cls_num_filters = num_anchors * self.num_classes - self.rpn_cls_score = fluid.layers.conv2d( - input=conv_rpn_fpn, - num_filters=cls_num_filters, - filter_size=1, - act=None, - name=cls_name, - param_attr=ParamAttr( - name=cls_share_name + '_w', - initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=cls_share_name + '_b', - learning_rate=2., - regularizer=L2Decay(0.))) - self.rpn_bbox_pred = fluid.layers.conv2d( - input=conv_rpn_fpn, - num_filters=num_anchors * 4, - filter_size=1, - act=None, - name=bbox_name, - param_attr=ParamAttr( - name=bbox_share_name + '_w', - initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=bbox_share_name + '_b', - learning_rate=2., - regularizer=L2Decay(0.))) - return self.rpn_cls_score, self.rpn_bbox_pred - - def _get_single_proposals(self, body_feat, im_info, feat_lvl, mode='train'): - """ - Get proposals in one level according to the output of fpn rpn head - - Args: - body_feat(Variable): the feature map from backone. - im_info(Variable): The information of image with shape [N, 3] with - format (height, width, scale). - feat_lvl(int): Indicate the level of proposals corresponding to - the feature maps. - - Returns: - rpn_rois_fpn(Variable): Output proposals with shape of (rois_num, 4). - rpn_roi_probs_fpn(Variable): Scores of proposals with - shape of (rois_num, 1). 
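The per-level anchor geometry follows directly from the expressions in `_get_output` above; a short illustration with the defaults used by this head (`anchor_start_size=32`, `min_level=2`, `max_level=6`):

```python
anchor_start_size, min_level, max_level = 32, 2, 6
for lvl in range(min_level, max_level + 1):
    size = anchor_start_size * 2.0 ** (lvl - min_level)
    stride = 2.0 ** lvl
    print('level %d: anchor %gpx, stride %g' % (lvl, size, stride))
# level 2: anchor 32px, stride 4  ...  level 6: anchor 512px, stride 64
```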
- """ - - rpn_cls_score_fpn, rpn_bbox_pred_fpn = self._get_output( - body_feat, feat_lvl) - - prop_op = self.train_proposal if mode == 'train' else self.test_proposal - if self.num_classes == 1: - rpn_cls_prob_fpn = fluid.layers.sigmoid( - rpn_cls_score_fpn, name='rpn_cls_prob_fpn' + str(feat_lvl)) - else: - rpn_cls_score_fpn = fluid.layers.transpose( - rpn_cls_score_fpn, perm=[0, 2, 3, 1]) - rpn_cls_score_fpn = fluid.layers.reshape( - rpn_cls_score_fpn, shape=(0, 0, 0, -1, self.num_classes)) - rpn_cls_prob_fpn = fluid.layers.softmax( - rpn_cls_score_fpn, - use_cudnn=False, - name='rpn_cls_prob_fpn' + str(feat_lvl)) - rpn_cls_prob_fpn = fluid.layers.slice( - rpn_cls_prob_fpn, axes=[4], starts=[1], ends=[self.num_classes]) - rpn_cls_prob_fpn, _ = fluid.layers.topk(rpn_cls_prob_fpn, 1) - rpn_cls_prob_fpn = fluid.layers.reshape( - rpn_cls_prob_fpn, shape=(0, 0, 0, -1)) - rpn_cls_prob_fpn = fluid.layers.transpose( - rpn_cls_prob_fpn, perm=[0, 3, 1, 2]) - # prop_op - rpn_rois_fpn, rpn_roi_prob_fpn = fluid.layers.generate_proposals( - scores=rpn_cls_prob_fpn, - bbox_deltas=rpn_bbox_pred_fpn, - im_info=im_info, - anchors=self.anchors, - variances=self.anchor_var, - pre_nms_top_n=prop_op.pre_nms_top_n, - post_nms_top_n=prop_op.post_nms_top_n, - nms_thresh=prop_op.nms_thresh, - min_size=prop_op.min_size, - eta=prop_op.eta) - return rpn_rois_fpn, rpn_roi_prob_fpn - - def get_proposals(self, fpn_feats, im_info, mode='train'): - """ - Get proposals in multiple levels according to the output of fpn - rpn head - - Args: - fpn_feats(dict): A dictionary represents the output feature map - of FPN with their name. - im_info(Variable): The information of image with shape [N, 3] with - format (height, width, scale). - - Return: - rois_list(Variable): Output proposals in shape of [rois_num, 4] - """ - rois_list = [] - roi_probs_list = [] - fpn_feat_names = list(fpn_feats.keys()) - for lvl in range(self.min_level, self.max_level + 1): - fpn_feat_name = fpn_feat_names[self.max_level - lvl] - fpn_feat = fpn_feats[fpn_feat_name] - rois_fpn, roi_probs_fpn = self._get_single_proposals( - fpn_feat, im_info, lvl, mode) - self.fpn_rpn_list.append((self.rpn_cls_score, self.rpn_bbox_pred)) - rois_list.append(rois_fpn) - roi_probs_list.append(roi_probs_fpn) - self.anchors_list.append(self.anchors) - self.anchor_var_list.append(self.anchor_var) - prop_op = self.train_proposal if mode == 'train' else self.test_proposal - post_nms_top_n = prop_op.post_nms_top_n - rois_collect = fluid.layers.collect_fpn_proposals( - rois_list, - roi_probs_list, - self.min_level, - self.max_level, - post_nms_top_n, - name='collect') - return rois_collect - - def _get_loss_input(self): - rpn_clses = [] - rpn_bboxes = [] - anchors = [] - anchor_vars = [] - for i in range(len(self.fpn_rpn_list)): - single_input = self._transform_input( - self.fpn_rpn_list[i][0], self.fpn_rpn_list[i][1], - self.anchors_list[i], self.anchor_var_list[i]) - rpn_clses.append(single_input[0]) - rpn_bboxes.append(single_input[1]) - anchors.append(single_input[2]) - anchor_vars.append(single_input[3]) - - rpn_cls = fluid.layers.concat(rpn_clses, axis=1) - rpn_bbox = fluid.layers.concat(rpn_bboxes, axis=1) - anchors = fluid.layers.concat(anchors) - anchor_var = fluid.layers.concat(anchor_vars) - return rpn_cls, rpn_bbox, anchors, anchor_var diff --git a/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/data_feed.py b/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/data_feed.py deleted file mode 100644 index 
b880e4f0e5af6b9fa7736a0c748546648ad71280..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/data_feed.py +++ /dev/null @@ -1,112 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import os -from collections import OrderedDict - -import numpy as np -import cv2 -from PIL import Image, ImageEnhance -from paddle import fluid - -__all__ = ['test_reader', 'padding_minibatch'] - - -def test_reader(paths=None, images=None): - """ - data generator - - Args: - paths (list[str]): paths to images. - images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] - - Yield: - res (dict): key contains 'image' and 'im_info', the corresponding values is: - image (numpy.ndarray): the image to be fed into network - im_info (numpy.ndarray): the info about the preprocessed. - """ - img_list = list() - if paths: - for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file path.".format(img_path) - img = cv2.imread(img_path).astype('float32') - img_list.append(img) - if images is not None: - for img in images: - img_list.append(img) - for im in img_list: - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - im = im.astype(np.float32, copy=False) - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - mean = np.array(mean)[np.newaxis, np.newaxis, :] - std = np.array(std)[np.newaxis, np.newaxis, :] - im = im / 255.0 - im -= mean - im /= std - target_size = 800 - max_size = 1333 - shape = im.shape - # im_shape holds the original shape of image. - # im_shape = np.array([shape[0], shape[1], 1.0]).astype('float32') - im_size_min = np.min(shape[0:2]) - im_size_max = np.max(shape[0:2]) - im_scale = float(target_size) / float(im_size_min) - if np.round(im_scale * im_size_max) > max_size: - im_scale = float(max_size) / float(im_size_max) - - resize_w = np.round(im_scale * float(shape[1])) - resize_h = np.round(im_scale * float(shape[0])) - # im_info holds the resize info of image. 
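A numpy sketch of the resize rule implemented above: scale the short side to `target_size`, unless that would push the long side past `max_size`, in which case the long side caps the scale. `compute_scale` is an illustrative name:

```python
import numpy as np

def compute_scale(h, w, target_size=800, max_size=1333):
    im_size_min, im_size_max = min(h, w), max(h, w)
    im_scale = float(target_size) / float(im_size_min)
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    return im_scale

for shape in [(480, 640), (600, 1800)]:
    s = compute_scale(*shape)
    print(shape, '->', (round(shape[0] * s), round(shape[1] * s)))
# (480, 640)  -> short side hits 800:  (800, 1067)
# (600, 1800) -> capped by max_size:   (444, 1333)
```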
- im_info = np.array([resize_h, resize_w, im_scale]).astype('float32') - - im = cv2.resize( - im, - None, - None, - fx=im_scale, - fy=im_scale, - interpolation=cv2.INTER_LINEAR) - - # HWC --> CHW - im = np.swapaxes(im, 1, 2) - im = np.swapaxes(im, 1, 0) - yield {'image': im, 'im_info': im_info} - - -def padding_minibatch(batch_data, coarsest_stride=0, use_padded_im_info=True): - max_shape_org = np.array( - [data['image'].shape for data in batch_data]).max(axis=0) - if coarsest_stride > 0: - max_shape = np.zeros((3)).astype('int32') - max_shape[1] = int( - np.ceil(max_shape_org[1] / coarsest_stride) * coarsest_stride) - max_shape[2] = int( - np.ceil(max_shape_org[2] / coarsest_stride) * coarsest_stride) - else: - max_shape = max_shape_org.astype('int32') - - padding_image = list() - padding_info = list() - padding_shape = list() - - for data in batch_data: - im_c, im_h, im_w = data['image'].shape - # image - padding_im = np.zeros((im_c, max_shape[1], max_shape[2]), - dtype=np.float32) - padding_im[:, 0:im_h, 0:im_w] = data['image'] - padding_image.append(padding_im) - # im_info - data['im_info'][ - 0] = max_shape[1] if use_padded_im_info else max_shape_org[1] - data['im_info'][ - 1] = max_shape[2] if use_padded_im_info else max_shape_org[2] - padding_info.append(data['im_info']) - - padding_image = np.array(padding_image).astype('float32') - padding_info = np.array(padding_info).astype('float32') - return padding_image, padding_info diff --git a/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/fpn.py b/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/fpn.py deleted file mode 100644 index 5945527056b29ddc2310aa2a6ada1554cc7cec8d..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/fpn.py +++ /dev/null @@ -1,282 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import copy -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Xavier -from paddle.fluid.regularizer import L2Decay - -__all__ = ['FPN'] - - -def ConvNorm(input, - num_filters, - filter_size, - stride=1, - groups=1, - norm_decay=0., - norm_type='affine_channel', - norm_groups=32, - dilation=1, - lr_scale=1, - freeze_norm=False, - act=None, - norm_name=None, - initializer=None, - name=None): - fan = num_filters - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=((filter_size - 1) // 2) * dilation, - dilation=dilation, - groups=groups, - act=None, - param_attr=ParamAttr( - name=name + "_weights", - initializer=initializer, - learning_rate=lr_scale), - bias_attr=False, - name=name + '.conv2d.output.1') - norm_lr = 0. if freeze_norm else 1. 
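For reference, the `padding_minibatch` helper above rounds the batch's maximum height/width up to a multiple of `coarsest_stride` (32 for FPN) before zero-padding each image. A minimal numpy sketch of that shape computation; `padded_hw` is an illustrative name:

```python
import numpy as np

def padded_hw(shapes, coarsest_stride=32):
    max_h = max(s[0] for s in shapes)
    max_w = max(s[1] for s in shapes)
    pad = lambda v: int(np.ceil(v / float(coarsest_stride)) * coarsest_stride)
    return pad(max_h), pad(max_w)

print(padded_hw([(800, 1067), (750, 1333)]))  # (800, 1344)
```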
- pattr = ParamAttr( - name=norm_name + '_scale', - learning_rate=norm_lr * lr_scale, - regularizer=L2Decay(norm_decay)) - battr = ParamAttr( - name=norm_name + '_offset', - learning_rate=norm_lr * lr_scale, - regularizer=L2Decay(norm_decay)) - if norm_type in ['bn', 'sync_bn']: - global_stats = True if freeze_norm else False - out = fluid.layers.batch_norm( - input=conv, - act=act, - name=norm_name + '.output.1', - param_attr=pattr, - bias_attr=battr, - moving_mean_name=norm_name + '_mean', - moving_variance_name=norm_name + '_variance', - use_global_stats=global_stats) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif norm_type == 'gn': - out = fluid.layers.group_norm( - input=conv, - act=act, - name=norm_name + '.output.1', - groups=norm_groups, - param_attr=pattr, - bias_attr=battr) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif norm_type == 'affine_channel': - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=pattr, - default_initializer=fluid.initializer.Constant(1.)) - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=battr, - default_initializer=fluid.initializer.Constant(0.)) - out = fluid.layers.affine_channel( - x=conv, scale=scale, bias=bias, act=act) - if freeze_norm: - scale.stop_gradient = True - bias.stop_gradient = True - return out - - -class FPN(object): - """ - Feature Pyramid Network, see https://arxiv.org/abs/1612.03144 - - Args: - num_chan (int): number of feature channels - min_level (int): lowest level of the backbone feature map to use - max_level (int): highest level of the backbone feature map to use - spatial_scale (list): feature map scaling factor - has_extra_convs (bool): whether has extral convolutions in higher levels - norm_type (str|None): normalization type, 'bn'/'sync_bn'/'affine_channel' - """ - __shared__ = ['norm_type', 'freeze_norm'] - - def __init__(self, - num_chan=256, - min_level=2, - max_level=6, - spatial_scale=[1. / 32., 1. / 16., 1. / 8., 1. / 4.], - has_extra_convs=False, - norm_type=None, - freeze_norm=False): - self.freeze_norm = freeze_norm - self.num_chan = num_chan - self.min_level = min_level - self.max_level = max_level - self.spatial_scale = spatial_scale - self.has_extra_convs = has_extra_convs - self.norm_type = norm_type - - def _add_topdown_lateral(self, body_name, body_input, upper_output): - lateral_name = 'fpn_inner_' + body_name + '_lateral' - topdown_name = 'fpn_topdown_' + body_name - fan = body_input.shape[1] - if self.norm_type: - initializer = Xavier(fan_out=fan) - lateral = ConvNorm( - body_input, - self.num_chan, - 1, - initializer=initializer, - norm_type=self.norm_type, - freeze_norm=self.freeze_norm, - name=lateral_name, - norm_name=lateral_name) - else: - lateral = fluid.layers.conv2d( - body_input, - self.num_chan, - 1, - param_attr=ParamAttr( - name=lateral_name + "_w", initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name=lateral_name + "_b", - learning_rate=2., - regularizer=L2Decay(0.)), - name=lateral_name) - topdown = fluid.layers.resize_nearest( - upper_output, scale=2., name=topdown_name) - - return lateral + topdown - - def get_output(self, body_dict): - """ - Add FPN onto backbone. - - Args: - body_dict(OrderedDict): Dictionary of variables and each element is the - output of backbone. - - Return: - fpn_dict(OrderedDict): A dictionary represents the output of FPN with - their name. 
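`_add_topdown_lateral` above merges a 1x1 lateral projection of the backbone feature with a 2x nearest-neighbor upsample of the coarser FPN level. A numpy sketch of that merge on plain arrays; names and shapes are illustrative:

```python
import numpy as np

def topdown_lateral(body_input, upper_output, w_lateral):
    # body_input: [C_in, H, W]; upper_output: [num_chan, H/2, W/2]
    # w_lateral:  [num_chan, C_in] -- weights of the 1x1 lateral conv
    c, h, w = body_input.shape
    lateral = (w_lateral @ body_input.reshape(c, h * w)).reshape(-1, h, w)
    topdown = upper_output.repeat(2, axis=1).repeat(2, axis=2)  # nearest, scale=2
    return lateral + topdown

out = topdown_lateral(np.ones((8, 4, 4)), np.ones((3, 2, 2)), np.ones((3, 8)))
print(out.shape)  # (3, 4, 4)
```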
- spatial_scale(list): A list of multiplicative spatial scale factor. - """ - spatial_scale = copy.deepcopy(self.spatial_scale) - body_name_list = list(body_dict.keys())[::-1] - num_backbone_stages = len(body_name_list) - self.fpn_inner_output = [[] for _ in range(num_backbone_stages)] - fpn_inner_name = 'fpn_inner_' + body_name_list[0] - body_input = body_dict[body_name_list[0]] - fan = body_input.shape[1] - if self.norm_type: - initializer = Xavier(fan_out=fan) - self.fpn_inner_output[0] = ConvNorm( - body_input, - self.num_chan, - 1, - initializer=initializer, - norm_type=self.norm_type, - freeze_norm=self.freeze_norm, - name=fpn_inner_name, - norm_name=fpn_inner_name) - else: - self.fpn_inner_output[0] = fluid.layers.conv2d( - body_input, - self.num_chan, - 1, - param_attr=ParamAttr( - name=fpn_inner_name + "_w", - initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name=fpn_inner_name + "_b", - learning_rate=2., - regularizer=L2Decay(0.)), - name=fpn_inner_name) - for i in range(1, num_backbone_stages): - body_name = body_name_list[i] - body_input = body_dict[body_name] - top_output = self.fpn_inner_output[i - 1] - fpn_inner_single = self._add_topdown_lateral( - body_name, body_input, top_output) - self.fpn_inner_output[i] = fpn_inner_single - fpn_dict = {} - fpn_name_list = [] - for i in range(num_backbone_stages): - fpn_name = 'fpn_' + body_name_list[i] - fan = self.fpn_inner_output[i].shape[1] * 3 * 3 - if self.norm_type: - initializer = Xavier(fan_out=fan) - fpn_output = ConvNorm( - self.fpn_inner_output[i], - self.num_chan, - 3, - initializer=initializer, - norm_type=self.norm_type, - freeze_norm=self.freeze_norm, - name=fpn_name, - norm_name=fpn_name) - else: - fpn_output = fluid.layers.conv2d( - self.fpn_inner_output[i], - self.num_chan, - filter_size=3, - padding=1, - param_attr=ParamAttr( - name=fpn_name + "_w", initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name=fpn_name + "_b", - learning_rate=2., - regularizer=L2Decay(0.)), - name=fpn_name) - fpn_dict[fpn_name] = fpn_output - fpn_name_list.append(fpn_name) - if not self.has_extra_convs and self.max_level - self.min_level == len( - spatial_scale): - body_top_name = fpn_name_list[0] - body_top_extension = fluid.layers.pool2d( - fpn_dict[body_top_name], - 1, - 'max', - pool_stride=2, - name=body_top_name + '_subsampled_2x') - fpn_dict[body_top_name + '_subsampled_2x'] = body_top_extension - fpn_name_list.insert(0, body_top_name + '_subsampled_2x') - spatial_scale.insert(0, spatial_scale[0] * 0.5) - # Coarser FPN levels introduced for RetinaNet - highest_backbone_level = self.min_level + len(spatial_scale) - 1 - if self.has_extra_convs and self.max_level > highest_backbone_level: - fpn_blob = body_dict[body_name_list[0]] - for i in range(highest_backbone_level + 1, self.max_level + 1): - fpn_blob_in = fpn_blob - fpn_name = 'fpn_' + str(i) - if i > highest_backbone_level + 1: - fpn_blob_in = fluid.layers.relu(fpn_blob) - fan = fpn_blob_in.shape[1] * 3 * 3 - fpn_blob = fluid.layers.conv2d( - input=fpn_blob_in, - num_filters=self.num_chan, - filter_size=3, - stride=2, - padding=1, - param_attr=ParamAttr( - name=fpn_name + "_w", initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name=fpn_name + "_b", - learning_rate=2., - regularizer=L2Decay(0.)), - name=fpn_name) - fpn_dict[fpn_name] = fpn_blob - fpn_name_list.insert(0, fpn_name) - spatial_scale.insert(0, spatial_scale[0] * 0.5) - res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list]) - return res_dict, spatial_scale diff --git 
a/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/module.py b/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/module.py deleted file mode 100644 index ceec9ca585e2a52d592638c0a9ebffc39ccb0cab..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/module.py +++ /dev/null @@ -1,365 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import ast -import argparse -from functools import partial - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.module.module import moduleinfo, runnable, serving -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.io.parser import txt_parser -from paddlehub.common.paddle_helper import add_vars_prefix - -from retinanet_resnet50_fpn_coco2017.fpn import FPN -from retinanet_resnet50_fpn_coco2017.retina_head import AnchorGenerator, RetinaTargetAssign, RetinaOutputDecoder, RetinaHead -from retinanet_resnet50_fpn_coco2017.processor import load_label_info, postprocess, base64_to_cv2 -from retinanet_resnet50_fpn_coco2017.data_feed import test_reader, padding_minibatch -from retinanet_resnet50_fpn_coco2017.resnet import ResNet - - -@moduleinfo( - name="retinanet_resnet50_fpn_coco2017", - version="1.0.0", - type="cv/object_detection", - summary= - "Baidu's RetinaNet model for object detection, with backbone ResNet50 and FPN.", - author="paddlepaddle", - author_email="paddle-dev@baidu.com") -class RetinaNetResNet50FPN(hub.Module): - def _initialize(self): - # default pretrained model of Retinanet_ResNet50_FPN, the shape of input image tensor is (3, 608, 608) - self.default_pretrained_model_path = os.path.join( - self.directory, "retinanet_resnet50_fpn_model") - self.label_names = load_label_info( - os.path.join(self.directory, "label_file.txt")) - self.infer_prog = None - self.image = None - self.im_info = None - self.bbox_out = None - self._set_config() - - def _set_config(self): - """ - predictor config setting - """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, - num_classes=81, - trainable=True, - pretrained=True, - phase='train'): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - num_classes (int): number of classes. - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - phase (str): optional choices are 'train' and 'predict'. - - Returns: - inputs(dict): the input variables. - outputs(dict): the output variables. - context_prog (Program): the program to execute transfer learning. 
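A hedged usage sketch of this API, assuming a standard PaddleHub 1.x installation in which the module is available under the name declared in `moduleinfo` above:

```python
import paddlehub as hub

module = hub.Module(name="retinanet_resnet50_fpn_coco2017")
# In the 'train' phase the outputs expose the FPN body features for
# fine-tuning; in 'predict' they expose 'bbox_out' instead (see below).
inputs, outputs, context_prog = module.context(trainable=True, phase='train')
print(list(inputs.keys()), list(outputs.keys()))
```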
- """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - var_prefix = '@HUB_{}@'.format(self.name) - # image - image = fluid.layers.data( - name='image', - shape=[-1, 3, -1, -1], - dtype='float32', - lod_level=0) - # im_info - im_info = fluid.layers.data( - name='im_info', shape=[3], dtype='float32', lod_level=0) - # backbone - backbone = ResNet( - norm_type='affine_channel', - freeze_at=2, - norm_decay=0., - depth=50, - feature_maps=[3, 4, 5]) - body_feats = backbone(image) - # retina_head - retina_head = RetinaHead( - anchor_generator=AnchorGenerator( - aspect_ratios=[1.0, 2.0, 0.5], - variance=[1.0, 1.0, 1.0, 1.0]), - target_assign=RetinaTargetAssign( - positive_overlap=0.5, negative_overlap=0.4), - output_decoder=RetinaOutputDecoder( - score_thresh=0.05, - nms_thresh=0.5, - pre_nms_top_n=1000, - detections_per_im=100, - nms_eta=1.0), - num_convs_per_octave=4, - num_chan=256, - max_level=7, - min_level=3, - prior_prob=0.01, - base_scale=4, - num_scales_per_octave=3) - # fpn - fpn = FPN( - max_level=7, - min_level=3, - num_chan=256, - spatial_scale=[0.03125, 0.0625, 0.125], - has_extra_convs=True) - # body_feats - body_feats, spatial_scale = fpn.get_output(body_feats) - # inputs, outputs, context_prog - inputs = { - 'image': var_prefix + image.name, - 'im_info': var_prefix + im_info.name - } - if phase == 'predict': - pred = retina_head.get_prediction(body_feats, spatial_scale, - im_info) - outputs = {'bbox_out': var_prefix + pred.name} - else: - outputs = { - 'body_features': [ - var_prefix + var.name - for key, var in body_feats.items() - ] - } - - # add_vars_prefix - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(fluid.default_startup_program(), var_prefix) - - global_vars = context_prog.global_block().vars - inputs = { - key: global_vars[value] - for key, value in inputs.items() - } - outputs = { - key: global_vars[value] if not isinstance(value, list) else - [global_vars[var] for var in value] - for key, value in outputs.items() - } - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - if pretrained: - - def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) - else: - exe.run(startup_program) - return inputs, outputs, context_prog - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - def object_detection(self, - paths=None, - images=None, - use_gpu=False, - batch_size=1, - output_dir='detection_result', - score_thresh=0.5, - visualization=True): - """API of Object Detection. - - Args: - paths (list[str]): The paths of images. 
- images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - output_dir (str): The path to store output images. - visualization (bool): Whether to save image or not. - score_thresh (float): threshold for object detecion. - visualization (bool): whether to save result as images. - - Returns: - res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of detection result; - confidence (float): The confidence of detection result. - save_path (str, optional): The path to save output images. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." - ) - - all_images = list() - paths = paths if paths else list() - for yield_data in test_reader(paths, images): - all_images.append(yield_data) - - images_num = len(all_images) - loop_num = int(np.ceil(images_num / batch_size)) - res = list() - for iter_id in range(loop_num): - batch_data = list() - handle_id = iter_id * batch_size - for image_id in range(batch_size): - try: - batch_data.append(all_images[handle_id + image_id]) - except: - pass - padding_image, padding_info = padding_minibatch( - batch_data, coarsest_stride=32, use_padded_im_info=True) - padding_image_tensor = PaddleTensor(padding_image.copy()) - padding_info_tensor = PaddleTensor(padding_info.copy()) - feed_list = [padding_image_tensor, padding_info_tensor] - if use_gpu: - data_out = self.gpu_predictor.run(feed_list) - else: - data_out = self.cpu_predictor.run(feed_list) - output = postprocess( - paths=paths, - images=images, - data_out=data_out, - score_thresh=score_thresh, - label_names=self.label_names, - output_dir=output_dir, - handle_id=handle_id, - visualization=visualization) - res += output - return res - - def add_module_config_arg(self): - """ - Add the command config options - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - - self.arg_config_group.add_argument( - '--batch_size', - type=int, - default=1, - help="batch size for prediction") - - def add_module_input_arg(self): - """ - Add the command input options - """ - self.arg_input_group.add_argument( - '--input_path', type=str, default=None, help="input data") - - self.arg_input_group.add_argument( - '--input_file', - type=str, - default=None, - help="file contain input data") - - def check_input_data(self, args): - input_data = list() - if args.input_path: - input_data = [args.input_path] - elif args.input_file: - if not os.path.exists(args.input_file): - raise RuntimeError("File %s is not exist." % args.input_file) - else: - input_data = txt_parser.parse(args.input_file, use_strip=True) - return input_data - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. 
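-
-        Example client call (illustrative; assumes a local deployment via
-        `hub serving start -m retinanet_resnet50_fpn_coco2017` on the
-        default port):
-
-            import base64
-            import json
-
-            import requests
-
-            b64 = base64.b64encode(open('test.jpg', 'rb').read()).decode('utf8')
-            r = requests.post(
-                'http://127.0.0.1:8866/predict/retinanet_resnet50_fpn_coco2017',
-                headers={'Content-Type': 'application/json'},
-                data=json.dumps({'images': [b64]}))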
- """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.object_detection(images=images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - self.parser = argparse.ArgumentParser( - description="Run the {}".format(self.name), - prog="hub run {}".format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - input_data = self.check_input_data(args) - if len(input_data) == 0: - self.parser.print_help() - exit(1) - else: - for image_path in input_data: - if not os.path.exists(image_path): - raise RuntimeError( - "File %s or %s is not exist." % image_path) - return self.object_detection( - paths=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size) diff --git a/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/nonlocal_helper.py b/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/nonlocal_helper.py deleted file mode 100644 index 599b8dfa07b6372f25990649212b232c523e19de..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/nonlocal_helper.py +++ /dev/null @@ -1,154 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import paddle.fluid as fluid -from paddle.fluid import ParamAttr - -nonlocal_params = { - "use_zero_init_conv": False, - "conv_init_std": 0.01, - "no_bias": True, - "use_maxpool": False, - "use_softmax": True, - "use_bn": False, - "use_scale": True, # vital for the model prformance!!! 
- "use_affine": False, - "bn_momentum": 0.9, - "bn_epsilon": 1.0000001e-5, - "bn_init_gamma": 0.9, - "weight_decay_bn": 1.e-4, -} - - -def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner, - max_pool_stride=2): - cur = input - theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr=ParamAttr(name = prefix + '_theta' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if not nonlocal_params["no_bias"] else False, \ - name = prefix + '_theta') - theta_shape = theta.shape - theta_shape_op = fluid.layers.shape(theta) - theta_shape_op.stop_gradient = True - - if nonlocal_params["use_maxpool"]: - max_pool = fluid.layers.pool2d(input = cur, \ - pool_size = [max_pool_stride, max_pool_stride], \ - pool_type = 'max', \ - pool_stride = [max_pool_stride, max_pool_stride], \ - pool_padding = [0, 0], \ - name = prefix + '_pool') - else: - max_pool = cur - - phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_phi' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_phi') - phi_shape = phi.shape - - g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_g' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_g' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_g') - g_shape = g.shape - # we have to use explicit batch size (to support arbitrary spacetime size) - # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784) - theta = fluid.layers.reshape(theta, shape=(0, 0, -1)) - theta = fluid.layers.transpose(theta, [0, 2, 1]) - phi = fluid.layers.reshape(phi, [0, 0, -1]) - theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity') - g = fluid.layers.reshape(g, [0, 0, -1]) - - if nonlocal_params["use_softmax"]: - if nonlocal_params["use_scale"]: - theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5) - else: - theta_phi_sc = theta_phi - p = fluid.layers.softmax( - theta_phi_sc, name=prefix + '_affinity' + '_prob') - else: - # not clear about what is doing in xlw's code - p = None # not implemented - raise "Not implemented when not use softmax" - - # note g's axis[2] corresponds to p's axis[2] - # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1) - p = fluid.layers.transpose(p, [0, 2, 1]) - t = fluid.layers.matmul(g, p, name=prefix + '_y') - - # reshape back - # e.g. 
(8, 1024, 784) => (8, 1024, 4, 14, 14) - t_shape = t.shape - t_re = fluid.layers.reshape( - t, shape=list(theta_shape), actual_shape=theta_shape_op) - blob_out = t_re - blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \ - filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_out' + "_w", \ - initializer = fluid.initializer.Constant(value = 0.) \ - if nonlocal_params["use_zero_init_conv"] \ - else fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_out' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_out') - blob_out_shape = blob_out.shape - - if nonlocal_params["use_bn"]: - bn_name = prefix + "_bn" - blob_out = fluid.layers.batch_norm(blob_out, \ - # is_test = test_mode, \ - momentum = nonlocal_params["bn_momentum"], \ - epsilon = nonlocal_params["bn_epsilon"], \ - name = bn_name, \ - param_attr = ParamAttr(name = bn_name + "_s", \ - initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - bias_attr = ParamAttr(name = bn_name + "_b", \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - moving_mean_name = bn_name + "_rm", \ - moving_variance_name = bn_name + "_riv") # add bn - - if nonlocal_params["use_affine"]: - affine_scale = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_s'), \ - default_initializer = fluid.initializer.Constant(value = 1.)) - affine_bias = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_b'), \ - default_initializer = fluid.initializer.Constant(value = 0.)) - blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \ - bias = affine_bias, name = prefix + '_affine') # add affine - - return blob_out - - -def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner): - ''' - add_space_nonlocal: - Non-local Neural Networks: see https://arxiv.org/abs/1711.07971 - ''' - conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner) - output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum') - return output diff --git a/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/processor.py b/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/processor.py deleted file mode 100644 index eb2f7d2449f68e19c8fa98111eb5b44e3fcaa74e..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/processor.py +++ /dev/null @@ -1,174 +0,0 @@ -# coding=utf-8 -import base64 -import os - -import cv2 -import numpy as np -from PIL import Image, ImageDraw - -__all__ = [ - 'base64_to_cv2', - 'load_label_info', - 'postprocess', -] - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def get_save_image_name(img, output_dir, image_path): - """Get save image name from source image path. 
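-
-    For example (illustrative): with output_dir='detection_result' and a
-    JPEG image whose source path 'photos/cat' has no extension, the
-    returned name is 'detection_result/cat.jpg' (the extension is recovered
-    from the PIL image format, falling back to the image mode).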
- """ - image_name = os.path.split(image_path)[-1] - name, ext = os.path.splitext(image_name) - if ext == '': - if img.format == 'PNG': - ext = '.png' - elif img.format == 'JPEG': - ext = '.jpg' - elif img.format == 'BMP': - ext = '.bmp' - else: - if img.mode == "RGB" or img.mode == "L": - ext = ".jpg" - elif img.mode == "RGBA" or img.mode == "P": - ext = '.png' - - return os.path.join(output_dir, "{}".format(name)) + ext - - -def draw_bounding_box_on_image(image_path, data_list, save_dir): - image = Image.open(image_path) - draw = ImageDraw.Draw(image) - for data in data_list: - left, right, top, bottom = data['left'], data['right'], data[ - 'top'], data['bottom'] - # draw bbox - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), - (left, top)], - width=2, - fill='red') - - # draw label - if image.mode == 'RGB': - text = data['label'] + ": %.2f%%" % (100 * data['confidence']) - textsize_width, textsize_height = draw.textsize(text=text) - draw.rectangle( - xy=(left, top - (textsize_height + 5), - left + textsize_width + 10, top), - fill=(255, 255, 255)) - draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) - - save_name = get_save_image_name(image, save_dir, image_path) - if os.path.exists(save_name): - os.remove(save_name) - image.save(save_name) - - return save_name - - -def clip_bbox(bbox, img_width, img_height): - xmin = max(min(bbox[0], img_width), 0.) - ymin = max(min(bbox[1], img_height), 0.) - xmax = max(min(bbox[2], img_width), 0.) - ymax = max(min(bbox[3], img_height), 0.) - return float(xmin), float(ymin), float(xmax), float(ymax) - - -def load_label_info(file_path): - with open(file_path, 'r') as fr: - text = fr.readlines() - label_names = [] - for info in text: - label_names.append(info.strip()) - return label_names - - -def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, - handle_id, visualization): - """ - postprocess the lod_tensor produced by fluid.Executor.run - - Args: - paths (list[str]): the path of images. - images (list(numpy.ndarray)): list of images, shape of each is [H, W, C]. - data_out (lod_tensor): data produced by executor.run. - score_thresh (float): the low limit of bounding box. - label_names (list[str]): label names. - output_dir (str): output directory. - handle_id (int): The number of images that have been handled. - visualization (bool): whether to save as images. - - Returns: - res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of detection result; - confidence (float): The confidence of detection result. - save_path (str): The path to save output images. 
- """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() - - if handle_id < len(paths): - unhandled_paths = paths[handle_id:] - unhandled_paths_num = len(unhandled_paths) - else: - unhandled_paths_num = 0 - - output_dir = output_dir if output_dir else os.path.join( - os.getcwd(), 'detection_result') - if visualization: - if not os.path.exists(output_dir): - os.makedirs(output_dir) - - output = [] - for index in range(len(lod) - 1): - output_i = {'data': []} - if index < unhandled_paths_num: - org_img_path = unhandled_paths[index] - org_img = Image.open(org_img_path) - output_i['path'] = org_img_path - else: - org_img = images[index - unhandled_paths_num] - org_img = org_img.astype(np.uint8) - org_img = Image.fromarray(org_img[:, :, ::-1]) - if visualization: - org_img_path = get_save_image_name( - org_img, output_dir, 'image_numpy_{}'.format( - (handle_id + index))) - org_img.save(org_img_path) - org_img_height = org_img.height - org_img_width = org_img.width - result_i = results[lod[index]:lod[index + 1]] - - for row in result_i: - if len(row) != 6: - continue - if row[1] < score_thresh: - continue - category_id = int(row[0]) - confidence = row[1] - bbox = row[2:] - dt = {} - dt['label'] = label_names[category_id] - dt['confidence'] = float(confidence) - dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox( - bbox, org_img_width, org_img_height) - output_i['data'].append(dt) - - output.append(output_i) - - if visualization: - output_i['save_path'] = draw_bounding_box_on_image( - org_img_path, output_i['data'], output_dir) - - return output diff --git a/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/resnet.py b/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/resnet.py deleted file mode 100644 index 4bd6fb61e9a51c70b6197b25a7cf788c2ca5b790..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/resnet.py +++ /dev/null @@ -1,447 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -from collections import OrderedDict -from numbers import Integral - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.framework import Variable -from paddle.fluid.regularizer import L2Decay -from paddle.fluid.initializer import Constant - -from .nonlocal_helper import add_space_nonlocal -from .name_adapter import NameAdapter - -__all__ = ['ResNet', 'ResNetC5'] - - -class ResNet(object): - """ - Residual Network, see https://arxiv.org/abs/1512.03385 - Args: - depth (int): ResNet depth, should be 34, 50. 
- freeze_at (int): freeze the backbone at which stage - norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel' - freeze_norm (bool): freeze normalization layers - norm_decay (float): weight decay for normalization layer weights - variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently - feature_maps (list): index of stages whose feature maps are returned - dcn_v2_stages (list): index of stages who select deformable conv v2 - nonlocal_stages (list): index of stages who select nonlocal networks - """ - __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name'] - - def __init__(self, - depth=50, - freeze_at=0, - norm_type='sync_bn', - freeze_norm=False, - norm_decay=0., - variant='b', - feature_maps=[3, 4, 5], - dcn_v2_stages=[], - weight_prefix_name='', - nonlocal_stages=[], - get_prediction=False, - class_dim=1000): - super(ResNet, self).__init__() - - if isinstance(feature_maps, Integral): - feature_maps = [feature_maps] - - assert depth in [34, 50], \ - "depth {} not in [34, 50]" - assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant" - assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" - assert len(feature_maps) > 0, "need one or more feature maps" - assert norm_type in ['bn', 'sync_bn', 'affine_channel'] - assert not (len(nonlocal_stages)>0 and depth<50), \ - "non-local is not supported for resnet18 or resnet34" - - self.depth = depth - self.freeze_at = freeze_at - self.norm_type = norm_type - self.norm_decay = norm_decay - self.freeze_norm = freeze_norm - self.variant = variant - self._model_type = 'ResNet' - self.feature_maps = feature_maps - self.dcn_v2_stages = dcn_v2_stages - self.depth_cfg = { - 34: ([3, 4, 6, 3], self.basicblock), - 50: ([3, 4, 6, 3], self.bottleneck), - } - self.stage_filters = [64, 128, 256, 512] - self._c1_out_chan_num = 64 - self.na = NameAdapter(self) - self.prefix_name = weight_prefix_name - - self.nonlocal_stages = nonlocal_stages - self.nonlocal_mod_cfg = { - 50: 2, - 101: 5, - 152: 8, - 200: 12, - } - self.get_prediction = get_prediction - self.class_dim = class_dim - - def _conv_offset(self, - input, - filter_size, - stride, - padding, - act=None, - name=None): - out_channel = filter_size * filter_size * 3 - out = fluid.layers.conv2d( - input, - num_filters=out_channel, - filter_size=filter_size, - stride=stride, - padding=padding, - param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"), - bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"), - act=act, - name=name) - return out - - def _conv_norm(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - dcn_v2=False): - _name = self.prefix_name + name if self.prefix_name != '' else name - if not dcn_v2: - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + '.conv2d.output.1') - else: - # select deformable conv" - offset_mask = self._conv_offset( - input=input, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - act=None, - name=_name + "_conv_offset") - offset_channel = filter_size**2 * 2 - mask_channel = filter_size**2 - offset, mask = fluid.layers.split( - input=offset_mask, - num_or_sections=[offset_channel, mask_channel], - dim=1) - mask = fluid.layers.sigmoid(mask) - conv = fluid.layers.deformable_conv( - input=input, - offset=offset, - 
mask=mask, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - deformable_groups=1, - im2col_step=1, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + ".conv2d.output.1") - - bn_name = self.na.fix_conv_norm_name(name) - bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name - - norm_lr = 0. if self.freeze_norm else 1. - norm_decay = self.norm_decay - pattr = ParamAttr( - name=bn_name + '_scale', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - battr = ParamAttr( - name=bn_name + '_offset', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - - if self.norm_type in ['bn', 'sync_bn']: - global_stats = True if self.freeze_norm else False - out = fluid.layers.batch_norm( - input=conv, - act=act, - name=bn_name + '.output.1', - param_attr=pattr, - bias_attr=battr, - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', - use_global_stats=global_stats) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif self.norm_type == 'affine_channel': - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=pattr, - default_initializer=fluid.initializer.Constant(1.)) - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=battr, - default_initializer=fluid.initializer.Constant(0.)) - out = fluid.layers.affine_channel( - x=conv, scale=scale, bias=bias, act=act) - if self.freeze_norm: - scale.stop_gradient = True - bias.stop_gradient = True - return out - - def _shortcut(self, input, ch_out, stride, is_first, name): - max_pooling_in_short_cut = self.variant == 'd' - ch_in = input.shape[1] - # the naming rule is same as pretrained weight - name = self.na.fix_shortcut_name(name) - std_senet = getattr(self, 'std_senet', False) - if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first): - if std_senet: - if is_first: - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return self._conv_norm(input, ch_out, 3, stride, name=name) - if max_pooling_in_short_cut and not is_first: - input = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - ceil_mode=True, - pool_type='avg') - return self._conv_norm(input, ch_out, 1, 1, name=name) - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - if self.variant == 'a': - stride1, stride2 = stride, 1 - else: - stride1, stride2 = 1, stride - - # ResNeXt - groups = getattr(self, 'groups', 1) - group_width = getattr(self, 'group_width', -1) - if groups == 1: - expand = 4 - elif (groups * group_width) == 256: - expand = 1 - else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d - num_filters = num_filters // 2 - expand = 2 - - conv_name1, conv_name2, conv_name3, \ - shortcut_name = self.na.fix_bottleneck_name(name) - std_senet = getattr(self, 'std_senet', False) - if std_senet: - conv_def = [[ - int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1 - ], [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - else: - conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], - [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - - residual = input - for i, (c, k, s, act, g, _name) 
in enumerate(conv_def): - residual = self._conv_norm( - input=residual, - num_filters=c, - filter_size=k, - stride=s, - act=act, - groups=g, - name=_name, - dcn_v2=(i == 1 and dcn_v2)) - short = self._shortcut( - input, - num_filters * expand, - stride, - is_first=is_first, - name=shortcut_name) - # Squeeze-and-Excitation - if callable(getattr(self, '_squeeze_excitation', None)): - residual = self._squeeze_excitation( - input=residual, num_channels=num_filters, name='fc' + name) - return fluid.layers.elementwise_add( - x=short, y=residual, act='relu', name=name + ".add.output.5") - - def basicblock(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - assert dcn_v2 is False, "Not implemented yet." - conv0 = self._conv_norm( - input=input, - num_filters=num_filters, - filter_size=3, - act='relu', - stride=stride, - name=name + "_branch2a") - conv1 = self._conv_norm( - input=conv0, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - short = self._shortcut( - input, num_filters, stride, is_first, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - - def layer_warp(self, input, stage_num): - """ - Args: - input (Variable): input variable. - stage_num (int): the stage number, should be 2, 3, 4, 5 - - Returns: - The last variable in endpoint-th stage. - """ - assert stage_num in [2, 3, 4, 5] - - stages, block_func = self.depth_cfg[self.depth] - count = stages[stage_num - 2] - - ch_out = self.stage_filters[stage_num - 2] - is_first = False if stage_num != 2 else True - dcn_v2 = True if stage_num in self.dcn_v2_stages else False - - nonlocal_mod = 1000 - if stage_num in self.nonlocal_stages: - nonlocal_mod = self.nonlocal_mod_cfg[ - self.depth] if stage_num == 4 else 2 - - # Make the layer name and parameter name consistent - # with ImageNet pre-trained model - conv = input - for i in range(count): - conv_name = self.na.fix_layer_warp_name(stage_num, count, i) - if self.depth < 50: - is_first = True if i == 0 and stage_num == 2 else False - conv = block_func( - input=conv, - num_filters=ch_out, - stride=2 if i == 0 and stage_num != 2 else 1, - is_first=is_first, - name=conv_name, - dcn_v2=dcn_v2) - - # add non local model - dim_in = conv.shape[1] - nonlocal_name = "nonlocal_conv{}".format(stage_num) - if i % nonlocal_mod == nonlocal_mod - 1: - conv = add_space_nonlocal(conv, dim_in, dim_in, - nonlocal_name + '_{}'.format(i), - int(dim_in / 2)) - return conv - - def c1_stage(self, input): - out_chan = self._c1_out_chan_num - - conv1_name = self.na.fix_c1_stage_name() - - if self.variant in ['c', 'd']: - conv_def = [ - [out_chan // 2, 3, 2, "conv1_1"], - [out_chan // 2, 3, 1, "conv1_2"], - [out_chan, 3, 1, "conv1_3"], - ] - else: - conv_def = [[out_chan, 7, 2, conv1_name]] - - for (c, k, s, _name) in conv_def: - input = self._conv_norm( - input=input, - num_filters=c, - filter_size=k, - stride=s, - act='relu', - name=_name) - - output = fluid.layers.pool2d( - input=input, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - return output - - def __call__(self, input): - assert isinstance(input, Variable) - assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ - "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) - - res_endpoints = [] - - res = input - feature_maps = self.feature_maps - severed_head = getattr(self, 'severed_head', False) - if not severed_head: - res = self.c1_stage(res) - feature_maps = range(2, max(self.feature_maps) + 1) - - for i in feature_maps: 
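-            # stages run in order; any stage with index <= freeze_at gets
-            # stop_gradient=True below, so its weights stay fixed when the
-            # backbone is reused for transfer learning.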
- res = self.layer_warp(res, i) - if i in self.feature_maps: - res_endpoints.append(res) - if self.freeze_at >= i: - res.stop_gradient = True - if self.get_prediction: - pool = fluid.layers.pool2d( - input=res, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - - out = fluid.layers.fc( - input=pool, - size=self.class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - out = fluid.layers.softmax(out) - return out - return OrderedDict([('res{}_sum'.format(self.feature_maps[idx]), feat) - for idx, feat in enumerate(res_endpoints)]) - - -class ResNetC5(ResNet): - def __init__(self, - depth=50, - freeze_at=2, - norm_type='affine_channel', - freeze_norm=True, - norm_decay=0., - variant='b', - feature_maps=[5], - weight_prefix_name=''): - super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm, - norm_decay, variant, feature_maps) - self.severed_head = True diff --git a/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/retina_head.py b/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/retina_head.py deleted file mode 100644 index 03b5fa9fe71ea0763bdad56f95e974ead311e413..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/retina_head.py +++ /dev/null @@ -1,426 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Normal, Constant -from paddle.fluid.regularizer import L2Decay - -__all__ = [ - 'AnchorGenerator', 'RetinaTargetAssign', 'RetinaOutputDecoder', 'RetinaHead' -] - - -class AnchorGenerator(object): - # __op__ = fluid.layers.anchor_generator - def __init__(self, - stride=[16.0, 16.0], - anchor_sizes=[32, 64, 128, 256, 512], - aspect_ratios=[0.5, 1., 2.], - variance=[1., 1., 1., 1.]): - self.anchor_sizes = anchor_sizes - self.aspect_ratios = aspect_ratios - self.variance = variance - self.stride = stride - - -class RetinaTargetAssign(object): - # __op__ = fluid.layers.retinanet_target_assign - def __init__(self, positive_overlap=0.5, negative_overlap=0.4): - self.positive_overlap = positive_overlap - self.negative_overlap = negative_overlap - - -class RetinaOutputDecoder(object): - # __op__ = fluid.layers.retinanet_detection_output - def __init__(self, - score_thresh=0.05, - nms_thresh=0.3, - pre_nms_top_n=1000, - detections_per_im=100, - nms_eta=1.0): - super(RetinaOutputDecoder, self).__init__() - self.score_threshold = score_thresh - self.nms_threshold = nms_thresh - self.nms_top_k = pre_nms_top_n - self.keep_top_k = detections_per_im - self.nms_eta = nms_eta - - -class RetinaHead(object): - """ - Retina Head - - Args: - anchor_generator (object): `AnchorGenerator` instance - target_assign (object): `RetinaTargetAssign` instance - output_decoder (object): `RetinaOutputDecoder` instance - num_convs_per_octave (int): Number of convolution layers in each octave - num_chan (int): Number of octave output channels - max_level (int): Highest level of FPN output - min_level (int): Lowest level of FPN output - prior_prob (float): Used to set the bias init for the class prediction layer - base_scale (int): Anchors are generated based on this scale - num_scales_per_octave (int): Number of anchor scales per octave - num_classes (int): Number of classes - gamma (float): 
The parameter in focal loss - alpha (float): The parameter in focal loss - sigma (float): The parameter in smooth l1 loss - """ - __inject__ = ['anchor_generator', 'target_assign', 'output_decoder'] - __shared__ = ['num_classes'] - - def __init__(self, - anchor_generator=AnchorGenerator(), - target_assign=RetinaTargetAssign(), - output_decoder=RetinaOutputDecoder(), - num_convs_per_octave=4, - num_chan=256, - max_level=7, - min_level=3, - prior_prob=0.01, - base_scale=4, - num_scales_per_octave=3, - num_classes=81, - gamma=2.0, - alpha=0.25, - sigma=3.0151134457776365): - self.anchor_generator = anchor_generator - self.target_assign = target_assign - self.output_decoder = output_decoder - self.num_convs_per_octave = num_convs_per_octave - self.num_chan = num_chan - self.max_level = max_level - self.min_level = min_level - self.prior_prob = prior_prob - self.base_scale = base_scale - self.num_scales_per_octave = num_scales_per_octave - self.num_classes = num_classes - self.gamma = gamma - self.alpha = alpha - self.sigma = sigma - - def _class_subnet(self, body_feats, spatial_scale): - """ - Get class predictions of all level FPN level. - - Args: - fpn_dict(dict): A dictionary represents the output of FPN with - their name. - spatial_scale(list): A list of multiplicative spatial scale factor. - - Returns: - cls_pred_input(list): Class prediction of all input fpn levels. - """ - assert len(body_feats) == self.max_level - self.min_level + 1 - fpn_name_list = list(body_feats.keys()) - cls_pred_list = [] - for lvl in range(self.min_level, self.max_level + 1): - fpn_name = fpn_name_list[self.max_level - lvl] - subnet_blob = body_feats[fpn_name] - for i in range(self.num_convs_per_octave): - conv_name = 'retnet_cls_conv_n{}_fpn{}'.format(i, lvl) - conv_share_name = 'retnet_cls_conv_n{}_fpn{}'.format( - i, self.min_level) - subnet_blob_in = subnet_blob - subnet_blob = fluid.layers.conv2d( - input=subnet_blob_in, - num_filters=self.num_chan, - filter_size=3, - stride=1, - padding=1, - act='relu', - name=conv_name, - param_attr=ParamAttr( - name=conv_share_name + '_w', - initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=conv_share_name + '_b', - learning_rate=2., - regularizer=L2Decay(0.))) - - # class prediction - cls_name = 'retnet_cls_pred_fpn{}'.format(lvl) - cls_share_name = 'retnet_cls_pred_fpn{}'.format(self.min_level) - num_anchors = self.num_scales_per_octave * len( - self.anchor_generator.aspect_ratios) - cls_dim = num_anchors * (self.num_classes - 1) - # bias initialization: b = -log((1 - pai) / pai) - bias_init = float(-np.log((1 - self.prior_prob) / self.prior_prob)) - out_cls = fluid.layers.conv2d( - input=subnet_blob, - num_filters=cls_dim, - filter_size=3, - stride=1, - padding=1, - act=None, - name=cls_name, - param_attr=ParamAttr( - name=cls_share_name + '_w', - initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=cls_share_name + '_b', - initializer=Constant(value=bias_init), - learning_rate=2., - regularizer=L2Decay(0.))) - cls_pred_list.append(out_cls) - - return cls_pred_list - - def _bbox_subnet(self, body_feats, spatial_scale): - """ - Get bounding box predictions of all level FPN level. - - Args: - fpn_dict(dict): A dictionary represents the output of FPN with - their name. - spatial_scale(list): A list of multiplicative spatial scale factor. - - Returns: - bbox_pred_input(list): Bounding box prediction of all input fpn - levels. 
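-
-        Note: the conv weights are created under names derived from
-        `min_level` (the `*_share_name` ParamAttr above), so a single set of
-        subnet weights is shared across all pyramid levels, as in RetinaNet.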
- """ - assert len(body_feats) == self.max_level - self.min_level + 1 - fpn_name_list = list(body_feats.keys()) - bbox_pred_list = [] - for lvl in range(self.min_level, self.max_level + 1): - fpn_name = fpn_name_list[self.max_level - lvl] - subnet_blob = body_feats[fpn_name] - for i in range(self.num_convs_per_octave): - conv_name = 'retnet_bbox_conv_n{}_fpn{}'.format(i, lvl) - conv_share_name = 'retnet_bbox_conv_n{}_fpn{}'.format( - i, self.min_level) - subnet_blob_in = subnet_blob - subnet_blob = fluid.layers.conv2d( - input=subnet_blob_in, - num_filters=self.num_chan, - filter_size=3, - stride=1, - padding=1, - act='relu', - name=conv_name, - param_attr=ParamAttr( - name=conv_share_name + '_w', - initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=conv_share_name + '_b', - learning_rate=2., - regularizer=L2Decay(0.))) - - # bbox prediction - bbox_name = 'retnet_bbox_pred_fpn{}'.format(lvl) - bbox_share_name = 'retnet_bbox_pred_fpn{}'.format(self.min_level) - num_anchors = self.num_scales_per_octave * len( - self.anchor_generator.aspect_ratios) - bbox_dim = num_anchors * 4 - out_bbox = fluid.layers.conv2d( - input=subnet_blob, - num_filters=bbox_dim, - filter_size=3, - stride=1, - padding=1, - act=None, - name=bbox_name, - param_attr=ParamAttr( - name=bbox_share_name + '_w', - initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=bbox_share_name + '_b', - learning_rate=2., - regularizer=L2Decay(0.))) - bbox_pred_list.append(out_bbox) - return bbox_pred_list - - def _anchor_generate(self, body_feats, spatial_scale): - """ - Get anchor boxes of all level FPN level. - - Args: - fpn_dict(dict): A dictionary represents the output of FPN with their name. - spatial_scale(list): A list of multiplicative spatial scale factor. - - Return: - anchor_input(list): Anchors of all input fpn levels with shape of. - anchor_var_input(list): Anchor variance of all input fpn levels with shape. - """ - assert len(body_feats) == self.max_level - self.min_level + 1 - fpn_name_list = list(body_feats.keys()) - anchor_list = [] - anchor_var_list = [] - for lvl in range(self.min_level, self.max_level + 1): - anchor_sizes = [] - stride = int(1 / spatial_scale[self.max_level - lvl]) - for octave in range(self.num_scales_per_octave): - anchor_size = stride * (2**(float(octave) / float( - self.num_scales_per_octave))) * self.base_scale - anchor_sizes.append(anchor_size) - fpn_name = fpn_name_list[self.max_level - lvl] - anchor, anchor_var = fluid.layers.anchor_generator( - input=body_feats[fpn_name], - anchor_sizes=anchor_sizes, - aspect_ratios=self.anchor_generator.aspect_ratios, - stride=[stride, stride], - variance=self.anchor_generator.variance) - anchor_list.append(anchor) - anchor_var_list.append(anchor_var) - return anchor_list, anchor_var_list - - def _get_output(self, body_feats, spatial_scale): - """ - Get class, bounding box predictions and anchor boxes of all level FPN level. - - Args: - fpn_dict(dict): A dictionary represents the output of FPN with - their name. - spatial_scale(list): A list of multiplicative spatial scale factor. - - Returns: - cls_pred_input(list): Class prediction of all input fpn levels. - bbox_pred_input(list): Bounding box prediction of all input fpn - levels. - anchor_input(list): Anchors of all input fpn levels with shape of. - anchor_var_input(list): Anchor variance of all input fpn levels with - shape. 
- """ - assert len(body_feats) == self.max_level - self.min_level + 1 - # class subnet - cls_pred_list = self._class_subnet(body_feats, spatial_scale) - # bbox subnet - bbox_pred_list = self._bbox_subnet(body_feats, spatial_scale) - #generate anchors - anchor_list, anchor_var_list = self._anchor_generate( - body_feats, spatial_scale) - cls_pred_reshape_list = [] - bbox_pred_reshape_list = [] - anchor_reshape_list = [] - anchor_var_reshape_list = [] - for i in range(self.max_level - self.min_level + 1): - cls_pred_transpose = fluid.layers.transpose( - cls_pred_list[i], perm=[0, 2, 3, 1]) - cls_pred_reshape = fluid.layers.reshape( - cls_pred_transpose, shape=(0, -1, self.num_classes - 1)) - bbox_pred_transpose = fluid.layers.transpose( - bbox_pred_list[i], perm=[0, 2, 3, 1]) - bbox_pred_reshape = fluid.layers.reshape( - bbox_pred_transpose, shape=(0, -1, 4)) - anchor_reshape = fluid.layers.reshape(anchor_list[i], shape=(-1, 4)) - anchor_var_reshape = fluid.layers.reshape( - anchor_var_list[i], shape=(-1, 4)) - cls_pred_reshape_list.append(cls_pred_reshape) - bbox_pred_reshape_list.append(bbox_pred_reshape) - anchor_reshape_list.append(anchor_reshape) - anchor_var_reshape_list.append(anchor_var_reshape) - output = {} - output['cls_pred'] = cls_pred_reshape_list - output['bbox_pred'] = bbox_pred_reshape_list - output['anchor'] = anchor_reshape_list - output['anchor_var'] = anchor_var_reshape_list - return output - - def get_prediction(self, body_feats, spatial_scale, im_info): - """ - Get prediction bounding box in test stage. - - Args: - fpn_dict(dict): A dictionary represents the output of FPN with - their name. - spatial_scale(list): A list of multiplicative spatial scale factor. - im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the - number of input images, each element consists of im_height, - im_width, im_scale. - - Returns: - pred_result(Variable): Prediction result with shape [N, 6]. Each - row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]. - N is the total number of prediction. - """ - output = self._get_output(body_feats, spatial_scale) - cls_pred_reshape_list = output['cls_pred'] - bbox_pred_reshape_list = output['bbox_pred'] - anchor_reshape_list = output['anchor'] - for i in range(self.max_level - self.min_level + 1): - cls_pred_reshape_list[i] = fluid.layers.sigmoid( - cls_pred_reshape_list[i]) - pred_result = fluid.layers.retinanet_detection_output( - bboxes=bbox_pred_reshape_list, - scores=cls_pred_reshape_list, - anchors=anchor_reshape_list, - im_info=im_info, - score_threshold=self.output_decoder.score_threshold, - nms_threshold=self.output_decoder.nms_threshold, - nms_top_k=self.output_decoder.nms_top_k, - keep_top_k=self.output_decoder.keep_top_k, - nms_eta=self.output_decoder.nms_eta) - return pred_result - - def get_loss(self, body_feats, spatial_scale, im_info, gt_box, gt_label, - is_crowd): - """ - Calculate the loss of retinanet. - Args: - fpn_dict(dict): A dictionary represents the output of FPN with - their name. - spatial_scale(list): A list of multiplicative spatial scale factor. - im_info(Variable): A 2-D LoDTensor with shape [B, 3]. B is the - number of input images, each element consists of im_height, - im_width, im_scale. - gt_box(Variable): The ground-truth bounding boxes with shape [M, 4]. - M is the number of groundtruth. - gt_label(Variable): The ground-truth labels with shape [M, 1]. - M is the number of groundtruth. - is_crowd(Variable): Indicates groud-truth is crowd or not with - shape [M, 1]. M is the number of groundtruth. 
- - Returns: - Type: dict - loss_cls(Variable): focal loss. - loss_bbox(Variable): smooth l1 loss. - """ - output = self._get_output(body_feats, spatial_scale) - cls_pred_reshape_list = output['cls_pred'] - bbox_pred_reshape_list = output['bbox_pred'] - anchor_reshape_list = output['anchor'] - anchor_var_reshape_list = output['anchor_var'] - - cls_pred_input = fluid.layers.concat(cls_pred_reshape_list, axis=1) - bbox_pred_input = fluid.layers.concat(bbox_pred_reshape_list, axis=1) - anchor_input = fluid.layers.concat(anchor_reshape_list, axis=0) - anchor_var_input = fluid.layers.concat(anchor_var_reshape_list, axis=0) - score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight, fg_num = \ - fluid.layers.rpn_target_assign( - bbox_pred=bbox_pred_input, - cls_logits=cls_pred_input, - anchor_box=anchor_input, - anchor_var=anchor_var_input, - gt_boxes=gt_box, - gt_labels=gt_label, - is_crowd=is_crowd, - im_info=im_info, - num_classes=self.num_classes - 1, - rpn_batch_size_per_im=self.target_assign.rpn_batch_size_per_im, - rpn_straddle_thresh=self.target_assign.rpn_straddle_thresh, - rpn_fg_fraction=self.target_assign.rpn_fg_fraction, - rpn_positive_overlap=self.target_assign.rpn_positive_overlap, - rpn_negative_overlap=self.target_assign.rpn_negative_overlap, - use_random=self.target_assign.use_random) - fg_num = fluid.layers.reduce_sum(fg_num, name='fg_num') - score_tgt = fluid.layers.cast(score_tgt, 'int32') - loss_cls = fluid.layers.sigmoid_focal_loss( - x=score_pred, - label=score_tgt, - fg_num=fg_num, - gamma=self.gamma, - alpha=self.alpha) - loss_cls = fluid.layers.reduce_sum(loss_cls, name='loss_cls') - loss_bbox = fluid.layers.smooth_l1( - x=loc_pred, - y=loc_tgt, - sigma=self.sigma, - inside_weight=bbox_weight, - outside_weight=bbox_weight) - loss_bbox = fluid.layers.reduce_sum(loss_bbox, name='loss_bbox') - loss_bbox = loss_bbox / fg_num - return {'loss_cls': loss_cls, 'loss_bbox': loss_bbox} diff --git a/hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/data_feed.py b/hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/data_feed.py deleted file mode 100644 index 42677536f593562b0537ca3df6b12370af677e0a..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/data_feed.py +++ /dev/null @@ -1,218 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import os -import random -from collections import OrderedDict - -import cv2 -import numpy as np -from PIL import Image -from paddle import fluid - -__all__ = ['reader'] - - -class DecodeImage(object): - def __init__(self, to_rgb=True, with_mixup=False): - """ Transform the image data to numpy format. - - Args: - to_rgb (bool): whether to convert BGR to RGB - with_mixup (bool): whether or not to mixup image and gt_bbbox/gt_score - """ - self.to_rgb = to_rgb - self.with_mixup = with_mixup - - def __call__(self, im): - if self.to_rgb: - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - - return im - - -class ResizeImage(object): - def __init__(self, - target_size=0, - max_size=0, - interp=cv2.INTER_LINEAR, - use_cv2=True): - """ - Rescale image to the specified target size, and capped at max_size - if max_size != 0. - If target_size is list, selected a scale randomly as the specified - target size. - - Args: - target_size (int|list): the target size of image's short side, - multi-scale training is adopted when type is list. 
- max_size (int): the max size of image - interp (int): the interpolation method - use_cv2 (bool): use the cv2 interpolation method or use PIL - interpolation method - """ - self.max_size = int(max_size) - self.interp = int(interp) - self.use_cv2 = use_cv2 - self.target_size = target_size - - def __call__(self, im): - if not isinstance(im, np.ndarray): - raise TypeError("{}: image type is not numpy.".format(self)) - if len(im.shape) != 3: - raise ValueError('{}: image is not 3-dimensional.'.format(self)) - im_shape = im.shape - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - if isinstance(self.target_size, list): - # Case for multi-scale training - selected_size = random.choice(self.target_size) - else: - selected_size = self.target_size - if float(im_size_min) == 0: - raise ZeroDivisionError('{}: min size of image is 0'.format(self)) - if self.max_size != 0: - im_scale = float(selected_size) / float(im_size_min) - # Prevent the biggest axis from being more than max_size - if np.round(im_scale * im_size_max) > self.max_size: - im_scale = float(self.max_size) / float(im_size_max) - im_scale_x = im_scale - im_scale_y = im_scale - - resize_w = im_scale_x * float(im_shape[1]) - resize_h = im_scale_y * float(im_shape[0]) - im_info = [resize_h, resize_w, im_scale] - else: - im_scale_x = float(selected_size) / float(im_shape[1]) - im_scale_y = float(selected_size) / float(im_shape[0]) - - resize_w = selected_size - resize_h = selected_size - - if self.use_cv2: - im = cv2.resize( - im, - None, - None, - fx=im_scale_x, - fy=im_scale_y, - interpolation=self.interp) - else: - if self.max_size != 0: - raise TypeError( - 'If you set max_size to cap the maximum size of image,' - 'please set use_cv2 to True to resize the image.') - im = im.astype('uint8') - im = Image.fromarray(im) - im = im.resize((int(resize_w), int(resize_h)), self.interp) - im = np.array(im) - - return im - - -class NormalizeImage(object): - def __init__(self, - mean=[0.485, 0.456, 0.406], - std=[1, 1, 1], - is_scale=True, - is_channel_first=True): - """ - Args: - mean (list): the pixel mean - std (list): the pixel variance - """ - self.mean = mean - self.std = std - self.is_scale = is_scale - self.is_channel_first = is_channel_first - - def __call__(self, im): - """Normalize the image. - - Operators: - 1.(optional) Scale the image to [0,1] - 2. Each pixel minus mean and is divided by std - """ - im = im.astype(np.float32, copy=False) - if self.is_channel_first: - mean = np.array(self.mean)[:, np.newaxis, np.newaxis] - std = np.array(self.std)[:, np.newaxis, np.newaxis] - else: - mean = np.array(self.mean)[np.newaxis, np.newaxis, :] - std = np.array(self.std)[np.newaxis, np.newaxis, :] - if self.is_scale: - im = im / 255.0 - im -= mean - im /= std - return im - - -class Permute(object): - def __init__(self, to_bgr=True, channel_first=True): - """ - Change the channel. 
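-        In other words, swap HWC image layout to CHW and, when `to_bgr` is
-        set, reverse the channel order from RGB to BGR.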
-
-        Args:
-            to_bgr (bool): whether to convert RGB to BGR
-            channel_first (bool): whether to change the layout from HWC to CHW
-        """
-        self.to_bgr = to_bgr
-        self.channel_first = channel_first
-
-    def __call__(self, im):
-        if self.channel_first:
-            im = np.swapaxes(im, 1, 2)
-            im = np.swapaxes(im, 1, 0)
-        if self.to_bgr:
-            im = im[[2, 1, 0], :, :]
-        return im
-
-
-def reader(paths=[],
-           images=None,
-           decode_image=DecodeImage(to_rgb=True, with_mixup=False),
-           resize_image=ResizeImage(
-               target_size=512, interp=1, max_size=0, use_cv2=False),
-           permute_image=Permute(to_bgr=False),
-           normalize_image=NormalizeImage(
-               mean=[104, 117, 123], std=[1, 1, 1], is_scale=False)):
-    """
-    data generator
-
-    Args:
-        paths (list[str]): paths to images.
-        images (list(numpy.ndarray)): data of images, the shape of each is [H, W, C].
-        decode_image (class object): instance of DecodeImage.
-        resize_image (class object): instance of ResizeImage.
-        permute_image (class object): instance of Permute.
-        normalize_image (class object): instance of NormalizeImage.
-    """
-    img_list = []
-    if paths is not None:
-        assert type(paths) is list, "type(paths) is not list."
-        for img_path in paths:
-            assert os.path.isfile(
-                img_path), "The {} isn't a valid file path.".format(img_path)
-            img = cv2.imread(img_path).astype('float32')
-            img_list.append(img)
-    if images is not None:
-        for img in images:
-            img_list.append(img)
-
-    # NOTE: the transforms below are re-created locally, so the keyword
-    # arguments above are effectively ignored; this module always resizes
-    # to 300 x 300 and normalizes with the fixed mean/std below.
-    decode_image = DecodeImage(to_rgb=True, with_mixup=False)
-    resize_image = ResizeImage(
-        target_size=300, interp=1, max_size=0, use_cv2=False)
-    permute_image = Permute()
-    normalize_image = NormalizeImage(
-        mean=[127.5, 127.5, 127.5],
-        std=[127.502231, 127.502231, 127.502231],
-        is_scale=False)
-
-    for img in img_list:
-        preprocessed_img = decode_image(img)
-        preprocessed_img = resize_image(preprocessed_img)
-        preprocessed_img = permute_image(preprocessed_img)
-        preprocessed_img = normalize_image(preprocessed_img)
-        yield [preprocessed_img]
diff --git a/hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/mobilenet_v1.py b/hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/mobilenet_v1.py
deleted file mode 100644
index 742391cc27f9e82425682a0c485b0fe08e5fdc37..0000000000000000000000000000000000000000
--- a/hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/mobilenet_v1.py
+++ /dev/null
@@ -1,211 +0,0 @@
-# coding=utf-8
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from paddle import fluid
-from paddle.fluid.param_attr import ParamAttr
-from paddle.fluid.regularizer import L2Decay
-
-__all__ = ['MobileNet']
-
-
-class MobileNet(object):
-    """
-    MobileNet v1, see https://arxiv.org/abs/1704.04861
-
-    Args:
-        norm_type (str): normalization type, 'bn' and 'sync_bn' are supported
-        norm_decay (float): weight decay for normalization layer weights
-        conv_group_scale (int): scaling factor for convolution groups
-        with_extra_blocks (bool): whether extra blocks should be added
-        extra_block_filters (list): number of filters for each extra block
-        class_dim (int): number of classes for classification
-        yolo_v3 (bool): whether to output the layers that YOLOv3 needs
-    """
-    __shared__ = ['norm_type', 'weight_prefix_name']
-
-    def __init__(self,
-                 norm_type='bn',
-                 norm_decay=0.,
-                 conv_group_scale=1,
-                 conv_learning_rate=1.0,
-                 with_extra_blocks=False,
-                 extra_block_filters=[[256, 512], [128, 256], [128, 256],
-                                      [64, 128]],
-                 weight_prefix_name='',
-                 class_dim=1000,
-                 yolo_v3=False):
-        self.norm_type = norm_type
-        self.norm_decay = norm_decay
-        self.conv_group_scale = conv_group_scale
-        self.conv_learning_rate = conv_learning_rate
-        self.with_extra_blocks = with_extra_blocks
-        self.extra_block_filters = extra_block_filters
-        self.prefix_name = weight_prefix_name
-        self.class_dim = class_dim
-        self.yolo_v3 = yolo_v3
-
-    def _conv_norm(self,
-                   input,
-                   filter_size,
-                   num_filters,
-                   stride,
-                   padding,
-                   num_groups=1,
-                   act='relu',
-                   use_cudnn=True,
-                   name=None):
-        parameter_attr = ParamAttr(
-            learning_rate=self.conv_learning_rate,
-            initializer=fluid.initializer.MSRA(),
-            name=name + "_weights")
-        conv = fluid.layers.conv2d(
-            input=input,
-            num_filters=num_filters,
-            filter_size=filter_size,
-            stride=stride,
-            padding=padding,
-            groups=num_groups,
-            act=None,
-            use_cudnn=use_cudnn,
-            param_attr=parameter_attr,
-            bias_attr=False)
-
-        bn_name = name + "_bn"
-        norm_decay = self.norm_decay
-        bn_param_attr = ParamAttr(
-            regularizer=L2Decay(norm_decay), name=bn_name + '_scale')
-        bn_bias_attr = ParamAttr(
-            regularizer=L2Decay(norm_decay), name=bn_name + '_offset')
-        return fluid.layers.batch_norm(
-            input=conv,
-            act=act,
-            param_attr=bn_param_attr,
-            bias_attr=bn_bias_attr,
-            moving_mean_name=bn_name + '_mean',
-            moving_variance_name=bn_name + '_variance')
-
-    def depthwise_separable(self,
-                            input,
-                            num_filters1,
-                            num_filters2,
-                            num_groups,
-                            stride,
-                            scale,
-                            name=None):
-        depthwise_conv = self._conv_norm(
-            input=input,
-            filter_size=3,
-            num_filters=int(num_filters1 * scale),
-            stride=stride,
-            padding=1,
-            num_groups=int(num_groups * scale),
-            use_cudnn=False,
-            name=name + "_dw")
-
-        pointwise_conv = self._conv_norm(
-            input=depthwise_conv,
-            filter_size=1,
-            num_filters=int(num_filters2 * scale),
-            stride=1,
-            padding=0,
-            name=name + "_sep")
-        return pointwise_conv
-
-    def _extra_block(self,
-                     input,
-                     num_filters1,
-                     num_filters2,
-                     num_groups,
-                     stride,
-                     name=None):
-        pointwise_conv = self._conv_norm(
-            input=input,
-            filter_size=1,
-            num_filters=int(num_filters1),
-            stride=1,
-            num_groups=int(num_groups),
-            padding=0,
-            name=name + "_extra1")
-        normal_conv = self._conv_norm(
-            input=pointwise_conv,
-            filter_size=3,
-            num_filters=int(num_filters2),
-            stride=2,
-            num_groups=int(num_groups),
-            padding=1,
-            name=name + "_extra2")
-        return normal_conv
-
-    def __call__(self, input):
-        scale = self.conv_group_scale
-        blocks = []
-        # input 1/1
-        out = self._conv_norm(
-            input, 3, int(32 * scale), 2, 1, name=self.prefix_name + "conv1")
-        # 1/2
-        out = self.depthwise_separable(
-            out, 32, 64, 32, 1, scale, name=self.prefix_name + "conv2_1")
-        out = self.depthwise_separable(
-            out, 64, 128, 64, 2, scale, name=self.prefix_name + "conv2_2")
-        # 1/4
-        out = self.depthwise_separable(
-            out, 128, 128, 128, 1, scale, name=self.prefix_name + "conv3_1")
-        out = self.depthwise_separable(
-            out, 128, 256, 128, 2, scale, name=self.prefix_name + "conv3_2")
-        # 1/8
-        blocks.append(out)
-        out = self.depthwise_separable(
-            out, 256, 256, 256, 1, scale, name=self.prefix_name + "conv4_1")
-        out = self.depthwise_separable(
-            out, 256, 512, 256, 2, scale, name=self.prefix_name + "conv4_2")
-        # 1/16
-        blocks.append(out)
-        for i in range(5):
-            out = self.depthwise_separable(
-                out,
-                512,
-                512,
-                512,
-                1,
-                scale,
-                name=self.prefix_name + "conv5_" + str(i + 1))
-        module11 = out
-
-        out = self.depthwise_separable(
-            out, 512, 1024, 512, 2, scale, name=self.prefix_name + "conv5_6")
-        # 1/32
-        out = self.depthwise_separable(
-            out, 1024, 1024, 1024, 1, scale, name=self.prefix_name + "conv6")
-        module13 = out
-        blocks.append(out)
-        if self.yolo_v3:
-            return blocks
-        if not self.with_extra_blocks:
-            out = fluid.layers.pool2d(
-                input=out, pool_type='avg', global_pooling=True)
-            out = fluid.layers.fc(
-                input=out,
-                size=self.class_dim,
-                param_attr=ParamAttr(
-                    initializer=fluid.initializer.MSRA(), name="fc7_weights"),
-                bias_attr=ParamAttr(name="fc7_offset"))
-            out = fluid.layers.softmax(out)
-            blocks.append(out)
-            return blocks
-
-        num_filters = self.extra_block_filters
-        module14 = self._extra_block(module13, num_filters[0][0],
-                                     num_filters[0][1], 1, 2,
-                                     self.prefix_name + "conv7_1")
-        module15 = self._extra_block(module14, num_filters[1][0],
-                                     num_filters[1][1], 1, 2,
-                                     self.prefix_name + "conv7_2")
-        module16 = self._extra_block(module15, num_filters[2][0],
-                                     num_filters[2][1], 1, 2,
-                                     self.prefix_name + "conv7_3")
-        module17 = self._extra_block(module16, num_filters[3][0],
-                                     num_filters[3][1], 1, 2,
-                                     self.prefix_name + "conv7_4")
-        return module11, module13, module14, module15, module16, module17
diff --git a/hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/module.py b/hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/module.py
deleted file mode 100644
index 8ee1e7edf1ced1b0e1d616e780d91a7f4e53cb20..0000000000000000000000000000000000000000
--- a/hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/module.py
+++ /dev/null
@@ -1,322 +0,0 @@
-# coding=utf-8
-from __future__ import absolute_import
-
-import ast
-import argparse
-import os
-from functools import partial
-
-import yaml
-import numpy as np
-import paddle.fluid as fluid
-import paddlehub as hub
-from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
-from paddlehub.module.module import moduleinfo, runnable, serving
-from paddlehub.common.paddle_helper import add_vars_prefix
-
-from ssd_mobilenet_v1_pascal.mobilenet_v1 import MobileNet
-from ssd_mobilenet_v1_pascal.processor import load_label_info, postprocess, base64_to_cv2
-from ssd_mobilenet_v1_pascal.data_feed import reader
-
-
-@moduleinfo(
-    name="ssd_mobilenet_v1_pascal",
-    version="1.1.1",
-    type="cv/object_detection",
-    summary="SSD with backbone MobileNet_V1, trained with dataset Pascal VOC.",
-    author="paddlepaddle",
-    author_email="paddle-dev@baidu.com")
-class SSDMobileNetv1(hub.Module):
-    def _initialize(self):
-        self.default_pretrained_model_path = os.path.join(
-            self.directory, "ssd_mobilenet_v1_model")
-        self.label_names = load_label_info(
-            os.path.join(self.directory, "label_file.txt"))
-        self.model_config = None
-        self._set_config()
-
-    def _set_config(self):
-        # predictor config setting.
-        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
-        cpu_config.disable_glog_info()
-        cpu_config.disable_gpu()
-        cpu_config.switch_ir_optim(False)
-        self.cpu_predictor = create_paddle_predictor(cpu_config)
-
-        try:
-            _places = os.environ["CUDA_VISIBLE_DEVICES"]
-            int(_places[0])
-            use_gpu = True
-        except:
-            use_gpu = False
-        if use_gpu:
-            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
-            gpu_config.disable_glog_info()
-            gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0)
-            self.gpu_predictor = create_paddle_predictor(gpu_config)
-
-        # model config setting.
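-        # NOTE: config.yml stores the export-time hyper-parameters that are
-        # unpacked below into the MultiBoxHead, SSDOutputDecoder and
-        # MobileNet sections.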
-        if not self.model_config:
-            with open(os.path.join(self.directory, 'config.yml')) as fp:
-                self.model_config = yaml.load(fp.read(), Loader=yaml.FullLoader)
-
-            self.multi_box_head_config = self.model_config['MultiBoxHead']
-            self.output_decoder_config = self.model_config['SSDOutputDecoder']
-            self.mobilenet_config = self.model_config['MobileNet']
-
-    def context(self, trainable=True, pretrained=True, get_prediction=False):
-        """
-        Extract the head features, so as to perform transfer learning.
-
-        Args:
-            trainable (bool): whether to set the parameters trainable.
-            pretrained (bool): whether to load the default pretrained model.
-            get_prediction (bool): whether to get the prediction output.
-
-        Returns:
-            inputs (dict): the input variables.
-            outputs (dict): the output variables.
-            context_prog (Program): the program to execute transfer learning.
-        """
-        context_prog = fluid.Program()
-        startup_program = fluid.Program()
-        with fluid.program_guard(context_prog, startup_program):
-            with fluid.unique_name.guard():
-                # image
-                image = fluid.layers.data(
-                    name='image', shape=[3, 300, 300], dtype='float32')
-                # backbone
-                backbone = MobileNet(**self.mobilenet_config)
-                # body_feats
-                body_feats = backbone(image)
-                # im_size
-                im_size = fluid.layers.data(
-                    name='im_size', shape=[2], dtype='int32')
-                # var_prefix
-                var_prefix = '@HUB_{}@'.format(self.name)
-                # names of inputs
-                inputs = {
-                    'image': var_prefix + image.name,
-                    'im_size': var_prefix + im_size.name
-                }
-                # names of outputs
-                if get_prediction:
-                    locs, confs, box, box_var = fluid.layers.multi_box_head(
-                        inputs=body_feats,
-                        image=image,
-                        num_classes=21,
-                        **self.multi_box_head_config)
-                    pred = fluid.layers.detection_output(
-                        loc=locs,
-                        scores=confs,
-                        prior_box=box,
-                        prior_box_var=box_var,
-                        **self.output_decoder_config)
-                    outputs = {'bbox_out': [var_prefix + pred.name]}
-                else:
-                    outputs = {
-                        'body_features':
-                        [var_prefix + var.name for var in body_feats]
-                    }
-
-                # add_vars_prefix
-                add_vars_prefix(context_prog, var_prefix)
-                add_vars_prefix(fluid.default_startup_program(), var_prefix)
-                # inputs
-                inputs = {
-                    key: context_prog.global_block().vars[value]
-                    for key, value in inputs.items()
-                }
-                outputs = {
-                    out_key: [
-                        context_prog.global_block().vars[varname]
-                        for varname in out_value
-                    ]
-                    for out_key, out_value in outputs.items()
-                }
-                # trainable
-                for param in context_prog.global_block().iter_parameters():
-                    param.trainable = trainable
-
-                place = fluid.CPUPlace()
-                exe = fluid.Executor(place)
-                # pretrained
-                if pretrained:
-
-                    def _if_exist(var):
-                        return os.path.exists(
-                            os.path.join(self.default_pretrained_model_path,
-                                         var.name))
-
-                    fluid.io.load_vars(
-                        exe,
-                        self.default_pretrained_model_path,
-                        predicate=_if_exist)
-                else:
-                    exe.run(startup_program)
-
-        return inputs, outputs, context_prog
-
-    def object_detection(self,
-                         paths=None,
-                         images=None,
-                         data=None,
-                         batch_size=1,
-                         use_gpu=False,
-                         output_dir='detection_result',
-                         score_thresh=0.5,
-                         visualization=True):
-        """API of Object Detection.
-
-        Args:
-            paths (list[str]): the paths of images.
-            images (list(numpy.ndarray)): image data, the shape of each is [H, W, C].
-            data (dict): deprecated alternative input; if it contains an 'image' key, those paths are appended to paths.
-            batch_size (int): batch size.
-            use_gpu (bool): whether to use GPU.
-            output_dir (str): the directory to store the output images.
-            visualization (bool): whether to save the output images.
-            score_thresh (float): threshold for object detection.
-
-        Returns:
-            res (list[dict]): the result of Pascal VOC detection;
-                keys include 'data' and 'save_path', where the corresponding values are:
-                data (dict): the result of object detection; keys include 'left', 'top', 'right', 'bottom', 'label' and 'confidence', where the corresponding values are:
-                    left (float): the X coordinate of the upper left corner of the bounding box;
-                    top (float): the Y coordinate of the upper left corner of the bounding box;
-                    right (float): the X coordinate of the lower right corner of the bounding box;
-                    bottom (float): the Y coordinate of the lower right corner of the bounding box;
-                    label (str): the label of the detection result;
-                    confidence (float): the confidence of the detection result.
-                save_path (str, optional): the path to save the output images.
-        """
-        if use_gpu:
-            try:
-                _places = os.environ["CUDA_VISIBLE_DEVICES"]
-                int(_places[0])
-            except:
-                raise RuntimeError(
-                    "Environment variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
-                )
-
-        paths = paths if paths else list()
-        if data and 'image' in data:
-            paths += data['image']
-
-        data_reader = partial(reader, paths, images)
-        batch_reader = fluid.io.batch(data_reader, batch_size=batch_size)
-        res = []
-        for iter_id, feed_data in enumerate(batch_reader()):
-            feed_data = np.array(feed_data)
-            image_tensor = PaddleTensor(np.array(list(feed_data[:, 0])).copy())
-            if use_gpu:
-                data_out = self.gpu_predictor.run([image_tensor])
-            else:
-                data_out = self.cpu_predictor.run([image_tensor])
-
-            output = postprocess(
-                paths=paths,
-                images=images,
-                data_out=data_out,
-                score_thresh=score_thresh,
-                label_names=self.label_names,
-                output_dir=output_dir,
-                handle_id=iter_id * batch_size,
-                visualization=visualization)
-            res.extend(output)
-        return res
-
-    def save_inference_model(self,
-                             dirname,
-                             model_filename=None,
-                             params_filename=None,
-                             combined=True):
-        if combined:
-            model_filename = "__model__" if not model_filename else model_filename
-            params_filename = "__params__" if not params_filename else params_filename
-        place = fluid.CPUPlace()
-        exe = fluid.Executor(place)
-
-        program, feeded_var_names, target_vars = fluid.io.load_inference_model(
-            dirname=self.default_pretrained_model_path, executor=exe)
-
-        fluid.io.save_inference_model(
-            dirname=dirname,
-            main_program=program,
-            executor=exe,
-            feeded_var_names=feeded_var_names,
-            target_vars=target_vars,
-            model_filename=model_filename,
-            params_filename=params_filename)
-
-    @serving
-    def serving_method(self, images, **kwargs):
-        """
-        Run as a service.
-        """
-        images_decode = [base64_to_cv2(image) for image in images]
-        results = self.object_detection(images=images_decode, **kwargs)
-        return results
-
-    @runnable
-    def run_cmd(self, argvs):
-        """
-        Run as a command.
-        """
-        self.parser = argparse.ArgumentParser(
-            description="Run the {} module.".format(self.name),
-            prog='hub run {}'.format(self.name),
-            usage='%(prog)s',
-            add_help=True)
-        self.arg_input_group = self.parser.add_argument_group(
-            title="Input options", description="Input data. Required")
-        self.arg_config_group = self.parser.add_argument_group(
-            title="Config options",
-            description=
-            "Run configuration for controlling module behavior, not required.")
-        self.add_module_config_arg()
-        self.add_module_input_arg()
-        args = self.parser.parse_args(argvs)
-        results = self.object_detection(
-            paths=[args.input_path],
-            batch_size=args.batch_size,
-            use_gpu=args.use_gpu,
-            output_dir=args.output_dir,
-            visualization=args.visualization,
-            score_thresh=args.score_thresh)
-        return results
-
-    def add_module_config_arg(self):
-        """
-        Add the command config options.
-        """
-        self.arg_config_group.add_argument(
-            '--use_gpu',
-            type=ast.literal_eval,
-            default=False,
-            help="whether to use GPU or not")
-        self.arg_config_group.add_argument(
-            '--output_dir',
-            type=str,
-            default='detection_result',
-            help="The directory to save output images.")
-        self.arg_config_group.add_argument(
-            '--visualization',
-            type=ast.literal_eval,
-            default=False,
-            help="whether to save output as images.")
-
-    def add_module_input_arg(self):
-        """
-        Add the command input options.
-        """
-        self.arg_input_group.add_argument(
-            '--input_path', type=str, help="path to the input image.")
-        self.arg_input_group.add_argument(
-            '--batch_size',
-            type=ast.literal_eval,
-            default=1,
-            help="batch size.")
-        self.arg_input_group.add_argument(
-            '--score_thresh',
-            type=ast.literal_eval,
-            default=0.5,
-            help="threshold for object detection.")
diff --git a/hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/processor.py b/hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/processor.py
deleted file mode 100644
index 6960d1efd3585b863969518a1b71b9309cc8c506..0000000000000000000000000000000000000000
--- a/hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/processor.py
+++ /dev/null
@@ -1,176 +0,0 @@
-# coding=utf-8
-import base64
-import os
-
-import cv2
-import numpy as np
-from PIL import Image, ImageDraw
-
-__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess']
-
-
-def base64_to_cv2(b64str):
-    data = base64.b64decode(b64str.encode('utf8'))
-    # np.fromstring is deprecated for binary input; frombuffer is the
-    # drop-in replacement.
-    data = np.frombuffer(data, np.uint8)
-    data = cv2.imdecode(data, cv2.IMREAD_COLOR)
-    return data
-
-
-def get_save_image_name(img, output_dir, image_path):
-    """
-    Get the save image name from the source image path.
- """ - if not os.path.exists(output_dir): - os.makedirs(output_dir) - image_name = os.path.split(image_path)[-1] - name, ext = os.path.splitext(image_name) - - if img.format == 'PNG': - ext = '.png' - elif img.format == 'JPEG': - ext = '.jpg' - elif img.format == 'BMP': - ext = '.bmp' - else: - if img.mode == "RGB" or img.mode == "L": - ext = ".jpg" - elif img.mode == "RGBA" or img.mode == "P": - ext = '.png' - - return os.path.join(output_dir, "{}".format(name)) + ext - - -def draw_bounding_box_on_image(image_path, data_list, save_dir): - image = Image.open(image_path) - draw = ImageDraw.Draw(image) - for data in data_list: - left, right, top, bottom = data['left'], data['right'], data[ - 'top'], data['bottom'] - - # draw bbox - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), - (left, top)], - width=2, - fill='red') - - # draw label - if image.mode == 'RGB': - text = data['label'] + ": %.2f%%" % (100 * data['confidence']) - textsize_width, textsize_height = draw.textsize(text=text) - draw.rectangle( - xy=(left, top - (textsize_height + 5), - left + textsize_width + 10, top), - fill=(255, 255, 255)) - draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) - - save_name = get_save_image_name(image, save_dir, image_path) - if os.path.exists(save_name): - os.remove(save_name) - - image.save(save_name) - - return save_name - - -def clip_bbox(bbox, img_width, img_height): - xmin = max(min(bbox[0], img_width), 0.) - ymin = max(min(bbox[1], img_height), 0.) - xmax = max(min(bbox[2], img_width), 0.) - ymax = max(min(bbox[3], img_height), 0.) - return float(xmin), float(ymin), float(xmax), float(ymax) - - -def load_label_info(file_path): - with open(file_path, 'r') as fr: - text = fr.readlines() - label_names = [] - for info in text: - label_names.append(info.strip()) - return label_names - - -def postprocess(paths, - images, - data_out, - score_thresh, - label_names, - output_dir, - handle_id, - visualization=True): - """ - postprocess the lod_tensor produced by fluid.Executor.run - - Args: - paths (list[str]): the path of images. - images (list(numpy.ndarray)): list of images, shape of each is [H, W, C]. - data_out (lod_tensor): data produced by executor.run. - score_thresh (float): the low limit of bounding box. - label_names (list[str]): label names. - output_dir (str): output directory. - handle_id (int): The number of images that have been handled. - visualization (bool): whether to save as images. - - Returns: - res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of detection result; - confidence (float): The confidence of detection result. - save_path (str): The path to save output images. 
- """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() - if handle_id < len(paths): - unhandled_paths = paths[handle_id:] - unhandled_paths_num = len(unhandled_paths) - else: - unhandled_paths_num = 0 - - output = [] - for index in range(len(lod) - 1): - output_i = {'data': []} - if index < unhandled_paths_num: - org_img_path = unhandled_paths[index] - org_img = Image.open(org_img_path) - output_i['path'] = org_img_path - else: - org_img = images[index - unhandled_paths_num] - org_img = org_img.astype(np.uint8) - org_img = Image.fromarray(org_img[:, :, ::-1]) - if visualization: - org_img_path = get_save_image_name( - org_img, output_dir, 'image_numpy_{}'.format( - (handle_id + index))) - org_img.save(org_img_path) - org_img_height = org_img.height - org_img_width = org_img.width - result_i = results[lod[index]:lod[index + 1]] - for row in result_i: - if len(row) != 6: - continue - if row[1] < score_thresh: - continue - category_id = int(row[0]) - confidence = row[1] - bbox = row[2:] - bbox[0] = bbox[0] * org_img_width - bbox[1] = bbox[1] * org_img_height - bbox[2] = bbox[2] * org_img_width - bbox[3] = bbox[3] * org_img_height - dt = {} - dt['label'] = label_names[category_id] - dt['confidence'] = float(confidence) - dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox( - bbox, org_img_width, org_img_height) - output_i['data'].append(dt) - - output.append(output_i) - if visualization: - output_i['save_path'] = draw_bounding_box_on_image( - org_img_path, output_i['data'], output_dir) - - return output diff --git a/hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/data_feed.py b/hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/data_feed.py deleted file mode 100644 index 9629248d360e0289c0614748097acc97710a12e8..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/data_feed.py +++ /dev/null @@ -1,212 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import os -import random -from collections import OrderedDict - -import cv2 -import numpy as np -from PIL import Image -from paddle import fluid - -__all__ = ['reader'] - - -class DecodeImage(object): - def __init__(self, to_rgb=True, with_mixup=False): - """ Transform the image data to numpy format. - - Args: - to_rgb (bool): whether to convert BGR to RGB - with_mixup (bool): whether or not to mixup image and gt_bbbox/gt_score - """ - self.to_rgb = to_rgb - self.with_mixup = with_mixup - - def __call__(self, im): - if self.to_rgb: - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - - return im - - -class ResizeImage(object): - def __init__(self, - target_size=0, - max_size=0, - interp=cv2.INTER_LINEAR, - use_cv2=True): - """ - Rescale image to the specified target size, and capped at max_size - if max_size != 0. - If target_size is list, selected a scale randomly as the specified - target size. - - Args: - target_size (int|list): the target size of image's short side, - multi-scale training is adopted when type is list. 
- max_size (int): the max size of image - interp (int): the interpolation method - use_cv2 (bool): use the cv2 interpolation method or use PIL - interpolation method - """ - self.max_size = int(max_size) - self.interp = int(interp) - self.use_cv2 = use_cv2 - self.target_size = target_size - - def __call__(self, im): - if not isinstance(im, np.ndarray): - raise TypeError("{}: image type is not numpy.".format(self)) - if len(im.shape) != 3: - raise ValueError('{}: image is not 3-dimensional.'.format(self)) - im_shape = im.shape - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - if isinstance(self.target_size, list): - # Case for multi-scale training - selected_size = random.choice(self.target_size) - else: - selected_size = self.target_size - if float(im_size_min) == 0: - raise ZeroDivisionError('{}: min size of image is 0'.format(self)) - if self.max_size != 0: - im_scale = float(selected_size) / float(im_size_min) - # Prevent the biggest axis from being more than max_size - if np.round(im_scale * im_size_max) > self.max_size: - im_scale = float(self.max_size) / float(im_size_max) - im_scale_x = im_scale - im_scale_y = im_scale - - resize_w = im_scale_x * float(im_shape[1]) - resize_h = im_scale_y * float(im_shape[0]) - im_info = [resize_h, resize_w, im_scale] - else: - im_scale_x = float(selected_size) / float(im_shape[1]) - im_scale_y = float(selected_size) / float(im_shape[0]) - - resize_w = selected_size - resize_h = selected_size - - if self.use_cv2: - im = cv2.resize( - im, - None, - None, - fx=im_scale_x, - fy=im_scale_y, - interpolation=self.interp) - else: - if self.max_size != 0: - raise TypeError( - 'If you set max_size to cap the maximum size of image,' - 'please set use_cv2 to True to resize the image.') - im = im.astype('uint8') - im = Image.fromarray(im) - im = im.resize((int(resize_w), int(resize_h)), self.interp) - im = np.array(im) - - return im - - -class NormalizeImage(object): - def __init__(self, - mean=[0.485, 0.456, 0.406], - std=[1, 1, 1], - is_scale=True, - is_channel_first=True): - """ - Args: - mean (list): the pixel mean - std (list): the pixel variance - """ - self.mean = mean - self.std = std - self.is_scale = is_scale - self.is_channel_first = is_channel_first - - def __call__(self, im): - """Normalize the image. - - Operators: - 1.(optional) Scale the image to [0,1] - 2. Each pixel minus mean and is divided by std - """ - im = im.astype(np.float32, copy=False) - if self.is_channel_first: - mean = np.array(self.mean)[:, np.newaxis, np.newaxis] - std = np.array(self.std)[:, np.newaxis, np.newaxis] - else: - mean = np.array(self.mean)[np.newaxis, np.newaxis, :] - std = np.array(self.std)[np.newaxis, np.newaxis, :] - if self.is_scale: - im = im / 255.0 - im -= mean - im /= std - return im - - -class Permute(object): - def __init__(self, to_bgr=True, channel_first=True): - """ - Change the channel. 
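-        (i.e. transpose the layout from HWC to CHW and, optionally, reorder the channels from RGB to BGR)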
- - Args: - to_bgr (bool): confirm whether to convert RGB to BGR - channel_first (bool): confirm whether to change channel - """ - self.to_bgr = to_bgr - self.channel_first = channel_first - - def __call__(self, im): - if self.channel_first: - im = np.swapaxes(im, 1, 2) - im = np.swapaxes(im, 1, 0) - if self.to_bgr: - im = im[[2, 1, 0], :, :] - return im - - -def reader(paths=[], - images=None, - decode_image=DecodeImage(to_rgb=True, with_mixup=False), - resize_image=ResizeImage( - target_size=512, interp=1, max_size=0, use_cv2=False), - permute_image=Permute(to_bgr=False), - normalize_image=NormalizeImage( - mean=[104, 117, 123], std=[1, 1, 1], is_scale=False)): - """ - data generator - - Args: - paths (list[str]): paths to images. - images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] - decode_image (class object): instance of - resize_image (class object): instance of - permute_image (class object): instance of - normalize_image (class object): instance of - """ - img_list = [] - if paths is not None: - assert type(paths) is list, "type(paths) is not list." - for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file path.".format(img_path) - img = cv2.imread(img_path).astype('float32') - img_list.append(img) - if images is not None: - for img in images: - img_list.append(img) - - resize_image = ResizeImage( - target_size=300, interp=1, max_size=0, use_cv2=False) - - for img in img_list: - preprocessed_img = decode_image(img) - preprocessed_img = resize_image(preprocessed_img) - preprocessed_img = permute_image(preprocessed_img) - preprocessed_img = normalize_image(preprocessed_img) - yield [preprocessed_img] diff --git a/hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/module.py b/hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/module.py deleted file mode 100644 index 7310bf04d9e208e36e4d72a25ccc93b8a001012f..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/module.py +++ /dev/null @@ -1,311 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import - -import ast -import argparse -import os -from functools import partial - -import yaml -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix - -from ssd_vgg16_300_coco2017.vgg import VGG -from ssd_vgg16_300_coco2017.processor import load_label_info, postprocess, base64_to_cv2 -from ssd_vgg16_300_coco2017.data_feed import reader - - -@moduleinfo( - name="ssd_vgg16_300_coco2017", - version="1.0.1", - type="cv/object_detection", - summary="SSD with backbone VGG16, trained with dataset COCO.", - author="paddlepaddle", - author_email="paddle-dev@baidu.com") -class SSDVGG16(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "ssd_vgg16_300_model") - self.label_names = load_label_info( - os.path.join(self.directory, "label_file.txt")) - self.model_config = None - self._set_config() - - def _set_config(self): - # predictor config setting. 
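-        # NOTE: AnalysisConfig points at the exported inference model
-        # directory; glog output is silenced and IR optimization disabled
-        # before the predictor is created.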
- cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - # model config setting. - if not self.model_config: - with open(os.path.join(self.directory, 'config.yml')) as fp: - self.model_config = yaml.load(fp.read(), Loader=yaml.FullLoader) - - self.multi_box_head_config = self.model_config['MultiBoxHead'] - self.output_decoder_config = self.model_config['SSDOutputDecoder'] - - def context(self, trainable=True, pretrained=True, get_prediction=False): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - get_prediction (bool): whether to get prediction. - - Returns: - inputs(dict): the input variables. - outputs(dict): the output variables. - context_prog (Program): the program to execute transfer learning. - """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - # image - image = fluid.layers.data( - name='image', shape=[3, 300, 300], dtype='float32') - # backbone - backbone = VGG( - depth=16, - with_extra_blocks=True, - normalizations=[20., -1, -1, -1, -1, -1]) - # body_feats - body_feats = backbone(image) - # im_size - im_size = fluid.layers.data( - name='im_size', shape=[2], dtype='int32') - # var_prefix - var_prefix = '@HUB_{}@'.format(self.name) - # names of inputs - inputs = { - 'image': var_prefix + image.name, - 'im_size': var_prefix + im_size.name - } - # names of outputs - if get_prediction: - locs, confs, box, box_var = fluid.layers.multi_box_head( - inputs=body_feats, - image=image, - num_classes=81, - **self.multi_box_head_config) - pred = fluid.layers.detection_output( - loc=locs, - scores=confs, - prior_box=box, - prior_box_var=box_var, - **self.output_decoder_config) - outputs = {'bbox_out': [var_prefix + pred.name]} - else: - outputs = { - 'body_features': - [var_prefix + var.name for var in body_feats] - } - - # add_vars_prefix - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(fluid.default_startup_program(), var_prefix) - # inputs - inputs = { - key: context_prog.global_block().vars[value] - for key, value in inputs.items() - } - outputs = { - out_key: [ - context_prog.global_block().vars[varname] - for varname in out_value - ] - for out_key, out_value in outputs.items() - } - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - # pretrained - if pretrained: - - def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) - else: - exe.run(startup_program) - - return inputs, outputs, context_prog - - def object_detection(self, - paths=None, - images=None, - batch_size=1, - use_gpu=False, - output_dir='detection_result', - 
score_thresh=0.5, - visualization=True): - """API of Object Detection. - - Args: - paths (list[str]): The paths of images. - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - output_dir (str): The path to store output images. - visualization (bool): Whether to save image or not. - score_thresh (float): threshold for object detecion. - - Returns: - res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of detection result; - confidence (float): The confidence of detection result. - save_path (str, optional): The path to save output images. - """ - paths = paths if paths else list() - data_reader = partial(reader, paths, images) - batch_reader = fluid.io.batch(data_reader, batch_size=batch_size) - res = [] - for iter_id, feed_data in enumerate(batch_reader()): - feed_data = np.array(feed_data) - image_tensor = PaddleTensor(np.array(list(feed_data[:, 0])).copy()) - if use_gpu: - data_out = self.gpu_predictor.run([image_tensor]) - else: - data_out = self.cpu_predictor.run([image_tensor]) - - output = postprocess( - paths=paths, - images=images, - data_out=data_out, - score_thresh=score_thresh, - label_names=self.label_names, - output_dir=output_dir, - handle_id=iter_id * batch_size, - visualization=visualization) - res.extend(output) - return res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.object_detection(images=images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. - """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. 
Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.object_detection( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization, - score_thresh=args.score_thresh) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='detection_result', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - - def add_module_input_arg(self): - """ - Add the command input options. - """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") - self.arg_input_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - self.arg_input_group.add_argument( - '--score_thresh', - type=ast.literal_eval, - default=0.5, - help="threshold for object detecion.") diff --git a/hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/processor.py b/hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/processor.py deleted file mode 100644 index 6960d1efd3585b863969518a1b71b9309cc8c506..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/processor.py +++ /dev/null @@ -1,176 +0,0 @@ -# coding=utf-8 -import base64 -import os - -import cv2 -import numpy as np -from PIL import Image, ImageDraw - -__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess'] - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def get_save_image_name(img, output_dir, image_path): - """ - Get save image name from source image path. 
- """ - if not os.path.exists(output_dir): - os.makedirs(output_dir) - image_name = os.path.split(image_path)[-1] - name, ext = os.path.splitext(image_name) - - if img.format == 'PNG': - ext = '.png' - elif img.format == 'JPEG': - ext = '.jpg' - elif img.format == 'BMP': - ext = '.bmp' - else: - if img.mode == "RGB" or img.mode == "L": - ext = ".jpg" - elif img.mode == "RGBA" or img.mode == "P": - ext = '.png' - - return os.path.join(output_dir, "{}".format(name)) + ext - - -def draw_bounding_box_on_image(image_path, data_list, save_dir): - image = Image.open(image_path) - draw = ImageDraw.Draw(image) - for data in data_list: - left, right, top, bottom = data['left'], data['right'], data[ - 'top'], data['bottom'] - - # draw bbox - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), - (left, top)], - width=2, - fill='red') - - # draw label - if image.mode == 'RGB': - text = data['label'] + ": %.2f%%" % (100 * data['confidence']) - textsize_width, textsize_height = draw.textsize(text=text) - draw.rectangle( - xy=(left, top - (textsize_height + 5), - left + textsize_width + 10, top), - fill=(255, 255, 255)) - draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) - - save_name = get_save_image_name(image, save_dir, image_path) - if os.path.exists(save_name): - os.remove(save_name) - - image.save(save_name) - - return save_name - - -def clip_bbox(bbox, img_width, img_height): - xmin = max(min(bbox[0], img_width), 0.) - ymin = max(min(bbox[1], img_height), 0.) - xmax = max(min(bbox[2], img_width), 0.) - ymax = max(min(bbox[3], img_height), 0.) - return float(xmin), float(ymin), float(xmax), float(ymax) - - -def load_label_info(file_path): - with open(file_path, 'r') as fr: - text = fr.readlines() - label_names = [] - for info in text: - label_names.append(info.strip()) - return label_names - - -def postprocess(paths, - images, - data_out, - score_thresh, - label_names, - output_dir, - handle_id, - visualization=True): - """ - postprocess the lod_tensor produced by fluid.Executor.run - - Args: - paths (list[str]): the path of images. - images (list(numpy.ndarray)): list of images, shape of each is [H, W, C]. - data_out (lod_tensor): data produced by executor.run. - score_thresh (float): the low limit of bounding box. - label_names (list[str]): label names. - output_dir (str): output directory. - handle_id (int): The number of images that have been handled. - visualization (bool): whether to save as images. - - Returns: - res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of detection result; - confidence (float): The confidence of detection result. - save_path (str): The path to save output images. 
- """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() - if handle_id < len(paths): - unhandled_paths = paths[handle_id:] - unhandled_paths_num = len(unhandled_paths) - else: - unhandled_paths_num = 0 - - output = [] - for index in range(len(lod) - 1): - output_i = {'data': []} - if index < unhandled_paths_num: - org_img_path = unhandled_paths[index] - org_img = Image.open(org_img_path) - output_i['path'] = org_img_path - else: - org_img = images[index - unhandled_paths_num] - org_img = org_img.astype(np.uint8) - org_img = Image.fromarray(org_img[:, :, ::-1]) - if visualization: - org_img_path = get_save_image_name( - org_img, output_dir, 'image_numpy_{}'.format( - (handle_id + index))) - org_img.save(org_img_path) - org_img_height = org_img.height - org_img_width = org_img.width - result_i = results[lod[index]:lod[index + 1]] - for row in result_i: - if len(row) != 6: - continue - if row[1] < score_thresh: - continue - category_id = int(row[0]) - confidence = row[1] - bbox = row[2:] - bbox[0] = bbox[0] * org_img_width - bbox[1] = bbox[1] * org_img_height - bbox[2] = bbox[2] * org_img_width - bbox[3] = bbox[3] * org_img_height - dt = {} - dt['label'] = label_names[category_id] - dt['confidence'] = float(confidence) - dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox( - bbox, org_img_width, org_img_height) - output_i['data'].append(dt) - - output.append(output_i) - if visualization: - output_i['save_path'] = draw_bounding_box_on_image( - org_img_path, output_i['data'], output_dir) - - return output diff --git a/hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/vgg.py b/hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/vgg.py deleted file mode 100644 index dc760f328947a16cddb0b1d0d2d7556fc651fbf7..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/vgg.py +++ /dev/null @@ -1,224 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr - -__all__ = ['VGG'] - - -class VGG(object): - """ - VGG, see https://arxiv.org/abs/1409.1556 - - Args: - depth (int): the VGG net depth (16 or 19) - normalizations (list): params list of init scale in l2 norm, skip init - scale if param is -1. 
- with_extra_blocks (bool): whether or not extra blocks should be added - extra_block_filters (list): in each extra block, params: - [in_channel, out_channel, padding_size, stride_size, filter_size] - class_dim (int): number of class while classification - """ - - def __init__(self, - depth=16, - with_extra_blocks=False, - normalizations=[20., -1, -1, -1, -1, -1], - extra_block_filters=[[256, 512, 1, 2, 3], [128, 256, 1, 2, 3], - [128, 256, 0, 1, 3], [128, 256, 0, 1, 3]], - class_dim=1000): - assert depth in [16, 19], "depth {} not in [16, 19]" - self.depth = depth - self.depth_cfg = {16: [2, 2, 3, 3, 3], 19: [2, 2, 4, 4, 4]} - self.with_extra_blocks = with_extra_blocks - self.normalizations = normalizations - self.extra_block_filters = extra_block_filters - self.class_dim = class_dim - - def __call__(self, input): - layers = [] - layers += self._vgg_block(input) - - if not self.with_extra_blocks: - return layers[-1] - - layers += self._add_extras_block(layers[-1]) - norm_cfg = self.normalizations - for k, v in enumerate(layers): - if not norm_cfg[k] == -1: - layers[k] = self._l2_norm_scale(v, init_scale=norm_cfg[k]) - - return layers - - def _vgg_block(self, input): - nums = self.depth_cfg[self.depth] - vgg_base = [64, 128, 256, 512, 512] - conv = input - res_layer = [] - layers = [] - for k, v in enumerate(vgg_base): - conv = self._conv_block( - conv, v, nums[k], name="conv{}_".format(k + 1)) - layers.append(conv) - if self.with_extra_blocks: - if k == 4: - conv = self._pooling_block(conv, 3, 1, pool_padding=1) - else: - conv = self._pooling_block(conv, 2, 2) - else: - conv = self._pooling_block(conv, 2, 2) - if not self.with_extra_blocks: - fc_dim = 4096 - fc_name = ["fc6", "fc7", "fc8"] - fc1 = fluid.layers.fc( - input=conv, - size=fc_dim, - act='relu', - param_attr=fluid.param_attr.ParamAttr( - name=fc_name[0] + "_weights"), - bias_attr=fluid.param_attr.ParamAttr( - name=fc_name[0] + "_offset")) - fc2 = fluid.layers.fc( - input=fc1, - size=fc_dim, - act='relu', - param_attr=fluid.param_attr.ParamAttr( - name=fc_name[1] + "_weights"), - bias_attr=fluid.param_attr.ParamAttr( - name=fc_name[1] + "_offset")) - out = fluid.layers.fc( - input=fc2, - size=self.class_dim, - param_attr=fluid.param_attr.ParamAttr( - name=fc_name[2] + "_weights"), - bias_attr=fluid.param_attr.ParamAttr( - name=fc_name[2] + "_offset")) - out = fluid.layers.softmax(out) - res_layer.append(out) - return [out] - else: - fc6 = self._conv_layer(conv, 1024, 3, 1, 6, dilation=6, name="fc6") - fc7 = self._conv_layer(fc6, 1024, 1, 1, 0, name="fc7") - return [layers[3], fc7] - - def _add_extras_block(self, input): - cfg = self.extra_block_filters - conv = input - layers = [] - for k, v in enumerate(cfg): - assert len(v) == 5, "extra_block_filters size not fix" - conv = self._extra_block( - conv, - v[0], - v[1], - v[2], - v[3], - v[4], - name="conv{}_".format(6 + k)) - layers.append(conv) - - return layers - - def _conv_block(self, input, num_filter, groups, name=None): - conv = input - for i in range(groups): - conv = self._conv_layer( - input=conv, - num_filters=num_filter, - filter_size=3, - stride=1, - padding=1, - act='relu', - name=name + str(i + 1)) - return conv - - def _extra_block(self, - input, - num_filters1, - num_filters2, - padding_size, - stride_size, - filter_size, - name=None): - # 1x1 conv - conv_1 = self._conv_layer( - input=input, - num_filters=int(num_filters1), - filter_size=1, - stride=1, - act='relu', - padding=0, - name=name + "1") - - # 3x3 conv - conv_2 = self._conv_layer( - input=conv_1, - 
num_filters=int(num_filters2), - filter_size=filter_size, - stride=stride_size, - act='relu', - padding=padding_size, - name=name + "2") - return conv_2 - - def _conv_layer(self, - input, - num_filters, - filter_size, - stride, - padding, - dilation=1, - act='relu', - use_cudnn=True, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - dilation=dilation, - act=act, - use_cudnn=use_cudnn, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=ParamAttr( - name=name + "_biases") if self.with_extra_blocks else False, - name=name + '.conv2d.output.1') - return conv - - def _pooling_block(self, - conv, - pool_size, - pool_stride, - pool_padding=0, - ceil_mode=True): - pool = fluid.layers.pool2d( - input=conv, - pool_size=pool_size, - pool_type='max', - pool_stride=pool_stride, - pool_padding=pool_padding, - ceil_mode=ceil_mode) - return pool - - def _l2_norm_scale(self, input, init_scale=1.0, channel_shared=False): - from paddle.fluid.layer_helper import LayerHelper - from paddle.fluid.initializer import Constant - helper = LayerHelper("Scale") - l2_norm = fluid.layers.l2_normalize( - input, axis=1) # l2 norm along channel - shape = [1] if channel_shared else [input.shape[1]] - scale = helper.create_parameter( - attr=helper.param_attr, - shape=shape, - dtype=input.dtype, - default_initializer=Constant(init_scale)) - out = fluid.layers.elementwise_mul( - x=l2_norm, - y=scale, - axis=-1 if channel_shared else 1, - name="conv4_3_norm_scale") - return out diff --git a/hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/data_feed.py b/hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/data_feed.py deleted file mode 100644 index c1994f1161ef5d8618f9a917186ec75c39b23d94..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/data_feed.py +++ /dev/null @@ -1,209 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import os -import random -from collections import OrderedDict - -import cv2 -import numpy as np -from PIL import Image -from paddle import fluid - -__all__ = ['reader'] - - -class DecodeImage(object): - def __init__(self, to_rgb=True, with_mixup=False): - """ Transform the image data to numpy format. - - Args: - to_rgb (bool): whether to convert BGR to RGB - with_mixup (bool): whether or not to mixup image and gt_bbbox/gt_score - """ - self.to_rgb = to_rgb - self.with_mixup = with_mixup - - def __call__(self, im): - if self.to_rgb: - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - - return im - - -class ResizeImage(object): - def __init__(self, - target_size=0, - max_size=0, - interp=cv2.INTER_LINEAR, - use_cv2=True): - """ - Rescale image to the specified target size, and capped at max_size - if max_size != 0. - If target_size is list, selected a scale randomly as the specified - target size. - - Args: - target_size (int|list): the target size of image's short side, - multi-scale training is adopted when type is list. 
- max_size (int): the max size of image - interp (int): the interpolation method - use_cv2 (bool): use the cv2 interpolation method or use PIL - interpolation method - """ - self.max_size = int(max_size) - self.interp = int(interp) - self.use_cv2 = use_cv2 - self.target_size = target_size - - def __call__(self, im): - if not isinstance(im, np.ndarray): - raise TypeError("{}: image type is not numpy.".format(self)) - if len(im.shape) != 3: - raise ValueError('{}: image is not 3-dimensional.'.format(self)) - im_shape = im.shape - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - if isinstance(self.target_size, list): - # Case for multi-scale training - selected_size = random.choice(self.target_size) - else: - selected_size = self.target_size - if float(im_size_min) == 0: - raise ZeroDivisionError('{}: min size of image is 0'.format(self)) - if self.max_size != 0: - im_scale = float(selected_size) / float(im_size_min) - # Prevent the biggest axis from being more than max_size - if np.round(im_scale * im_size_max) > self.max_size: - im_scale = float(self.max_size) / float(im_size_max) - im_scale_x = im_scale - im_scale_y = im_scale - - resize_w = im_scale_x * float(im_shape[1]) - resize_h = im_scale_y * float(im_shape[0]) - im_info = [resize_h, resize_w, im_scale] - else: - im_scale_x = float(selected_size) / float(im_shape[1]) - im_scale_y = float(selected_size) / float(im_shape[0]) - - resize_w = selected_size - resize_h = selected_size - - if self.use_cv2: - im = cv2.resize( - im, - None, - None, - fx=im_scale_x, - fy=im_scale_y, - interpolation=self.interp) - else: - if self.max_size != 0: - raise TypeError( - 'If you set max_size to cap the maximum size of image,' - 'please set use_cv2 to True to resize the image.') - im = im.astype('uint8') - im = Image.fromarray(im) - im = im.resize((int(resize_w), int(resize_h)), self.interp) - im = np.array(im) - - return im - - -class NormalizeImage(object): - def __init__(self, - mean=[0.485, 0.456, 0.406], - std=[1, 1, 1], - is_scale=True, - is_channel_first=True): - """ - Args: - mean (list): the pixel mean - std (list): the pixel variance - """ - self.mean = mean - self.std = std - self.is_scale = is_scale - self.is_channel_first = is_channel_first - - def __call__(self, im): - """Normalize the image. - - Operators: - 1.(optional) Scale the image to [0,1] - 2. Each pixel minus mean and is divided by std - """ - im = im.astype(np.float32, copy=False) - if self.is_channel_first: - mean = np.array(self.mean)[:, np.newaxis, np.newaxis] - std = np.array(self.std)[:, np.newaxis, np.newaxis] - else: - mean = np.array(self.mean)[np.newaxis, np.newaxis, :] - std = np.array(self.std)[np.newaxis, np.newaxis, :] - if self.is_scale: - im = im / 255.0 - im -= mean - im /= std - return im - - -class Permute(object): - def __init__(self, to_bgr=True, channel_first=True): - """ - Change the channel. 
- - Args: - to_bgr (bool): confirm whether to convert RGB to BGR - channel_first (bool): confirm whether to change channel - """ - self.to_bgr = to_bgr - self.channel_first = channel_first - - def __call__(self, im): - if self.channel_first: - im = np.swapaxes(im, 1, 2) - im = np.swapaxes(im, 1, 0) - if self.to_bgr: - im = im[[2, 1, 0], :, :] - return im - - -def reader(paths=[], - images=None, - decode_image=DecodeImage(to_rgb=True, with_mixup=False), - resize_image=ResizeImage( - target_size=512, interp=1, max_size=0, use_cv2=False), - permute_image=Permute(to_bgr=False), - normalize_image=NormalizeImage( - mean=[104, 117, 123], std=[1, 1, 1], is_scale=False)): - """ - data generator - - Args: - paths (list[str]): paths to images. - images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] - decode_image (class object): instance of - resize_image (class object): instance of - permute_image (class object): instance of - normalize_image (class object): instance of - """ - img_list = [] - if paths is not None: - assert type(paths) is list, "type(paths) is not list." - for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file path.".format(img_path) - img = cv2.imread(img_path).astype('float32') - img_list.append(img) - if images is not None: - for img in images: - img_list.append(img) - - for img in img_list: - preprocessed_img = decode_image(img) - preprocessed_img = resize_image(preprocessed_img) - preprocessed_img = permute_image(preprocessed_img) - preprocessed_img = normalize_image(preprocessed_img) - yield [preprocessed_img] diff --git a/hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/module.py b/hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/module.py deleted file mode 100644 index e7246367c5e9e895e8d840f37bb66ce8e0ad2b17..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/module.py +++ /dev/null @@ -1,325 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import - -import ast -import argparse -import os -from functools import partial - -import yaml -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix - -from ssd_vgg16_512_coco2017.vgg import VGG -from ssd_vgg16_512_coco2017.processor import load_label_info, postprocess, base64_to_cv2 -from ssd_vgg16_512_coco2017.data_feed import reader - - -@moduleinfo( - name="ssd_vgg16_512_coco2017", - version="1.0.1", - type="cv/object_detection", - summary="SSD with backbone VGG16, trained with dataset COCO.", - author="paddlepaddle", - author_email="paddle-dev@baidu.com") -class SSDVGG16_512(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "ssd_vgg16_512_model") - self.label_names = load_label_info( - os.path.join(self.directory, "label_file.txt")) - self.model_config = None - self._set_config() - - def _set_config(self): - # predictor config setting. 
- cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - # model config setting. - if not self.model_config: - with open(os.path.join(self.directory, 'config.yml')) as fp: - self.model_config = yaml.load(fp.read(), Loader=yaml.FullLoader) - - self.multi_box_head_config = self.model_config['MultiBoxHead'] - self.output_decoder_config = self.model_config['SSDOutputDecoder'] - - def context(self, trainable=True, pretrained=True, get_prediction=False): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - get_prediction (bool): whether to get prediction. - - Returns: - inputs(dict): the input variables. - outputs(dict): the output variables. - context_prog (Program): the program to execute transfer learning. - """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - # image - image = fluid.layers.data( - name='image', shape=[3, 512, 512], dtype='float32') - # backbone - backbone = VGG( - depth=16, - with_extra_blocks=True, - normalizations=[20., -1, -1, -1, -1, -1, -1], - extra_block_filters=[[256, 512, 1, 2, - 3], [128, 256, 1, 2, 3], - [128, 256, 1, 2, - 3], [128, 256, 1, 2, 3], - [128, 256, 1, 1, 4]]) - # body_feats - body_feats = backbone(image) - # im_size - im_size = fluid.layers.data( - name='im_size', shape=[2], dtype='int32') - # var_prefix - var_prefix = '@HUB_{}@'.format(self.name) - # names of inputs - inputs = { - 'image': var_prefix + image.name, - 'im_size': var_prefix + im_size.name - } - # names of outputs - if get_prediction: - locs, confs, box, box_var = fluid.layers.multi_box_head( - inputs=body_feats, - image=image, - num_classes=81, - **self.multi_box_head_config) - pred = fluid.layers.detection_output( - loc=locs, - scores=confs, - prior_box=box, - prior_box_var=box_var, - **self.output_decoder_config) - outputs = {'bbox_out': [var_prefix + pred.name]} - else: - outputs = { - 'body_features': - [var_prefix + var.name for var in body_feats] - } - - # add_vars_prefix - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(fluid.default_startup_program(), var_prefix) - # inputs - inputs = { - key: context_prog.global_block().vars[value] - for key, value in inputs.items() - } - outputs = { - out_key: [ - context_prog.global_block().vars[varname] - for varname in out_value - ] - for out_key, out_value in outputs.items() - } - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - # pretrained - if pretrained: - - def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) - else: - exe.run(startup_program) - - return inputs, outputs, context_prog - 
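A minimal usage sketch for the `context()` API above (illustrative only: the module name, feed names and dict keys follow this file, while the actual fine-tuning step is left out):

```python
import paddlehub as hub

# Load the installed module and build its program with pretrained weights.
module = hub.Module(name="ssd_vgg16_512_coco2017")
inputs, outputs, program = module.context(trainable=True, pretrained=True)

# 'image' is the 3x512x512 input variable; 'body_features' holds the
# backbone feature maps that a new detection head could be trained on.
image = inputs["image"]
body_feats = outputs["body_features"]
```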
- def object_detection(self, - paths=None, - images=None, - batch_size=1, - use_gpu=False, - output_dir='detection_result', - score_thresh=0.5, - visualization=True): - """API of Object Detection. - - Args: - paths (list[str]): The paths of images. - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - output_dir (str): The path to store output images. - visualization (bool): Whether to save image or not. - score_thresh (float): threshold for object detecion. - - Returns: - res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of detection result; - confidence (float): The confidence of detection result. - save_path (str, optional): The path to save output images. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." - ) - - paths = paths if paths else list() - data_reader = partial(reader, paths, images) - batch_reader = fluid.io.batch(data_reader, batch_size=batch_size) - res = [] - for iter_id, feed_data in enumerate(batch_reader()): - feed_data = np.array(feed_data) - image_tensor = PaddleTensor(np.array(list(feed_data[:, 0])).copy()) - if use_gpu: - data_out = self.gpu_predictor.run([image_tensor]) - else: - data_out = self.cpu_predictor.run([image_tensor]) - - output = postprocess( - paths=paths, - images=images, - data_out=data_out, - score_thresh=score_thresh, - label_names=self.label_names, - output_dir=output_dir, - handle_id=iter_id * batch_size, - visualization=visualization) - res.extend(output) - return res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.object_detection(images=images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. 
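-
-        e.g. hub run ssd_vgg16_512_coco2017 --input_path <image_path> --use_gpu False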
- """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.object_detection( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization, - score_thresh=args.score_thresh) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='detection_result', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - - def add_module_input_arg(self): - """ - Add the command input options. - """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") - self.arg_input_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - self.arg_input_group.add_argument( - '--score_thresh', - type=ast.literal_eval, - default=0.5, - help="threshold for object detecion.") diff --git a/hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/processor.py b/hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/processor.py deleted file mode 100644 index 6960d1efd3585b863969518a1b71b9309cc8c506..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/processor.py +++ /dev/null @@ -1,176 +0,0 @@ -# coding=utf-8 -import base64 -import os - -import cv2 -import numpy as np -from PIL import Image, ImageDraw - -__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess'] - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def get_save_image_name(img, output_dir, image_path): - """ - Get save image name from source image path. 
- """ - if not os.path.exists(output_dir): - os.makedirs(output_dir) - image_name = os.path.split(image_path)[-1] - name, ext = os.path.splitext(image_name) - - if img.format == 'PNG': - ext = '.png' - elif img.format == 'JPEG': - ext = '.jpg' - elif img.format == 'BMP': - ext = '.bmp' - else: - if img.mode == "RGB" or img.mode == "L": - ext = ".jpg" - elif img.mode == "RGBA" or img.mode == "P": - ext = '.png' - - return os.path.join(output_dir, "{}".format(name)) + ext - - -def draw_bounding_box_on_image(image_path, data_list, save_dir): - image = Image.open(image_path) - draw = ImageDraw.Draw(image) - for data in data_list: - left, right, top, bottom = data['left'], data['right'], data[ - 'top'], data['bottom'] - - # draw bbox - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), - (left, top)], - width=2, - fill='red') - - # draw label - if image.mode == 'RGB': - text = data['label'] + ": %.2f%%" % (100 * data['confidence']) - textsize_width, textsize_height = draw.textsize(text=text) - draw.rectangle( - xy=(left, top - (textsize_height + 5), - left + textsize_width + 10, top), - fill=(255, 255, 255)) - draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) - - save_name = get_save_image_name(image, save_dir, image_path) - if os.path.exists(save_name): - os.remove(save_name) - - image.save(save_name) - - return save_name - - -def clip_bbox(bbox, img_width, img_height): - xmin = max(min(bbox[0], img_width), 0.) - ymin = max(min(bbox[1], img_height), 0.) - xmax = max(min(bbox[2], img_width), 0.) - ymax = max(min(bbox[3], img_height), 0.) - return float(xmin), float(ymin), float(xmax), float(ymax) - - -def load_label_info(file_path): - with open(file_path, 'r') as fr: - text = fr.readlines() - label_names = [] - for info in text: - label_names.append(info.strip()) - return label_names - - -def postprocess(paths, - images, - data_out, - score_thresh, - label_names, - output_dir, - handle_id, - visualization=True): - """ - postprocess the lod_tensor produced by fluid.Executor.run - - Args: - paths (list[str]): the path of images. - images (list(numpy.ndarray)): list of images, shape of each is [H, W, C]. - data_out (lod_tensor): data produced by executor.run. - score_thresh (float): the low limit of bounding box. - label_names (list[str]): label names. - output_dir (str): output directory. - handle_id (int): The number of images that have been handled. - visualization (bool): whether to save as images. - - Returns: - res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of detection result; - confidence (float): The confidence of detection result. - save_path (str): The path to save output images. 
- """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() - if handle_id < len(paths): - unhandled_paths = paths[handle_id:] - unhandled_paths_num = len(unhandled_paths) - else: - unhandled_paths_num = 0 - - output = [] - for index in range(len(lod) - 1): - output_i = {'data': []} - if index < unhandled_paths_num: - org_img_path = unhandled_paths[index] - org_img = Image.open(org_img_path) - output_i['path'] = org_img_path - else: - org_img = images[index - unhandled_paths_num] - org_img = org_img.astype(np.uint8) - org_img = Image.fromarray(org_img[:, :, ::-1]) - if visualization: - org_img_path = get_save_image_name( - org_img, output_dir, 'image_numpy_{}'.format( - (handle_id + index))) - org_img.save(org_img_path) - org_img_height = org_img.height - org_img_width = org_img.width - result_i = results[lod[index]:lod[index + 1]] - for row in result_i: - if len(row) != 6: - continue - if row[1] < score_thresh: - continue - category_id = int(row[0]) - confidence = row[1] - bbox = row[2:] - bbox[0] = bbox[0] * org_img_width - bbox[1] = bbox[1] * org_img_height - bbox[2] = bbox[2] * org_img_width - bbox[3] = bbox[3] * org_img_height - dt = {} - dt['label'] = label_names[category_id] - dt['confidence'] = float(confidence) - dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox( - bbox, org_img_width, org_img_height) - output_i['data'].append(dt) - - output.append(output_i) - if visualization: - output_i['save_path'] = draw_bounding_box_on_image( - org_img_path, output_i['data'], output_dir) - - return output diff --git a/hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/vgg.py b/hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/vgg.py deleted file mode 100644 index dc760f328947a16cddb0b1d0d2d7556fc651fbf7..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/vgg.py +++ /dev/null @@ -1,224 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr - -__all__ = ['VGG'] - - -class VGG(object): - """ - VGG, see https://arxiv.org/abs/1409.1556 - - Args: - depth (int): the VGG net depth (16 or 19) - normalizations (list): params list of init scale in l2 norm, skip init - scale if param is -1. 
- with_extra_blocks (bool): whether or not extra blocks should be added - extra_block_filters (list): in each extra block, params: - [in_channel, out_channel, padding_size, stride_size, filter_size] - class_dim (int): number of class while classification - """ - - def __init__(self, - depth=16, - with_extra_blocks=False, - normalizations=[20., -1, -1, -1, -1, -1], - extra_block_filters=[[256, 512, 1, 2, 3], [128, 256, 1, 2, 3], - [128, 256, 0, 1, 3], [128, 256, 0, 1, 3]], - class_dim=1000): - assert depth in [16, 19], "depth {} not in [16, 19]" - self.depth = depth - self.depth_cfg = {16: [2, 2, 3, 3, 3], 19: [2, 2, 4, 4, 4]} - self.with_extra_blocks = with_extra_blocks - self.normalizations = normalizations - self.extra_block_filters = extra_block_filters - self.class_dim = class_dim - - def __call__(self, input): - layers = [] - layers += self._vgg_block(input) - - if not self.with_extra_blocks: - return layers[-1] - - layers += self._add_extras_block(layers[-1]) - norm_cfg = self.normalizations - for k, v in enumerate(layers): - if not norm_cfg[k] == -1: - layers[k] = self._l2_norm_scale(v, init_scale=norm_cfg[k]) - - return layers - - def _vgg_block(self, input): - nums = self.depth_cfg[self.depth] - vgg_base = [64, 128, 256, 512, 512] - conv = input - res_layer = [] - layers = [] - for k, v in enumerate(vgg_base): - conv = self._conv_block( - conv, v, nums[k], name="conv{}_".format(k + 1)) - layers.append(conv) - if self.with_extra_blocks: - if k == 4: - conv = self._pooling_block(conv, 3, 1, pool_padding=1) - else: - conv = self._pooling_block(conv, 2, 2) - else: - conv = self._pooling_block(conv, 2, 2) - if not self.with_extra_blocks: - fc_dim = 4096 - fc_name = ["fc6", "fc7", "fc8"] - fc1 = fluid.layers.fc( - input=conv, - size=fc_dim, - act='relu', - param_attr=fluid.param_attr.ParamAttr( - name=fc_name[0] + "_weights"), - bias_attr=fluid.param_attr.ParamAttr( - name=fc_name[0] + "_offset")) - fc2 = fluid.layers.fc( - input=fc1, - size=fc_dim, - act='relu', - param_attr=fluid.param_attr.ParamAttr( - name=fc_name[1] + "_weights"), - bias_attr=fluid.param_attr.ParamAttr( - name=fc_name[1] + "_offset")) - out = fluid.layers.fc( - input=fc2, - size=self.class_dim, - param_attr=fluid.param_attr.ParamAttr( - name=fc_name[2] + "_weights"), - bias_attr=fluid.param_attr.ParamAttr( - name=fc_name[2] + "_offset")) - out = fluid.layers.softmax(out) - res_layer.append(out) - return [out] - else: - fc6 = self._conv_layer(conv, 1024, 3, 1, 6, dilation=6, name="fc6") - fc7 = self._conv_layer(fc6, 1024, 1, 1, 0, name="fc7") - return [layers[3], fc7] - - def _add_extras_block(self, input): - cfg = self.extra_block_filters - conv = input - layers = [] - for k, v in enumerate(cfg): - assert len(v) == 5, "extra_block_filters size not fix" - conv = self._extra_block( - conv, - v[0], - v[1], - v[2], - v[3], - v[4], - name="conv{}_".format(6 + k)) - layers.append(conv) - - return layers - - def _conv_block(self, input, num_filter, groups, name=None): - conv = input - for i in range(groups): - conv = self._conv_layer( - input=conv, - num_filters=num_filter, - filter_size=3, - stride=1, - padding=1, - act='relu', - name=name + str(i + 1)) - return conv - - def _extra_block(self, - input, - num_filters1, - num_filters2, - padding_size, - stride_size, - filter_size, - name=None): - # 1x1 conv - conv_1 = self._conv_layer( - input=input, - num_filters=int(num_filters1), - filter_size=1, - stride=1, - act='relu', - padding=0, - name=name + "1") - - # 3x3 conv - conv_2 = self._conv_layer( - input=conv_1, - 
num_filters=int(num_filters2), - filter_size=filter_size, - stride=stride_size, - act='relu', - padding=padding_size, - name=name + "2") - return conv_2 - - def _conv_layer(self, - input, - num_filters, - filter_size, - stride, - padding, - dilation=1, - act='relu', - use_cudnn=True, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - dilation=dilation, - act=act, - use_cudnn=use_cudnn, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=ParamAttr( - name=name + "_biases") if self.with_extra_blocks else False, - name=name + '.conv2d.output.1') - return conv - - def _pooling_block(self, - conv, - pool_size, - pool_stride, - pool_padding=0, - ceil_mode=True): - pool = fluid.layers.pool2d( - input=conv, - pool_size=pool_size, - pool_type='max', - pool_stride=pool_stride, - pool_padding=pool_padding, - ceil_mode=ceil_mode) - return pool - - def _l2_norm_scale(self, input, init_scale=1.0, channel_shared=False): - from paddle.fluid.layer_helper import LayerHelper - from paddle.fluid.initializer import Constant - helper = LayerHelper("Scale") - l2_norm = fluid.layers.l2_normalize( - input, axis=1) # l2 norm along channel - shape = [1] if channel_shared else [input.shape[1]] - scale = helper.create_parameter( - attr=helper.param_attr, - shape=shape, - dtype=input.dtype, - default_initializer=Constant(init_scale)) - out = fluid.layers.elementwise_mul( - x=l2_norm, - y=scale, - axis=-1 if channel_shared else 1, - name="conv4_3_norm_scale") - return out diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/darknet.py b/hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/darknet.py deleted file mode 100644 index 754a7f9f9c7ddb37edb10069ead255fb4d76b6e0..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/darknet.py +++ /dev/null @@ -1,168 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import math - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['DarkNet'] - - -class DarkNet(object): - """DarkNet, see https://pjreddie.com/darknet/yolo/ - - Args: - depth (int): network depth, currently only darknet 53 is supported - norm_type (str): normalization type, 'bn' and 'sync_bn' are supported - norm_decay (float): weight decay for normalization layer weights - get_prediction (bool): whether to get prediction - class_dim (int): number of class while classification - """ - - def __init__(self, - depth=53, - norm_type='sync_bn', - norm_decay=0., - weight_prefix_name='', - get_prediction=False, - class_dim=1000): - assert depth in [53], "unsupported depth value" - self.depth = depth - self.norm_type = norm_type - self.norm_decay = norm_decay - self.depth_cfg = {53: ([1, 2, 8, 8, 4], self.basicblock)} - self.prefix_name = weight_prefix_name - self.class_dim = class_dim - self.get_prediction = get_prediction - - def _conv_norm(self, - input, - ch_out, - filter_size, - stride, - padding, - act='leaky', - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + ".conv.weights"), - bias_attr=False) - - bn_name = name + ".bn" - bn_param_attr = ParamAttr( - 
regularizer=L2Decay(float(self.norm_decay)), - name=bn_name + '.scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(float(self.norm_decay)), - name=bn_name + '.offset') - - out = fluid.layers.batch_norm( - input=conv, - act=None, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + '.var') - - # leaky relu here has `alpha` as 0.1, can not be set by - # `act` param in fluid.layers.batch_norm above. - if act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - - return out - - def _downsample(self, - input, - ch_out, - filter_size=3, - stride=2, - padding=1, - name=None): - return self._conv_norm( - input, - ch_out=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - name=name) - - def basicblock(self, input, ch_out, name=None): - conv1 = self._conv_norm( - input, - ch_out=ch_out, - filter_size=1, - stride=1, - padding=0, - name=name + ".0") - conv2 = self._conv_norm( - conv1, - ch_out=ch_out * 2, - filter_size=3, - stride=1, - padding=1, - name=name + ".1") - out = fluid.layers.elementwise_add(x=input, y=conv2, act=None) - return out - - def layer_warp(self, block_func, input, ch_out, count, name=None): - out = block_func(input, ch_out=ch_out, name='{}.0'.format(name)) - for j in six.moves.xrange(1, count): - out = block_func(out, ch_out=ch_out, name='{}.{}'.format(name, j)) - return out - - def __call__(self, input): - """ - Get the backbone of DarkNet, that is output for the 5 stages. - """ - stages, block_func = self.depth_cfg[self.depth] - stages = stages[0:5] - conv = self._conv_norm( - input=input, - ch_out=32, - filter_size=3, - stride=1, - padding=1, - name=self.prefix_name + "yolo_input") - downsample_ = self._downsample( - input=conv, - ch_out=conv.shape[1] * 2, - name=self.prefix_name + "yolo_input.downsample") - blocks = [] - for i, stage in enumerate(stages): - block = self.layer_warp( - block_func=block_func, - input=downsample_, - ch_out=32 * 2**i, - count=stage, - name=self.prefix_name + "stage.{}".format(i)) - blocks.append(block) - if i < len(stages) - 1: # do not downsaple in the last stage - downsample_ = self._downsample( - input=block, - ch_out=block.shape[1] * 2, - name=self.prefix_name + "stage.{}.downsample".format(i)) - if self.get_prediction: - pool = fluid.layers.pool2d( - input=block, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=self.class_dim, - param_attr=ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv), - name='fc_weights'), - bias_attr=ParamAttr(name='fc_offset')) - out = fluid.layers.softmax(out) - return out - else: - return blocks diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/data_feed.py b/hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/data_feed.py deleted file mode 100644 index d4d35611df9ade3fa6bbd60c04320bec05194d1d..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/data_feed.py +++ /dev/null @@ -1,71 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import os - -import cv2 -import numpy as np - -__all__ = ['reader'] - - -def reader(paths=[], images=None): - """ - data generator - - Args: - paths (list[str]): paths to images. 
- images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] - - Yield: - res (list): preprocessed image and the size of original image. - """ - img_list = [] - if paths: - assert type(paths) is list, "type(paths) is not list." - for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file path.".format(img_path) - img = cv2.imread(img_path).astype('float32') - img_list.append(img) - if images is not None: - for img in images: - img_list.append(img) - - for im in img_list: - # im_size - im_shape = im.shape - im_size = np.array([im_shape[0], im_shape[1]], dtype=np.int32) - - # decode image - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - - # resize image - target_size = 608 - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - if float(im_size_min) == 0: - raise ZeroDivisionError('min size of image is 0') - - im_scale_x = float(target_size) / float(im_shape[1]) - im_scale_y = float(target_size) / float(im_shape[0]) - im = cv2.resize( - im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=2) - - # normalize image - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - im = im.astype(np.float32, copy=False) - mean = np.array(mean)[np.newaxis, np.newaxis, :] - std = np.array(std)[np.newaxis, np.newaxis, :] - im = im / 255.0 - im -= mean - im /= std - - # permute - im = np.swapaxes(im, 1, 2) - im = np.swapaxes(im, 1, 0) - - yield [im, im_size] diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/module.py b/hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/module.py deleted file mode 100644 index 1b693fb27c695e14030e81c3f3e623f7d04a0651..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/module.py +++ /dev/null @@ -1,317 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import - -import ast -import argparse -import os -from functools import partial - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix - -from yolov3_darknet53_coco2017.darknet import DarkNet -from yolov3_darknet53_coco2017.processor import load_label_info, postprocess, base64_to_cv2 -from yolov3_darknet53_coco2017.data_feed import reader -from yolov3_darknet53_coco2017.yolo_head import MultiClassNMS, YOLOv3Head - - -@moduleinfo( - name="yolov3_darknet53_coco2017", - version="1.1.1", - type="CV/object_detection", - summary= - "Baidu's YOLOv3 model for object detection, with backbone DarkNet53, trained with dataset coco2017.", - author="paddlepaddle", - author_email="paddle-dev@baidu.com") -class YOLOv3DarkNet53Coco2017(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "yolov3_darknet53_model") - self.label_names = load_label_info( - os.path.join(self.directory, "label_file.txt")) - self._set_config() - - def _set_config(self): - """ - predictor config setting. 
- """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, trainable=True, pretrained=True, get_prediction=False): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - get_prediction (bool): whether to get prediction. - - Returns: - inputs(dict): the input variables. - outputs(dict): the output variables. - context_prog (Program): the program to execute transfer learning. - """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - # image - image = fluid.layers.data( - name='image', shape=[3, 608, 608], dtype='float32') - # backbone - backbone = DarkNet(norm_type='bn', norm_decay=0., depth=53) - # body_feats - body_feats = backbone(image) - # im_size - im_size = fluid.layers.data( - name='im_size', shape=[2], dtype='int32') - # yolo_head - yolo_head = YOLOv3Head(num_classes=80) - # head_features - head_features, body_features = yolo_head._get_outputs( - body_feats, is_train=trainable) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - # var_prefix - var_prefix = '@HUB_{}@'.format(self.name) - # name of inputs - inputs = { - 'image': var_prefix + image.name, - 'im_size': var_prefix + im_size.name - } - # name of outputs - if get_prediction: - bbox_out = yolo_head.get_prediction(head_features, im_size) - outputs = {'bbox_out': [var_prefix + bbox_out.name]} - else: - outputs = { - 'head_features': - [var_prefix + var.name for var in head_features], - 'body_features': - [var_prefix + var.name for var in body_features] - } - # add_vars_prefix - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(fluid.default_startup_program(), var_prefix) - # inputs - inputs = { - key: context_prog.global_block().vars[value] - for key, value in inputs.items() - } - # outputs - outputs = { - key: [ - context_prog.global_block().vars[varname] - for varname in value - ] - for key, value in outputs.items() - } - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - # pretrained - if pretrained: - - def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) - else: - exe.run(startup_program) - - return inputs, outputs, context_prog - - def object_detection(self, - paths=None, - images=None, - data=None, - batch_size=1, - use_gpu=False, - output_dir='detection_result', - score_thresh=0.5, - visualization=True): - """API of Object Detection. - - Args: - paths (list[str]): The paths of images. - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. 
- output_dir (str): The path to store output images. - visualization (bool): Whether to save image or not. - score_thresh (float): threshold for object detecion. - - Returns: - res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of detection result; - confidence (float): The confidence of detection result. - save_path (str, optional): The path to save output images. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." - ) - - paths = paths if paths else list() - if data and 'image' in data: - paths += data['image'] - - data_reader = partial(reader, paths, images) - batch_reader = fluid.io.batch(data_reader, batch_size=batch_size) - res = [] - for iter_id, feed_data in enumerate(batch_reader()): - feed_data = np.array(feed_data) - image_tensor = PaddleTensor(np.array(list(feed_data[:, 0]))) - im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1]))) - if use_gpu: - data_out = self.gpu_predictor.run( - [image_tensor, im_size_tensor]) - else: - data_out = self.cpu_predictor.run( - [image_tensor, im_size_tensor]) - - output = postprocess( - paths=paths, - images=images, - data_out=data_out, - score_thresh=score_thresh, - label_names=self.label_names, - output_dir=output_dir, - handle_id=iter_id * batch_size, - visualization=visualization) - res.extend(output) - return res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.object_detection(images=images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. - """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. 
Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.object_detection( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization, - score_thresh=args.score_thresh) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='detection_result', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - - def add_module_input_arg(self): - """ - Add the command input options. - """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") - self.arg_input_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - self.arg_input_group.add_argument( - '--score_thresh', - type=ast.literal_eval, - default=0.5, - help="threshold for object detecion.") diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/processor.py b/hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/processor.py deleted file mode 100644 index a6a91b32724c10aaed9be4498eba4311fdc7a2e8..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/processor.py +++ /dev/null @@ -1,180 +0,0 @@ -# coding=utf-8 -import base64 -import os - -import cv2 -import numpy as np -from PIL import Image, ImageDraw - -__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess'] - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def check_dir(dir_path): - if not os.path.exists(dir_path): - os.makedirs(dir_path) - elif os.path.isfile(dir_path): - os.remove(dir_path) - os.makedirs(dir_path) - - -def get_save_image_name(img, output_dir, image_path): - """Get save image name from source image path. 
- """ - image_name = os.path.split(image_path)[-1] - name, ext = os.path.splitext(image_name) - - if ext == '': - if img.format == 'PNG': - ext = '.png' - elif img.format == 'JPEG': - ext = '.jpg' - elif img.format == 'BMP': - ext = '.bmp' - else: - if img.mode == "RGB" or img.mode == "L": - ext = ".jpg" - elif img.mode == "RGBA" or img.mode == "P": - ext = '.png' - - return os.path.join(output_dir, "{}".format(name)) + ext - - -def draw_bounding_box_on_image(image_path, data_list, save_dir): - image = Image.open(image_path) - draw = ImageDraw.Draw(image) - for data in data_list: - left, right, top, bottom = data['left'], data['right'], data[ - 'top'], data['bottom'] - # draw bbox - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), - (left, top)], - width=2, - fill='red') - # draw label - if image.mode == 'RGB': - text = data['label'] + ": %.2f%%" % (100 * data['confidence']) - textsize_width, textsize_height = draw.textsize(text=text) - draw.rectangle( - xy=(left, top - (textsize_height + 5), - left + textsize_width + 10, top), - fill=(255, 255, 255)) - draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) - - save_name = get_save_image_name(image, save_dir, image_path) - if os.path.exists(save_name): - os.remove(save_name) - - image.save(save_name) - return save_name - - -def clip_bbox(bbox, img_width, img_height): - xmin = max(min(bbox[0], img_width), 0.) - ymin = max(min(bbox[1], img_height), 0.) - xmax = max(min(bbox[2], img_width), 0.) - ymax = max(min(bbox[3], img_height), 0.) - return float(xmin), float(ymin), float(xmax), float(ymax) - - -def load_label_info(file_path): - with open(file_path, 'r') as fr: - text = fr.readlines() - label_names = [] - for info in text: - label_names.append(info.strip()) - return label_names - - -def postprocess(paths, - images, - data_out, - score_thresh, - label_names, - output_dir, - handle_id, - visualization=True): - """ - postprocess the lod_tensor produced by fluid.Executor.run - - Args: - paths (list[str]): The paths of images. - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - data_out (lod_tensor): data output of predictor. - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - output_dir (str): The path to store output images. - visualization (bool): Whether to save image or not. - score_thresh (float): the low limit of bounding box. - label_names (list[str]): label names. - handle_id (int): The number of images that have been handled. - - Returns: - res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of detection result; - confidence (float): The confidence of detection result. - save_path (str): The path to save output images. - """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() - - check_dir(output_dir) - - assert type(paths) is list, "type(paths) is not list." 
- if handle_id < len(paths): - unhandled_paths = paths[handle_id:] - unhandled_paths_num = len(unhandled_paths) - else: - unhandled_paths_num = 0 - - output = list() - for index in range(len(lod) - 1): - output_i = {'data': []} - if index < unhandled_paths_num: - org_img_path = unhandled_paths[index] - org_img = Image.open(org_img_path) - else: - org_img = images[index - unhandled_paths_num] - org_img = org_img.astype(np.uint8) - org_img = Image.fromarray(org_img[:, :, ::-1]) - if visualization: - org_img_path = get_save_image_name( - org_img, output_dir, 'image_numpy_{}'.format( - (handle_id + index))) - org_img.save(org_img_path) - org_img_height = org_img.height - org_img_width = org_img.width - result_i = results[lod[index]:lod[index + 1]] - for row in result_i: - if len(row) != 6: - continue - if row[1] < score_thresh: - continue - category_id = int(row[0]) - confidence = row[1] - bbox = row[2:] - dt = {} - dt['label'] = label_names[category_id] - dt['confidence'] = float(confidence) - dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox( - bbox, org_img_width, org_img_height) - output_i['data'].append(dt) - - output.append(output_i) - if visualization: - output_i['save_path'] = draw_bounding_box_on_image( - org_img_path, output_i['data'], output_dir) - - return output diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/yolo_head.py b/hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/yolo_head.py deleted file mode 100644 index 7428fb4c281507c30918e12a04753d559346cf7b..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/yolo_head.py +++ /dev/null @@ -1,273 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['MultiClassNMS', 'YOLOv3Head'] - - -class MultiClassNMS(object): - # __op__ = fluid.layers.multiclass_nms - def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k, - normalized, score_threshold): - super(MultiClassNMS, self).__init__() - self.background_label = background_label - self.keep_top_k = keep_top_k - self.nms_threshold = nms_threshold - self.nms_top_k = nms_top_k - self.normalized = normalized - self.score_threshold = score_threshold - - -class YOLOv3Head(object): - """Head block for YOLOv3 network - - Args: - norm_decay (float): weight decay for normalization layer weights - num_classes (int): number of output classes - ignore_thresh (float): threshold to ignore confidence loss - label_smooth (bool): whether to use label smoothing - anchors (list): anchors - anchor_masks (list): anchor masks - nms (object): an instance of `MultiClassNMS` - """ - - def __init__(self, - norm_decay=0., - num_classes=80, - ignore_thresh=0.7, - label_smooth=True, - anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], - [59, 119], [116, 90], [156, 198], [373, 326]], - anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], - nms=MultiClassNMS( - background_label=-1, - keep_top_k=100, - nms_threshold=0.45, - nms_top_k=1000, - normalized=True, - score_threshold=0.01), - weight_prefix_name=''): - self.norm_decay = norm_decay - self.num_classes = num_classes - self.ignore_thresh = ignore_thresh - self.label_smooth = label_smooth - self.anchor_masks = anchor_masks - self._parse_anchors(anchors) - self.nms = nms - self.prefix_name 
= weight_prefix_name - - def _conv_bn(self, - input, - ch_out, - filter_size, - stride, - padding, - act='leaky', - is_test=True, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + ".conv.weights"), - bias_attr=False) - - bn_name = name + ".bn" - bn_param_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset') - out = fluid.layers.batch_norm( - input=conv, - act=None, - is_test=is_test, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + '.var') - - if act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - return out - - def _detection_block(self, input, channel, is_test=True, name=None): - assert channel % 2 == 0, \ - "channel {} cannot be divided by 2 in detection block {}" \ - .format(channel, name) - - conv = input - for j in range(2): - conv = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.{}.0'.format(name, j)) - conv = self._conv_bn( - conv, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.{}.1'.format(name, j)) - route = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.2'.format(name)) - tip = self._conv_bn( - route, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.tip'.format(name)) - return route, tip - - def _upsample(self, input, scale=2, name=None): - out = fluid.layers.resize_nearest( - input=input, scale=float(scale), name=name) - return out - - def _parse_anchors(self, anchors): - """ - Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors - - """ - self.anchors = [] - self.mask_anchors = [] - - assert len(anchors) > 0, "ANCHORS not set." - assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." 
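`_parse_anchors` flattens the anchor pairs and regroups them by mask, which is what `yolo_box` later consumes via `self.mask_anchors[i]`. A worked example with the default values from `YOLOv3Head.__init__`, written as comprehensions that mirror the explicit loops below:

```python
# Worked example of what _parse_anchors computes, using the defaults
# from YOLOv3Head.__init__ (no new data).
anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
           [59, 119], [116, 90], [156, 198], [373, 326]]
anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]

flat_anchors = [v for pair in anchors for v in pair]   # -> self.anchors
mask_anchors = [[v for m in masks for v in anchors[m]]
                for masks in anchor_masks]             # -> self.mask_anchors

print(flat_anchors)     # [10, 13, 16, 30, ..., 373, 326], 18 values
print(mask_anchors[0])  # [116, 90, 156, 198, 373, 326] (coarsest scale)
```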
- - for anchor in anchors: - assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) - self.anchors.extend(anchor) - - anchor_num = len(anchors) - for masks in self.anchor_masks: - self.mask_anchors.append([]) - for mask in masks: - assert mask < anchor_num, "anchor mask index overflow" - self.mask_anchors[-1].extend(anchors[mask]) - - def _get_outputs(self, input, is_train=True): - """ - Get YOLOv3 head output - - Args: - input (list): List of Variables, output of backbone stages - is_train (bool): whether in train or test mode - - Returns: - outputs (list): Variables of each output layer - """ - - outputs = [] - - # get last out_layer_num blocks in reverse order - out_layer_num = len(self.anchor_masks) - if isinstance(input, OrderedDict): - blocks = list(input.values())[-1:-out_layer_num - 1:-1] - else: - blocks = input[-1:-out_layer_num - 1:-1] - route = None - for i, block in enumerate(blocks): - if i > 0: # perform concat in first 2 detection_block - block = fluid.layers.concat(input=[route, block], axis=1) - route, tip = self._detection_block( - block, - channel=512 // (2**i), - is_test=(not is_train), - name=self.prefix_name + "yolo_block.{}".format(i)) - - # out channel number = mask_num * (5 + class_num) - num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) - block_out = fluid.layers.conv2d( - input=tip, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=ParamAttr(name=self.prefix_name + - "yolo_output.{}.conv.weights".format(i)), - bias_attr=ParamAttr( - regularizer=L2Decay(0.), - name=self.prefix_name + - "yolo_output.{}.conv.bias".format(i))) - outputs.append(block_out) - - if i < len(blocks) - 1: - # do not perform upsample in the last detection_block - route = self._conv_bn( - input=route, - ch_out=256 // (2**i), - filter_size=1, - stride=1, - padding=0, - is_test=(not is_train), - name=self.prefix_name + "yolo_transition.{}".format(i)) - # upsample - route = self._upsample(route) - - return outputs, blocks - - def get_prediction(self, outputs, im_size): - """ - Get prediction result of YOLOv3 network - - Args: - outputs (list): list of Variables, return from _get_outputs - im_size (Variable): Variable of size([h, w]) of each image - - Returns: - pred (Variable): The prediction result after non-max suppress. 
- - """ - boxes = [] - scores = [] - downsample = 32 - for i, output in enumerate(outputs): - box, score = fluid.layers.yolo_box( - x=output, - img_size=im_size, - anchors=self.mask_anchors[i], - class_num=self.num_classes, - conf_thresh=self.nms.score_threshold, - downsample_ratio=downsample, - name=self.prefix_name + "yolo_box" + str(i)) - boxes.append(box) - scores.append(fluid.layers.transpose(score, perm=[0, 2, 1])) - - downsample //= 2 - - yolo_boxes = fluid.layers.concat(boxes, axis=1) - yolo_scores = fluid.layers.concat(scores, axis=2) - pred = fluid.layers.multiclass_nms( - bboxes=yolo_boxes, - scores=yolo_scores, - score_threshold=self.nms.score_threshold, - nms_top_k=self.nms.nms_top_k, - keep_top_k=self.nms.keep_top_k, - nms_threshold=self.nms.nms_threshold, - background_label=self.nms.background_label, - normalized=self.nms.normalized, - name="multiclass_nms") - return pred diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_pascalvoc/module.py b/hub_module/modules/image/object_detection/yolov3_darknet53_pascalvoc/module.py deleted file mode 100644 index 079d89452eb55c9669ad3b3a3f5674e31fafc6d6..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_darknet53_pascalvoc/module.py +++ /dev/null @@ -1,318 +0,0 @@ -import os - -import paddle -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn.initializer import Normal, Constant -from paddle.regularizer import L2Decay -from paddlehub.module.cv_module import Yolov3Module -import paddlehub.process.detect_transforms as T -from paddlehub.module.module import moduleinfo - - -class ConvBNLayer(nn.Layer): - """Basic block for Darknet""" - def __init__(self, - ch_in: int, - ch_out: int, - filter_size: int = 3, - stride: int = 1, - groups: int = 1, - padding: int = 0, - act: str = 'leakly', - is_test: bool = False): - super(ConvBNLayer, self).__init__() - - self.conv = nn.Conv2d(ch_in, - ch_out, - filter_size, - padding=padding, - stride=stride, - groups=groups, - weight_attr=paddle.ParamAttr(initializer=Normal(0., 0.02)), - bias_attr=False) - - self.batch_norm = nn.BatchNorm(num_channels=ch_out, - is_test=is_test, - param_attr=paddle.ParamAttr(initializer=Normal(0., 0.02), - regularizer=L2Decay(0.))) - self.act = act - - def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: - out = self.conv(inputs) - out = self.batch_norm(out) - if self.act == "leakly": - out = F.leaky_relu(x=out, negative_slope=0.1) - return out - - -class DownSample(nn.Layer): - """Downsample block for Darknet""" - def __init__(self, - ch_in: int, - ch_out: int, - filter_size: int = 3, - stride: int = 2, - padding: int = 1, - is_test: bool = False): - super(DownSample, self).__init__() - - self.conv_bn_layer = ConvBNLayer(ch_in=ch_in, - ch_out=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - is_test=is_test) - self.ch_out = ch_out - - def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: - out = self.conv_bn_layer(inputs) - return out - - -class BasicBlock(nn.Layer): - """Basic residual block for Darknet""" - def __init__(self, ch_in: int, ch_out: int, is_test: bool = False): - super(BasicBlock, self).__init__() - - self.conv1 = ConvBNLayer(ch_in=ch_in, ch_out=ch_out, filter_size=1, stride=1, padding=0, is_test=is_test) - self.conv2 = ConvBNLayer(ch_in=ch_out, ch_out=ch_out * 2, filter_size=3, stride=1, padding=1, is_test=is_test) - - def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: - conv1 = self.conv1(inputs) - conv2 = self.conv2(conv1) - out 
= paddle.elementwise_add(x=inputs, y=conv2, act=None) - return out - - -class LayerWarp(nn.Layer): - """Warp layer composed by basic residual blocks""" - def __init__(self, ch_in: int, ch_out: int, count: int, is_test: bool = False): - super(LayerWarp, self).__init__() - self.basicblock0 = BasicBlock(ch_in, ch_out, is_test=is_test) - self.res_out_list = [] - for i in range(1, count): - res_out = self.add_sublayer("basic_block_%d" % (i), BasicBlock(ch_out * 2, ch_out, is_test=is_test)) - self.res_out_list.append(res_out) - self.ch_out = ch_out - - def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: - y = self.basicblock0(inputs) - for basic_block_i in self.res_out_list: - y = basic_block_i(y) - return y - - -class DarkNet53_conv_body(nn.Layer): - """Darknet53 - Args: - ch_in(int): Input channels, default is 3. - is_test (bool): Set the test mode, default is True. - """ - def __init__(self, ch_in: int = 3, is_test: bool = False): - super(DarkNet53_conv_body, self).__init__() - self.stages = [1, 2, 8, 8, 4] - self.stages = self.stages[0:5] - - self.conv0 = ConvBNLayer(ch_in=ch_in, ch_out=32, filter_size=3, stride=1, padding=1, is_test=is_test) - - self.downsample0 = DownSample(ch_in=32, ch_out=32 * 2, is_test=is_test) - self.darknet53_conv_block_list = [] - self.downsample_list = [] - ch_in = [64, 128, 256, 512, 1024] - - for i, stage in enumerate(self.stages): - conv_block = self.add_sublayer("stage_%d" % (i), - LayerWarp(int(ch_in[i]), 32 * (2**i), stage, is_test=is_test)) - self.darknet53_conv_block_list.append(conv_block) - - for i in range(len(self.stages) - 1): - downsample = self.add_sublayer( - "stage_%d_downsample" % i, DownSample(ch_in=32 * (2**(i + 1)), - ch_out=32 * (2**(i + 2)), - is_test=is_test)) - self.downsample_list.append(downsample) - - def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: - out = self.conv0(inputs) - out = self.downsample0(out) - blocks = [] - for i, conv_block_i in enumerate(self.darknet53_conv_block_list): - out = conv_block_i(out) - blocks.append(out) - if i < len(self.stages) - 1: - out = self.downsample_list[i](out) - return blocks[-1:-4:-1] - - -class YoloDetectionBlock(nn.Layer): - """Basic block for Yolov3""" - def __init__(self, ch_in: int, channel: int, is_test: bool = True): - super(YoloDetectionBlock, self).__init__() - - assert channel % 2 == 0, \ - "channel {} cannot be divided by 2".format(channel) - - self.conv0 = ConvBNLayer(ch_in=ch_in, ch_out=channel, filter_size=1, stride=1, padding=0, is_test=is_test) - self.conv1 = ConvBNLayer(ch_in=channel, ch_out=channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test) - self.conv2 = ConvBNLayer(ch_in=channel * 2, ch_out=channel, filter_size=1, stride=1, padding=0, is_test=is_test) - self.conv3 = ConvBNLayer(ch_in=channel, ch_out=channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test) - self.route = ConvBNLayer(ch_in=channel * 2, ch_out=channel, filter_size=1, stride=1, padding=0, is_test=is_test) - self.tip = ConvBNLayer(ch_in=channel, ch_out=channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test) - - def forward(self, inputs): - out = self.conv0(inputs) - out = self.conv1(out) - out = self.conv2(out) - out = self.conv3(out) - route = self.route(out) - tip = self.tip(route) - return route, tip - - -class Upsample(nn.Layer): - """Upsample block for Yolov3""" - def __init__(self, scale: int = 2): - super(Upsample, self).__init__() - self.scale = scale - - def forward(self, inputs: paddle.Tensor): - shape_nchw = paddle.to_tensor(inputs.shape) - shape_hw = 
paddle.slice(shape_nchw, axes=[0], starts=[2], ends=[4]) - shape_hw.stop_gradient = True - in_shape = paddle.cast(shape_hw, dtype='int32') - out_shape = in_shape * self.scale - out_shape.stop_gradient = True - out = F.resize_nearest(input=inputs, scale=self.scale, actual_shape=out_shape) - return out - - -@moduleinfo(name="yolov3_darknet53_pascalvoc", - type="CV/image_editing", - author="paddlepaddle", - author_email="", - summary="Yolov3 is a detection model, this module is trained with VOC dataset.", - version="1.0.0", - meta=Yolov3Module) -class YOLOv3(nn.Layer): - """YOLOV3 for detection - - Args: - ch_in(int): Input channels, default is 3. - class_num(int): Categories for detection,if dataset is voc, class_num is 20. - ignore_thresh(float): The ignore threshold to ignore confidence loss. - valid_thresh(float): Threshold to filter out bounding boxes with low confidence score. - nms_topk(int): Maximum number of detections to be kept according to the confidences after the filtering - detections based on score_threshold. - nms_posk(int): Number of total bboxes to be kept per image after NMS step. -1 means keeping all bboxes after NMS - step. - nms_thresh (float): The threshold to be used in NMS. Default: 0.3. - is_train (bool): Set the train mode, default is True. - load_checkpoint(str): Whether to load checkpoint. - """ - def __init__(self, - ch_in: int = 3, - class_num: int = 20, - ignore_thresh: float = 0.7, - valid_thresh: float = 0.005, - nms_topk: int = 400, - nms_posk: int = 100, - nms_thresh: float = 0.45, - is_train: bool = True, - load_checkpoint: str = None): - super(YOLOv3, self).__init__() - - self.is_train = is_train - self.block = DarkNet53_conv_body(ch_in=ch_in, is_test=not self.is_train) - self.block_outputs = [] - self.yolo_blocks = [] - self.route_blocks_2 = [] - self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] - self.anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326] - self.class_num = class_num - self.ignore_thresh = ignore_thresh - self.valid_thresh = valid_thresh - self.nms_topk = nms_topk - self.nms_posk = nms_posk - self.nms_thresh = nms_thresh - ch_in_list = [1024, 768, 384] - - for i in range(3): - yolo_block = self.add_sublayer( - "yolo_detecton_block_%d" % (i), - YoloDetectionBlock(ch_in_list[i], channel=512 // (2**i), is_test=not self.is_train)) - self.yolo_blocks.append(yolo_block) - - num_filters = len(self.anchor_masks[i]) * (self.class_num + 5) - block_out = self.add_sublayer( - "block_out_%d" % (i), - nn.Conv2d(1024 // (2**i), - num_filters, - 1, - stride=1, - padding=0, - weight_attr=paddle.ParamAttr(initializer=Normal(0., 0.02)), - bias_attr=paddle.ParamAttr(initializer=Constant(0.0), regularizer=L2Decay(0.)))) - self.block_outputs.append(block_out) - - if i < 2: - route = self.add_sublayer( - "route2_%d" % i, - ConvBNLayer(ch_in=512 // (2**i), - ch_out=256 // (2**i), - filter_size=1, - stride=1, - padding=0, - is_test=(not self.is_train))) - self.route_blocks_2.append(route) - self.upsample = Upsample() - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'yolov3_darknet53_voc.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/detection/yolov3_darknet53_voc.pdparams -O ' \ - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint 
success") - - def transform(self, img): - if self.is_train: - transform = T.Compose([ - T.RandomDistort(), - T.RandomExpand(fill=[0.485, 0.456, 0.406]), - T.RandomCrop(), - T.Resize(target_size=416), - T.RandomFlip(), - T.ShuffleBox(), - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) - ]) - else: - transform = T.Compose([ - T.Resize(target_size=416, interp='CUBIC'), - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) - ]) - - return transform(img) - - def forward(self, inputs: paddle.Tensor): - outputs = [] - blocks = self.block(inputs) - route = None - for i, block in enumerate(blocks): - if i > 0: - block = paddle.concat([route, block], axis=1) - route, tip = self.yolo_blocks[i](block) - block_out = self.block_outputs[i](tip) - outputs.append(block_out) - if i < 2: - route = self.route_blocks_2[i](route) - route = self.upsample(route) - - return outputs diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/darknet.py b/hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/darknet.py deleted file mode 100644 index 1e6e6f8f6b097314c6e3363150ddcfbb9fb59e0b..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/darknet.py +++ /dev/null @@ -1,170 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import math - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['DarkNet'] - - -class DarkNet(object): - """DarkNet, see https://pjreddie.com/darknet/yolo/ - Args: - depth (int): network depth, currently only darknet 53 is supported - norm_type (str): normalization type, 'bn' and 'sync_bn' are supported - norm_decay (float): weight decay for normalization layer weights - get_prediction (bool): whether to get prediction - class_dim (int): number of class while classification - """ - - def __init__(self, - depth=53, - norm_type='sync_bn', - norm_decay=0., - weight_prefix_name='', - get_prediction=False, - class_dim=1000): - assert depth in [53], "unsupported depth value" - self.depth = depth - self.norm_type = norm_type - self.norm_decay = norm_decay - self.depth_cfg = {53: ([1, 2, 8, 8, 4], self.basicblock)} - self.prefix_name = weight_prefix_name - self.class_dim = class_dim - self.get_prediction = get_prediction - - def _conv_norm(self, - input, - ch_out, - filter_size, - stride, - padding, - act='leaky', - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + ".conv.weights"), - bias_attr=False) - - bn_name = name + ".bn" - bn_param_attr = ParamAttr( - regularizer=L2Decay(float(self.norm_decay)), - name=bn_name + '.scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(float(self.norm_decay)), - name=bn_name + '.offset') - - out = fluid.layers.batch_norm( - input=conv, - act=None, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + '.var') - - # leaky relu here has `alpha` as 0.1, can not be set by - # `act` param in fluid.layers.batch_norm above. 
- if act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - - return out - - def _downsample(self, - input, - ch_out, - filter_size=3, - stride=2, - padding=1, - name=None): - return self._conv_norm( - input, - ch_out=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - name=name) - - def basicblock(self, input, ch_out, name=None): - conv1 = self._conv_norm( - input, - ch_out=ch_out, - filter_size=1, - stride=1, - padding=0, - name=name + ".0") - conv2 = self._conv_norm( - conv1, - ch_out=ch_out * 2, - filter_size=3, - stride=1, - padding=1, - name=name + ".1") - out = fluid.layers.elementwise_add(x=input, y=conv2, act=None) - return out - - def layer_warp(self, block_func, input, ch_out, count, name=None): - out = block_func(input, ch_out=ch_out, name='{}.0'.format(name)) - for j in six.moves.xrange(1, count): - out = block_func(out, ch_out=ch_out, name='{}.{}'.format(name, j)) - return out - - def __call__(self, input): - """Get the backbone of DarkNet, that is output for the 5 stages. - - :param input: Variable of input image - :type input: Variable - :Returns: The last variables of each stage. - """ - stages, block_func = self.depth_cfg[self.depth] - stages = stages[0:5] - conv = self._conv_norm( - input=input, - ch_out=32, - filter_size=3, - stride=1, - padding=1, - name=self.prefix_name + "yolo_input") - downsample_ = self._downsample( - input=conv, - ch_out=conv.shape[1] * 2, - name=self.prefix_name + "yolo_input.downsample") - blocks = [] - for i, stage in enumerate(stages): - block = self.layer_warp( - block_func=block_func, - input=downsample_, - ch_out=32 * 2**i, - count=stage, - name=self.prefix_name + "stage.{}".format(i)) - blocks.append(block) - if i < len(stages) - 1: # do not downsaple in the last stage - downsample_ = self._downsample( - input=block, - ch_out=block.shape[1] * 2, - name=self.prefix_name + "stage.{}.downsample".format(i)) - if self.get_prediction: - pool = fluid.layers.pool2d( - input=block, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=self.class_dim, - param_attr=ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv), - name='fc_weights'), - bias_attr=ParamAttr(name='fc_offset')) - out = fluid.layers.softmax(out) - return out - else: - return blocks diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/data_feed.py b/hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/data_feed.py deleted file mode 100644 index d4d35611df9ade3fa6bbd60c04320bec05194d1d..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/data_feed.py +++ /dev/null @@ -1,71 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import os - -import cv2 -import numpy as np - -__all__ = ['reader'] - - -def reader(paths=[], images=None): - """ - data generator - - Args: - paths (list[str]): paths to images. - images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] - - Yield: - res (list): preprocessed image and the size of original image. - """ - img_list = [] - if paths: - assert type(paths) is list, "type(paths) is not list." 
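This `reader` repeats the preprocessing used by the coco2017 module earlier in the diff: BGR-to-RGB conversion, bicubic resize to 608x608 (`interpolation=2` is `cv2.INTER_CUBIC`), ImageNet mean/std normalization, and an HWC-to-CHW transpose via the two `swapaxes` calls. A condensed equivalent for reference; the helper name is hypothetical, not part of the module:

```python
# Condensed sketch of the per-image preprocessing in reader()
# (hypothetical helper; expects a BGR HWC image as cv2.imread returns).
import cv2
import numpy as np

def preprocess(im, target_size=608):
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB).astype(np.float32)
    # fx = fy = target_size / side length, so the output is 608 x 608;
    # interpolation=2 in the reader is cv2.INTER_CUBIC
    im = cv2.resize(im, (target_size, target_size),
                    interpolation=cv2.INTER_CUBIC)
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    im = (im / 255.0 - mean) / std
    # the two swapaxes calls amount to HWC -> CHW
    return im.transpose((2, 0, 1))
```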
- for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file path.".format(img_path) - img = cv2.imread(img_path).astype('float32') - img_list.append(img) - if images is not None: - for img in images: - img_list.append(img) - - for im in img_list: - # im_size - im_shape = im.shape - im_size = np.array([im_shape[0], im_shape[1]], dtype=np.int32) - - # decode image - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - - # resize image - target_size = 608 - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - if float(im_size_min) == 0: - raise ZeroDivisionError('min size of image is 0') - - im_scale_x = float(target_size) / float(im_shape[1]) - im_scale_y = float(target_size) / float(im_shape[0]) - im = cv2.resize( - im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=2) - - # normalize image - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - im = im.astype(np.float32, copy=False) - mean = np.array(mean)[np.newaxis, np.newaxis, :] - std = np.array(std)[np.newaxis, np.newaxis, :] - im = im / 255.0 - im -= mean - im /= std - - # permute - im = np.swapaxes(im, 1, 2) - im = np.swapaxes(im, 1, 0) - - yield [im, im_size] diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/module.py b/hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/module.py deleted file mode 100644 index 7630d372801bb633d765ba966bb2eaa458df58e0..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/module.py +++ /dev/null @@ -1,327 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import - -import ast -import argparse -import os -from functools import partial - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix - -from yolov3_darknet53_pedestrian.darknet import DarkNet -from yolov3_darknet53_pedestrian.processor import load_label_info, postprocess, base64_to_cv2 -from yolov3_darknet53_pedestrian.data_feed import reader -from yolov3_darknet53_pedestrian.yolo_head import MultiClassNMS, YOLOv3Head - - -@moduleinfo( - name="yolov3_darknet53_pedestrian", - version="1.0.1", - type="CV/object_detection", - summary= - "Baidu's YOLOv3 model for pedestrian detection, with backbone DarkNet53.", - author="paddlepaddle", - author_email="paddle-dev@baidu.com") -class YOLOv3DarkNet53Pedestrian(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "yolov3_darknet53_pedestrian_model") - self.label_names = load_label_info( - os.path.join(self.directory, "label_file.txt")) - self._set_config() - - def _set_config(self): - """ - predictor config setting. 
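        A CPU AnalysisConfig predictor is always built; a GPU predictor is built
        in addition only when CUDA_VISIBLE_DEVICES names a usable device.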
- """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, trainable=True, pretrained=True, get_prediction=False): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - get_prediction (bool): whether to get prediction. - - Returns: - inputs(dict): the input variables. - outputs(dict): the output variables. - context_prog (Program): the program to execute transfer learning. - """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - # image - image = fluid.layers.data( - name='image', shape=[3, 608, 608], dtype='float32') - # backbone - backbone = DarkNet(norm_type='sync_bn', norm_decay=0., depth=53) - # body_feats - body_feats = backbone(image) - # im_size - im_size = fluid.layers.data( - name='im_size', shape=[2], dtype='int32') - # yolo_head - yolo_head = YOLOv3Head( - anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], - anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], - [59, 119], [116, 90], [156, 198], [373, 326]], - norm_decay=0., - num_classes=1, - ignore_thresh=0.7, - label_smooth=True, - nms=MultiClassNMS( - background_label=-1, - keep_top_k=100, - nms_threshold=0.45, - nms_top_k=1000, - normalized=False, - score_threshold=0.01)) - # head_features - head_features, body_features = yolo_head._get_outputs( - body_feats, is_train=trainable) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - # var_prefix - var_prefix = '@HUB_{}@'.format(self.name) - # name of inputs - inputs = { - 'image': var_prefix + image.name, - 'im_size': var_prefix + im_size.name - } - # name of outputs - if get_prediction: - bbox_out = yolo_head.get_prediction(head_features, im_size) - outputs = {'bbox_out': [var_prefix + bbox_out.name]} - else: - outputs = { - 'head_features': - [var_prefix + var.name for var in head_features], - 'body_features': - [var_prefix + var.name for var in body_features] - } - # add_vars_prefix - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(fluid.default_startup_program(), var_prefix) - # inputs - inputs = { - key: context_prog.global_block().vars[value] - for key, value in inputs.items() - } - # outputs - outputs = { - key: [ - context_prog.global_block().vars[varname] - for varname in value - ] - for key, value in outputs.items() - } - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - # pretrained - if pretrained: - - def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) - else: - exe.run(startup_program) - - return inputs, outputs, context_prog - - def object_detection(self, - paths=None, - images=None, - 
batch_size=1, - use_gpu=False, - output_dir='yolov3_pedestrian_detect_output', - score_thresh=0.2, - visualization=True): - """API of Object Detection. - - Args: - paths (list[str]): The paths of images. - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - output_dir (str): The path to store output images. - visualization (bool): Whether to save image or not. - score_thresh (float): threshold for object detecion. - - Returns: - res (list[dict]): The result of pedestrian detecion. keys include 'data', 'save_path', the corresponding value is: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of detection result; - confidence (float): The confidence of detection result. - save_path (str, optional): The path to save output images. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." - ) - - paths = paths if paths else list() - data_reader = partial(reader, paths, images) - batch_reader = fluid.io.batch(data_reader, batch_size=batch_size) - res = [] - for iter_id, feed_data in enumerate(batch_reader()): - feed_data = np.array(feed_data) - image_tensor = PaddleTensor(np.array(list(feed_data[:, 0]))) - im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1]))) - if use_gpu: - data_out = self.gpu_predictor.run( - [image_tensor, im_size_tensor]) - else: - data_out = self.cpu_predictor.run( - [image_tensor, im_size_tensor]) - - output = postprocess( - paths=paths, - images=images, - data_out=data_out, - score_thresh=score_thresh, - label_names=self.label_names, - output_dir=output_dir, - handle_id=iter_id * batch_size, - visualization=visualization) - res.extend(output) - return res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.object_detection(images=images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. 
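        A minimal usage sketch (assuming the module is installed through
        PaddleHub; the image path is a placeholder):

            hub run yolov3_darknet53_pedestrian --input_path /PATH/TO/IMAGE

        or, equivalently, from Python:

            import paddlehub as hub
            detector = hub.Module(name="yolov3_darknet53_pedestrian")
            results = detector.object_detection(paths=["/PATH/TO/IMAGE"])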
- """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.object_detection( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization, - score_thresh=args.score_thresh) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='yolov3_pedestrian_detect_output', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - - def add_module_input_arg(self): - """ - Add the command input options. - """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") - self.arg_input_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - self.arg_input_group.add_argument( - '--score_thresh', - type=ast.literal_eval, - default=0.2, - help="threshold for object detecion.") diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/processor.py b/hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/processor.py deleted file mode 100644 index a6a91b32724c10aaed9be4498eba4311fdc7a2e8..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/processor.py +++ /dev/null @@ -1,180 +0,0 @@ -# coding=utf-8 -import base64 -import os - -import cv2 -import numpy as np -from PIL import Image, ImageDraw - -__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess'] - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def check_dir(dir_path): - if not os.path.exists(dir_path): - os.makedirs(dir_path) - elif os.path.isfile(dir_path): - os.remove(dir_path) - os.makedirs(dir_path) - - -def get_save_image_name(img, output_dir, image_path): - """Get save image name from source image path. 
- """ - image_name = os.path.split(image_path)[-1] - name, ext = os.path.splitext(image_name) - - if ext == '': - if img.format == 'PNG': - ext = '.png' - elif img.format == 'JPEG': - ext = '.jpg' - elif img.format == 'BMP': - ext = '.bmp' - else: - if img.mode == "RGB" or img.mode == "L": - ext = ".jpg" - elif img.mode == "RGBA" or img.mode == "P": - ext = '.png' - - return os.path.join(output_dir, "{}".format(name)) + ext - - -def draw_bounding_box_on_image(image_path, data_list, save_dir): - image = Image.open(image_path) - draw = ImageDraw.Draw(image) - for data in data_list: - left, right, top, bottom = data['left'], data['right'], data[ - 'top'], data['bottom'] - # draw bbox - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), - (left, top)], - width=2, - fill='red') - # draw label - if image.mode == 'RGB': - text = data['label'] + ": %.2f%%" % (100 * data['confidence']) - textsize_width, textsize_height = draw.textsize(text=text) - draw.rectangle( - xy=(left, top - (textsize_height + 5), - left + textsize_width + 10, top), - fill=(255, 255, 255)) - draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) - - save_name = get_save_image_name(image, save_dir, image_path) - if os.path.exists(save_name): - os.remove(save_name) - - image.save(save_name) - return save_name - - -def clip_bbox(bbox, img_width, img_height): - xmin = max(min(bbox[0], img_width), 0.) - ymin = max(min(bbox[1], img_height), 0.) - xmax = max(min(bbox[2], img_width), 0.) - ymax = max(min(bbox[3], img_height), 0.) - return float(xmin), float(ymin), float(xmax), float(ymax) - - -def load_label_info(file_path): - with open(file_path, 'r') as fr: - text = fr.readlines() - label_names = [] - for info in text: - label_names.append(info.strip()) - return label_names - - -def postprocess(paths, - images, - data_out, - score_thresh, - label_names, - output_dir, - handle_id, - visualization=True): - """ - postprocess the lod_tensor produced by fluid.Executor.run - - Args: - paths (list[str]): The paths of images. - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - data_out (lod_tensor): data output of predictor. - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - output_dir (str): The path to store output images. - visualization (bool): Whether to save image or not. - score_thresh (float): the low limit of bounding box. - label_names (list[str]): label names. - handle_id (int): The number of images that have been handled. - - Returns: - res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of detection result; - confidence (float): The confidence of detection result. - save_path (str): The path to save output images. - """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() - - check_dir(output_dir) - - assert type(paths) is list, "type(paths) is not list." 
- if handle_id < len(paths): - unhandled_paths = paths[handle_id:] - unhandled_paths_num = len(unhandled_paths) - else: - unhandled_paths_num = 0 - - output = list() - for index in range(len(lod) - 1): - output_i = {'data': []} - if index < unhandled_paths_num: - org_img_path = unhandled_paths[index] - org_img = Image.open(org_img_path) - else: - org_img = images[index - unhandled_paths_num] - org_img = org_img.astype(np.uint8) - org_img = Image.fromarray(org_img[:, :, ::-1]) - if visualization: - org_img_path = get_save_image_name( - org_img, output_dir, 'image_numpy_{}'.format( - (handle_id + index))) - org_img.save(org_img_path) - org_img_height = org_img.height - org_img_width = org_img.width - result_i = results[lod[index]:lod[index + 1]] - for row in result_i: - if len(row) != 6: - continue - if row[1] < score_thresh: - continue - category_id = int(row[0]) - confidence = row[1] - bbox = row[2:] - dt = {} - dt['label'] = label_names[category_id] - dt['confidence'] = float(confidence) - dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox( - bbox, org_img_width, org_img_height) - output_i['data'].append(dt) - - output.append(output_i) - if visualization: - output_i['save_path'] = draw_bounding_box_on_image( - org_img_path, output_i['data'], output_dir) - - return output diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/yolo_head.py b/hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/yolo_head.py deleted file mode 100644 index 7428fb4c281507c30918e12a04753d559346cf7b..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/yolo_head.py +++ /dev/null @@ -1,273 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['MultiClassNMS', 'YOLOv3Head'] - - -class MultiClassNMS(object): - # __op__ = fluid.layers.multiclass_nms - def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k, - normalized, score_threshold): - super(MultiClassNMS, self).__init__() - self.background_label = background_label - self.keep_top_k = keep_top_k - self.nms_threshold = nms_threshold - self.nms_top_k = nms_top_k - self.normalized = normalized - self.score_threshold = score_threshold - - -class YOLOv3Head(object): - """Head block for YOLOv3 network - - Args: - norm_decay (float): weight decay for normalization layer weights - num_classes (int): number of output classes - ignore_thresh (float): threshold to ignore confidence loss - label_smooth (bool): whether to use label smoothing - anchors (list): anchors - anchor_masks (list): anchor masks - nms (object): an instance of `MultiClassNMS` - """ - - def __init__(self, - norm_decay=0., - num_classes=80, - ignore_thresh=0.7, - label_smooth=True, - anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], - [59, 119], [116, 90], [156, 198], [373, 326]], - anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], - nms=MultiClassNMS( - background_label=-1, - keep_top_k=100, - nms_threshold=0.45, - nms_top_k=1000, - normalized=True, - score_threshold=0.01), - weight_prefix_name=''): - self.norm_decay = norm_decay - self.num_classes = num_classes - self.ignore_thresh = ignore_thresh - self.label_smooth = label_smooth - self.anchor_masks = anchor_masks - self._parse_anchors(anchors) - self.nms = nms - 
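        # [illustrative note] `anchors` are (width, height) priors on the network
        # input scale; each entry of `anchor_masks` selects the three priors used
        # by one detection scale, coarsest (stride-32) scale first.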
self.prefix_name = weight_prefix_name - - def _conv_bn(self, - input, - ch_out, - filter_size, - stride, - padding, - act='leaky', - is_test=True, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + ".conv.weights"), - bias_attr=False) - - bn_name = name + ".bn" - bn_param_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset') - out = fluid.layers.batch_norm( - input=conv, - act=None, - is_test=is_test, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + '.var') - - if act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - return out - - def _detection_block(self, input, channel, is_test=True, name=None): - assert channel % 2 == 0, \ - "channel {} cannot be divided by 2 in detection block {}" \ - .format(channel, name) - - conv = input - for j in range(2): - conv = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.{}.0'.format(name, j)) - conv = self._conv_bn( - conv, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.{}.1'.format(name, j)) - route = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.2'.format(name)) - tip = self._conv_bn( - route, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.tip'.format(name)) - return route, tip - - def _upsample(self, input, scale=2, name=None): - out = fluid.layers.resize_nearest( - input=input, scale=float(scale), name=name) - return out - - def _parse_anchors(self, anchors): - """ - Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors - - """ - self.anchors = [] - self.mask_anchors = [] - - assert len(anchors) > 0, "ANCHORS not set." - assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." 
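        # [illustrative note] with the default COCO anchors and masks this yields
        #   self.anchors      = [10, 13, 16, 30, ..., 373, 326]        (flat list)
        #   self.mask_anchors = [[116, 90, 156, 198, 373, 326],
        #                        [30, 61, 62, 45, 59, 119],
        #                        [10, 13, 16, 30, 33, 23]]
        # i.e. one flat (w, h) list per detection scale.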
- - for anchor in anchors: - assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) - self.anchors.extend(anchor) - - anchor_num = len(anchors) - for masks in self.anchor_masks: - self.mask_anchors.append([]) - for mask in masks: - assert mask < anchor_num, "anchor mask index overflow" - self.mask_anchors[-1].extend(anchors[mask]) - - def _get_outputs(self, input, is_train=True): - """ - Get YOLOv3 head output - - Args: - input (list): List of Variables, output of backbone stages - is_train (bool): whether in train or test mode - - Returns: - outputs (list): Variables of each output layer - """ - - outputs = [] - - # get last out_layer_num blocks in reverse order - out_layer_num = len(self.anchor_masks) - if isinstance(input, OrderedDict): - blocks = list(input.values())[-1:-out_layer_num - 1:-1] - else: - blocks = input[-1:-out_layer_num - 1:-1] - route = None - for i, block in enumerate(blocks): - if i > 0: # perform concat in first 2 detection_block - block = fluid.layers.concat(input=[route, block], axis=1) - route, tip = self._detection_block( - block, - channel=512 // (2**i), - is_test=(not is_train), - name=self.prefix_name + "yolo_block.{}".format(i)) - - # out channel number = mask_num * (5 + class_num) - num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) - block_out = fluid.layers.conv2d( - input=tip, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=ParamAttr(name=self.prefix_name + - "yolo_output.{}.conv.weights".format(i)), - bias_attr=ParamAttr( - regularizer=L2Decay(0.), - name=self.prefix_name + - "yolo_output.{}.conv.bias".format(i))) - outputs.append(block_out) - - if i < len(blocks) - 1: - # do not perform upsample in the last detection_block - route = self._conv_bn( - input=route, - ch_out=256 // (2**i), - filter_size=1, - stride=1, - padding=0, - is_test=(not is_train), - name=self.prefix_name + "yolo_transition.{}".format(i)) - # upsample - route = self._upsample(route) - - return outputs, blocks - - def get_prediction(self, outputs, im_size): - """ - Get prediction result of YOLOv3 network - - Args: - outputs (list): list of Variables, return from _get_outputs - im_size (Variable): Variable of size([h, w]) of each image - - Returns: - pred (Variable): The prediction result after non-max suppress. 
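        Each of the three head outputs is decoded by yolo_box at downsample
        ratios 32, 16 and 8; the decoded boxes and scores are concatenated
        across scales and then reduced with multiclass NMS.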
- - """ - boxes = [] - scores = [] - downsample = 32 - for i, output in enumerate(outputs): - box, score = fluid.layers.yolo_box( - x=output, - img_size=im_size, - anchors=self.mask_anchors[i], - class_num=self.num_classes, - conf_thresh=self.nms.score_threshold, - downsample_ratio=downsample, - name=self.prefix_name + "yolo_box" + str(i)) - boxes.append(box) - scores.append(fluid.layers.transpose(score, perm=[0, 2, 1])) - - downsample //= 2 - - yolo_boxes = fluid.layers.concat(boxes, axis=1) - yolo_scores = fluid.layers.concat(scores, axis=2) - pred = fluid.layers.multiclass_nms( - bboxes=yolo_boxes, - scores=yolo_scores, - score_threshold=self.nms.score_threshold, - nms_top_k=self.nms.nms_top_k, - keep_top_k=self.nms.keep_top_k, - nms_threshold=self.nms.nms_threshold, - background_label=self.nms.background_label, - normalized=self.nms.normalized, - name="multiclass_nms") - return pred diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/darknet.py b/hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/darknet.py deleted file mode 100644 index 1e6e6f8f6b097314c6e3363150ddcfbb9fb59e0b..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/darknet.py +++ /dev/null @@ -1,170 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import math - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['DarkNet'] - - -class DarkNet(object): - """DarkNet, see https://pjreddie.com/darknet/yolo/ - Args: - depth (int): network depth, currently only darknet 53 is supported - norm_type (str): normalization type, 'bn' and 'sync_bn' are supported - norm_decay (float): weight decay for normalization layer weights - get_prediction (bool): whether to get prediction - class_dim (int): number of class while classification - """ - - def __init__(self, - depth=53, - norm_type='sync_bn', - norm_decay=0., - weight_prefix_name='', - get_prediction=False, - class_dim=1000): - assert depth in [53], "unsupported depth value" - self.depth = depth - self.norm_type = norm_type - self.norm_decay = norm_decay - self.depth_cfg = {53: ([1, 2, 8, 8, 4], self.basicblock)} - self.prefix_name = weight_prefix_name - self.class_dim = class_dim - self.get_prediction = get_prediction - - def _conv_norm(self, - input, - ch_out, - filter_size, - stride, - padding, - act='leaky', - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + ".conv.weights"), - bias_attr=False) - - bn_name = name + ".bn" - bn_param_attr = ParamAttr( - regularizer=L2Decay(float(self.norm_decay)), - name=bn_name + '.scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(float(self.norm_decay)), - name=bn_name + '.offset') - - out = fluid.layers.batch_norm( - input=conv, - act=None, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + '.var') - - # leaky relu here has `alpha` as 0.1, can not be set by - # `act` param in fluid.layers.batch_norm above. 
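            # [illustrative note] the stage counts [1, 2, 8, 8, 4] above give 23
            # residual blocks (two convs each); with the stem conv and the five
            # downsample convs that is 52 conv layers, the classification fc
            # making up DarkNet-53's 53.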
- if act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - - return out - - def _downsample(self, - input, - ch_out, - filter_size=3, - stride=2, - padding=1, - name=None): - return self._conv_norm( - input, - ch_out=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - name=name) - - def basicblock(self, input, ch_out, name=None): - conv1 = self._conv_norm( - input, - ch_out=ch_out, - filter_size=1, - stride=1, - padding=0, - name=name + ".0") - conv2 = self._conv_norm( - conv1, - ch_out=ch_out * 2, - filter_size=3, - stride=1, - padding=1, - name=name + ".1") - out = fluid.layers.elementwise_add(x=input, y=conv2, act=None) - return out - - def layer_warp(self, block_func, input, ch_out, count, name=None): - out = block_func(input, ch_out=ch_out, name='{}.0'.format(name)) - for j in six.moves.xrange(1, count): - out = block_func(out, ch_out=ch_out, name='{}.{}'.format(name, j)) - return out - - def __call__(self, input): - """Get the backbone of DarkNet, that is output for the 5 stages. - - :param input: Variable of input image - :type input: Variable - :Returns: The last variables of each stage. - """ - stages, block_func = self.depth_cfg[self.depth] - stages = stages[0:5] - conv = self._conv_norm( - input=input, - ch_out=32, - filter_size=3, - stride=1, - padding=1, - name=self.prefix_name + "yolo_input") - downsample_ = self._downsample( - input=conv, - ch_out=conv.shape[1] * 2, - name=self.prefix_name + "yolo_input.downsample") - blocks = [] - for i, stage in enumerate(stages): - block = self.layer_warp( - block_func=block_func, - input=downsample_, - ch_out=32 * 2**i, - count=stage, - name=self.prefix_name + "stage.{}".format(i)) - blocks.append(block) - if i < len(stages) - 1: # do not downsaple in the last stage - downsample_ = self._downsample( - input=block, - ch_out=block.shape[1] * 2, - name=self.prefix_name + "stage.{}.downsample".format(i)) - if self.get_prediction: - pool = fluid.layers.pool2d( - input=block, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=self.class_dim, - param_attr=ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv), - name='fc_weights'), - bias_attr=ParamAttr(name='fc_offset')) - out = fluid.layers.softmax(out) - return out - else: - return blocks diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/data_feed.py b/hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/data_feed.py deleted file mode 100644 index d4d35611df9ade3fa6bbd60c04320bec05194d1d..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/data_feed.py +++ /dev/null @@ -1,71 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import os - -import cv2 -import numpy as np - -__all__ = ['reader'] - - -def reader(paths=[], images=None): - """ - data generator - - Args: - paths (list[str]): paths to images. - images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] - - Yield: - res (list): preprocessed image and the size of original image. - """ - img_list = [] - if paths: - assert type(paths) is list, "type(paths) is not list." 
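        # [illustrative note] interpolation=2 in the cv2.resize call below is
        # cv2.INTER_CUBIC; using the named flag would be clearer.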
- for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file path.".format(img_path) - img = cv2.imread(img_path).astype('float32') - img_list.append(img) - if images is not None: - for img in images: - img_list.append(img) - - for im in img_list: - # im_size - im_shape = im.shape - im_size = np.array([im_shape[0], im_shape[1]], dtype=np.int32) - - # decode image - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - - # resize image - target_size = 608 - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - if float(im_size_min) == 0: - raise ZeroDivisionError('min size of image is 0') - - im_scale_x = float(target_size) / float(im_shape[1]) - im_scale_y = float(target_size) / float(im_shape[0]) - im = cv2.resize( - im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=2) - - # normalize image - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - im = im.astype(np.float32, copy=False) - mean = np.array(mean)[np.newaxis, np.newaxis, :] - std = np.array(std)[np.newaxis, np.newaxis, :] - im = im / 255.0 - im -= mean - im /= std - - # permute - im = np.swapaxes(im, 1, 2) - im = np.swapaxes(im, 1, 0) - - yield [im, im_size] diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/module.py b/hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/module.py deleted file mode 100644 index 801228f6b576b34bfcf8de04d0f56f956d17c709..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/module.py +++ /dev/null @@ -1,327 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import - -import ast -import argparse -import os -from functools import partial - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix - -from yolov3_darknet53_vehicles.darknet import DarkNet -from yolov3_darknet53_vehicles.processor import load_label_info, postprocess, base64_to_cv2 -from yolov3_darknet53_vehicles.data_feed import reader -from yolov3_darknet53_vehicles.yolo_head import MultiClassNMS, YOLOv3Head - - -@moduleinfo( - name="yolov3_darknet53_vehicles", - version="1.0.1", - type="CV/object_detection", - summary= - "Baidu's YOLOv3 model for vehicles detection, with backbone DarkNet53.", - author="paddlepaddle", - author_email="paddle-dev@baidu.com") -class YOLOv3DarkNet53Vehicles(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "yolov3_darknet53_vehicles_model") - self.label_names = load_label_info( - os.path.join(self.directory, "label_file.txt")) - self._set_config() - - def _set_config(self): - """ - predictor config setting. 
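        GPU availability is probed via CUDA_VISIBLE_DEVICES; the bare `except`
        treats an unset, empty or non-numeric value as CPU-only, so a
        misconfigured variable silently skips building the GPU predictor.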
- """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, trainable=True, pretrained=True, get_prediction=False): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - get_prediction (bool): whether to get prediction. - - Returns: - inputs(dict): the input variables. - outputs(dict): the output variables. - context_prog (Program): the program to execute transfer learning. - """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - # image - image = fluid.layers.data( - name='image', shape=[3, 608, 608], dtype='float32') - # backbone - backbone = DarkNet(norm_type='sync_bn', norm_decay=0., depth=53) - # body_feats - body_feats = backbone(image) - # im_size - im_size = fluid.layers.data( - name='im_size', shape=[2], dtype='int32') - # yolo_head - yolo_head = YOLOv3Head( - anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], - anchors=[[8, 9], [10, 23], [19, 15], [23, 33], [40, 25], - [54, 50], [101, 80], [139, 145], [253, 224]], - norm_decay=0., - num_classes=6, - ignore_thresh=0.7, - label_smooth=False, - nms=MultiClassNMS( - background_label=-1, - keep_top_k=100, - nms_threshold=0.45, - nms_top_k=400, - normalized=False, - score_threshold=0.005)) - # head_features - head_features, body_features = yolo_head._get_outputs( - body_feats, is_train=trainable) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - # var_prefix - var_prefix = '@HUB_{}@'.format(self.name) - # name of inputs - inputs = { - 'image': var_prefix + image.name, - 'im_size': var_prefix + im_size.name - } - # name of outputs - if get_prediction: - bbox_out = yolo_head.get_prediction(head_features, im_size) - outputs = {'bbox_out': [var_prefix + bbox_out.name]} - else: - outputs = { - 'head_features': - [var_prefix + var.name for var in head_features], - 'body_features': - [var_prefix + var.name for var in body_features] - } - # add_vars_prefix - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(fluid.default_startup_program(), var_prefix) - # inputs - inputs = { - key: context_prog.global_block().vars[value] - for key, value in inputs.items() - } - # outputs - outputs = { - key: [ - context_prog.global_block().vars[varname] - for varname in value - ] - for key, value in outputs.items() - } - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - # pretrained - if pretrained: - - def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) - else: - exe.run(startup_program) - - return inputs, outputs, context_prog - - def object_detection(self, - paths=None, - images=None, - 
batch_size=1, - use_gpu=False, - output_dir='yolov3_vehicles_detect_output', - score_thresh=0.2, - visualization=True): - """API of Object Detection. - - Args: - paths (list[str]): The paths of images. - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - output_dir (str): The path to store output images. - visualization (bool): Whether to save image or not. - score_thresh (float): threshold for object detecion. - - Returns: - res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of detection result; - confidence (float): The confidence of detection result. - save_path (str, optional): The path to save output images. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." - ) - - paths = paths if paths else list() - data_reader = partial(reader, paths, images) - batch_reader = fluid.io.batch(data_reader, batch_size=batch_size) - res = [] - for iter_id, feed_data in enumerate(batch_reader()): - feed_data = np.array(feed_data) - image_tensor = PaddleTensor(np.array(list(feed_data[:, 0]))) - im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1]))) - if use_gpu: - data_out = self.gpu_predictor.run( - [image_tensor, im_size_tensor]) - else: - data_out = self.cpu_predictor.run( - [image_tensor, im_size_tensor]) - - output = postprocess( - paths=paths, - images=images, - data_out=data_out, - score_thresh=score_thresh, - label_names=self.label_names, - output_dir=output_dir, - handle_id=iter_id * batch_size, - visualization=visualization) - res.extend(output) - return res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.object_detection(images=images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. 
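        The @serving entry point above takes base64-encoded images. A minimal
        client sketch (assuming `hub serving start -m yolov3_darknet53_vehicles`
        is running on PaddleHub's default port 8866):

            import base64, json, cv2, requests
            img = cv2.imread('/PATH/TO/IMAGE')
            b64 = base64.b64encode(cv2.imencode('.jpg', img)[1].tobytes()).decode('utf8')
            r = requests.post('http://127.0.0.1:8866/predict/yolov3_darknet53_vehicles',
                              headers={'Content-type': 'application/json'},
                              data=json.dumps({'images': [b64]}))
            print(r.json())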
- """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.object_detection( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization, - score_thresh=args.score_thresh) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='yolov3_vehicles_detect_output', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - - def add_module_input_arg(self): - """ - Add the command input options. - """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") - self.arg_input_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - self.arg_input_group.add_argument( - '--score_thresh', - type=ast.literal_eval, - default=0.2, - help="threshold for object detecion.") diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/processor.py b/hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/processor.py deleted file mode 100644 index a6a91b32724c10aaed9be4498eba4311fdc7a2e8..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/processor.py +++ /dev/null @@ -1,180 +0,0 @@ -# coding=utf-8 -import base64 -import os - -import cv2 -import numpy as np -from PIL import Image, ImageDraw - -__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess'] - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def check_dir(dir_path): - if not os.path.exists(dir_path): - os.makedirs(dir_path) - elif os.path.isfile(dir_path): - os.remove(dir_path) - os.makedirs(dir_path) - - -def get_save_image_name(img, output_dir, image_path): - """Get save image name from source image path. 
- """ - image_name = os.path.split(image_path)[-1] - name, ext = os.path.splitext(image_name) - - if ext == '': - if img.format == 'PNG': - ext = '.png' - elif img.format == 'JPEG': - ext = '.jpg' - elif img.format == 'BMP': - ext = '.bmp' - else: - if img.mode == "RGB" or img.mode == "L": - ext = ".jpg" - elif img.mode == "RGBA" or img.mode == "P": - ext = '.png' - - return os.path.join(output_dir, "{}".format(name)) + ext - - -def draw_bounding_box_on_image(image_path, data_list, save_dir): - image = Image.open(image_path) - draw = ImageDraw.Draw(image) - for data in data_list: - left, right, top, bottom = data['left'], data['right'], data[ - 'top'], data['bottom'] - # draw bbox - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), - (left, top)], - width=2, - fill='red') - # draw label - if image.mode == 'RGB': - text = data['label'] + ": %.2f%%" % (100 * data['confidence']) - textsize_width, textsize_height = draw.textsize(text=text) - draw.rectangle( - xy=(left, top - (textsize_height + 5), - left + textsize_width + 10, top), - fill=(255, 255, 255)) - draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) - - save_name = get_save_image_name(image, save_dir, image_path) - if os.path.exists(save_name): - os.remove(save_name) - - image.save(save_name) - return save_name - - -def clip_bbox(bbox, img_width, img_height): - xmin = max(min(bbox[0], img_width), 0.) - ymin = max(min(bbox[1], img_height), 0.) - xmax = max(min(bbox[2], img_width), 0.) - ymax = max(min(bbox[3], img_height), 0.) - return float(xmin), float(ymin), float(xmax), float(ymax) - - -def load_label_info(file_path): - with open(file_path, 'r') as fr: - text = fr.readlines() - label_names = [] - for info in text: - label_names.append(info.strip()) - return label_names - - -def postprocess(paths, - images, - data_out, - score_thresh, - label_names, - output_dir, - handle_id, - visualization=True): - """ - postprocess the lod_tensor produced by fluid.Executor.run - - Args: - paths (list[str]): The paths of images. - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - data_out (lod_tensor): data output of predictor. - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - output_dir (str): The path to store output images. - visualization (bool): Whether to save image or not. - score_thresh (float): the low limit of bounding box. - label_names (list[str]): label names. - handle_id (int): The number of images that have been handled. - - Returns: - res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of detection result; - confidence (float): The confidence of detection result. - save_path (str): The path to save output images. - """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() - - check_dir(output_dir) - - assert type(paths) is list, "type(paths) is not list." 
- if handle_id < len(paths): - unhandled_paths = paths[handle_id:] - unhandled_paths_num = len(unhandled_paths) - else: - unhandled_paths_num = 0 - - output = list() - for index in range(len(lod) - 1): - output_i = {'data': []} - if index < unhandled_paths_num: - org_img_path = unhandled_paths[index] - org_img = Image.open(org_img_path) - else: - org_img = images[index - unhandled_paths_num] - org_img = org_img.astype(np.uint8) - org_img = Image.fromarray(org_img[:, :, ::-1]) - if visualization: - org_img_path = get_save_image_name( - org_img, output_dir, 'image_numpy_{}'.format( - (handle_id + index))) - org_img.save(org_img_path) - org_img_height = org_img.height - org_img_width = org_img.width - result_i = results[lod[index]:lod[index + 1]] - for row in result_i: - if len(row) != 6: - continue - if row[1] < score_thresh: - continue - category_id = int(row[0]) - confidence = row[1] - bbox = row[2:] - dt = {} - dt['label'] = label_names[category_id] - dt['confidence'] = float(confidence) - dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox( - bbox, org_img_width, org_img_height) - output_i['data'].append(dt) - - output.append(output_i) - if visualization: - output_i['save_path'] = draw_bounding_box_on_image( - org_img_path, output_i['data'], output_dir) - - return output diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/yolo_head.py b/hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/yolo_head.py deleted file mode 100644 index 7428fb4c281507c30918e12a04753d559346cf7b..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/yolo_head.py +++ /dev/null @@ -1,273 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['MultiClassNMS', 'YOLOv3Head'] - - -class MultiClassNMS(object): - # __op__ = fluid.layers.multiclass_nms - def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k, - normalized, score_threshold): - super(MultiClassNMS, self).__init__() - self.background_label = background_label - self.keep_top_k = keep_top_k - self.nms_threshold = nms_threshold - self.nms_top_k = nms_top_k - self.normalized = normalized - self.score_threshold = score_threshold - - -class YOLOv3Head(object): - """Head block for YOLOv3 network - - Args: - norm_decay (float): weight decay for normalization layer weights - num_classes (int): number of output classes - ignore_thresh (float): threshold to ignore confidence loss - label_smooth (bool): whether to use label smoothing - anchors (list): anchors - anchor_masks (list): anchor masks - nms (object): an instance of `MultiClassNMS` - """ - - def __init__(self, - norm_decay=0., - num_classes=80, - ignore_thresh=0.7, - label_smooth=True, - anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], - [59, 119], [116, 90], [156, 198], [373, 326]], - anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], - nms=MultiClassNMS( - background_label=-1, - keep_top_k=100, - nms_threshold=0.45, - nms_top_k=1000, - normalized=True, - score_threshold=0.01), - weight_prefix_name=''): - self.norm_decay = norm_decay - self.num_classes = num_classes - self.ignore_thresh = ignore_thresh - self.label_smooth = label_smooth - self.anchor_masks = anchor_masks - self._parse_anchors(anchors) - self.nms = nms - self.prefix_name 
= weight_prefix_name - - def _conv_bn(self, - input, - ch_out, - filter_size, - stride, - padding, - act='leaky', - is_test=True, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + ".conv.weights"), - bias_attr=False) - - bn_name = name + ".bn" - bn_param_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset') - out = fluid.layers.batch_norm( - input=conv, - act=None, - is_test=is_test, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + '.var') - - if act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - return out - - def _detection_block(self, input, channel, is_test=True, name=None): - assert channel % 2 == 0, \ - "channel {} cannot be divided by 2 in detection block {}" \ - .format(channel, name) - - conv = input - for j in range(2): - conv = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.{}.0'.format(name, j)) - conv = self._conv_bn( - conv, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.{}.1'.format(name, j)) - route = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.2'.format(name)) - tip = self._conv_bn( - route, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.tip'.format(name)) - return route, tip - - def _upsample(self, input, scale=2, name=None): - out = fluid.layers.resize_nearest( - input=input, scale=float(scale), name=name) - return out - - def _parse_anchors(self, anchors): - """ - Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors - - """ - self.anchors = [] - self.mask_anchors = [] - - assert len(anchors) > 0, "ANCHORS not set." - assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." 
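        # [illustrative note] each mask picks 3 anchors, so every detection scale
        # later emits 3 * (5 + num_classes) channels; for this 6-class vehicles
        # head that is 3 * 11 = 33 filters per yolo_output conv.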
- - for anchor in anchors: - assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) - self.anchors.extend(anchor) - - anchor_num = len(anchors) - for masks in self.anchor_masks: - self.mask_anchors.append([]) - for mask in masks: - assert mask < anchor_num, "anchor mask index overflow" - self.mask_anchors[-1].extend(anchors[mask]) - - def _get_outputs(self, input, is_train=True): - """ - Get YOLOv3 head output - - Args: - input (list): List of Variables, output of backbone stages - is_train (bool): whether in train or test mode - - Returns: - outputs (list): Variables of each output layer - """ - - outputs = [] - - # get last out_layer_num blocks in reverse order - out_layer_num = len(self.anchor_masks) - if isinstance(input, OrderedDict): - blocks = list(input.values())[-1:-out_layer_num - 1:-1] - else: - blocks = input[-1:-out_layer_num - 1:-1] - route = None - for i, block in enumerate(blocks): - if i > 0: # perform concat in first 2 detection_block - block = fluid.layers.concat(input=[route, block], axis=1) - route, tip = self._detection_block( - block, - channel=512 // (2**i), - is_test=(not is_train), - name=self.prefix_name + "yolo_block.{}".format(i)) - - # out channel number = mask_num * (5 + class_num) - num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) - block_out = fluid.layers.conv2d( - input=tip, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=ParamAttr(name=self.prefix_name + - "yolo_output.{}.conv.weights".format(i)), - bias_attr=ParamAttr( - regularizer=L2Decay(0.), - name=self.prefix_name + - "yolo_output.{}.conv.bias".format(i))) - outputs.append(block_out) - - if i < len(blocks) - 1: - # do not perform upsample in the last detection_block - route = self._conv_bn( - input=route, - ch_out=256 // (2**i), - filter_size=1, - stride=1, - padding=0, - is_test=(not is_train), - name=self.prefix_name + "yolo_transition.{}".format(i)) - # upsample - route = self._upsample(route) - - return outputs, blocks - - def get_prediction(self, outputs, im_size): - """ - Get prediction result of YOLOv3 network - - Args: - outputs (list): list of Variables, return from _get_outputs - im_size (Variable): Variable of size([h, w]) of each image - - Returns: - pred (Variable): The prediction result after non-max suppress. 
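        yolo_box emits scores shaped [N, boxes, classes]; they are transposed to
        [N, classes, boxes] because multiclass_nms expects one score row per class.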
- - """ - boxes = [] - scores = [] - downsample = 32 - for i, output in enumerate(outputs): - box, score = fluid.layers.yolo_box( - x=output, - img_size=im_size, - anchors=self.mask_anchors[i], - class_num=self.num_classes, - conf_thresh=self.nms.score_threshold, - downsample_ratio=downsample, - name=self.prefix_name + "yolo_box" + str(i)) - boxes.append(box) - scores.append(fluid.layers.transpose(score, perm=[0, 2, 1])) - - downsample //= 2 - - yolo_boxes = fluid.layers.concat(boxes, axis=1) - yolo_scores = fluid.layers.concat(scores, axis=2) - pred = fluid.layers.multiclass_nms( - bboxes=yolo_boxes, - scores=yolo_scores, - score_threshold=self.nms.score_threshold, - nms_top_k=self.nms.nms_top_k, - keep_top_k=self.nms.keep_top_k, - nms_threshold=self.nms.nms_threshold, - background_label=self.nms.background_label, - normalized=self.nms.normalized, - name="multiclass_nms") - return pred diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_venus/darknet.py b/hub_module/modules/image/object_detection/yolov3_darknet53_venus/darknet.py deleted file mode 100644 index 754a7f9f9c7ddb37edb10069ead255fb4d76b6e0..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_darknet53_venus/darknet.py +++ /dev/null @@ -1,168 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import math - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['DarkNet'] - - -class DarkNet(object): - """DarkNet, see https://pjreddie.com/darknet/yolo/ - - Args: - depth (int): network depth, currently only darknet 53 is supported - norm_type (str): normalization type, 'bn' and 'sync_bn' are supported - norm_decay (float): weight decay for normalization layer weights - get_prediction (bool): whether to get prediction - class_dim (int): number of class while classification - """ - - def __init__(self, - depth=53, - norm_type='sync_bn', - norm_decay=0., - weight_prefix_name='', - get_prediction=False, - class_dim=1000): - assert depth in [53], "unsupported depth value" - self.depth = depth - self.norm_type = norm_type - self.norm_decay = norm_decay - self.depth_cfg = {53: ([1, 2, 8, 8, 4], self.basicblock)} - self.prefix_name = weight_prefix_name - self.class_dim = class_dim - self.get_prediction = get_prediction - - def _conv_norm(self, - input, - ch_out, - filter_size, - stride, - padding, - act='leaky', - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + ".conv.weights"), - bias_attr=False) - - bn_name = name + ".bn" - bn_param_attr = ParamAttr( - regularizer=L2Decay(float(self.norm_decay)), - name=bn_name + '.scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(float(self.norm_decay)), - name=bn_name + '.offset') - - out = fluid.layers.batch_norm( - input=conv, - act=None, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + '.var') - - # leaky relu here has `alpha` as 0.1, can not be set by - # `act` param in fluid.layers.batch_norm above. 
- if act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - - return out - - def _downsample(self, - input, - ch_out, - filter_size=3, - stride=2, - padding=1, - name=None): - return self._conv_norm( - input, - ch_out=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - name=name) - - def basicblock(self, input, ch_out, name=None): - conv1 = self._conv_norm( - input, - ch_out=ch_out, - filter_size=1, - stride=1, - padding=0, - name=name + ".0") - conv2 = self._conv_norm( - conv1, - ch_out=ch_out * 2, - filter_size=3, - stride=1, - padding=1, - name=name + ".1") - out = fluid.layers.elementwise_add(x=input, y=conv2, act=None) - return out - - def layer_warp(self, block_func, input, ch_out, count, name=None): - out = block_func(input, ch_out=ch_out, name='{}.0'.format(name)) - for j in six.moves.xrange(1, count): - out = block_func(out, ch_out=ch_out, name='{}.{}'.format(name, j)) - return out - - def __call__(self, input): - """ - Get the backbone of DarkNet, that is output for the 5 stages. - """ - stages, block_func = self.depth_cfg[self.depth] - stages = stages[0:5] - conv = self._conv_norm( - input=input, - ch_out=32, - filter_size=3, - stride=1, - padding=1, - name=self.prefix_name + "yolo_input") - downsample_ = self._downsample( - input=conv, - ch_out=conv.shape[1] * 2, - name=self.prefix_name + "yolo_input.downsample") - blocks = [] - for i, stage in enumerate(stages): - block = self.layer_warp( - block_func=block_func, - input=downsample_, - ch_out=32 * 2**i, - count=stage, - name=self.prefix_name + "stage.{}".format(i)) - blocks.append(block) - if i < len(stages) - 1: # do not downsaple in the last stage - downsample_ = self._downsample( - input=block, - ch_out=block.shape[1] * 2, - name=self.prefix_name + "stage.{}.downsample".format(i)) - if self.get_prediction: - pool = fluid.layers.pool2d( - input=block, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=self.class_dim, - param_attr=ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv), - name='fc_weights'), - bias_attr=ParamAttr(name='fc_offset')) - out = fluid.layers.softmax(out) - return out - else: - return blocks diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_venus/data_feed.py b/hub_module/modules/image/object_detection/yolov3_darknet53_venus/data_feed.py deleted file mode 100644 index d4d35611df9ade3fa6bbd60c04320bec05194d1d..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_darknet53_venus/data_feed.py +++ /dev/null @@ -1,71 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import os - -import cv2 -import numpy as np - -__all__ = ['reader'] - - -def reader(paths=[], images=None): - """ - data generator - - Args: - paths (list[str]): paths to images. - images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] - - Yield: - res (list): preprocessed image and the size of original image. - """ - img_list = [] - if paths: - assert type(paths) is list, "type(paths) is not list." 
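As an aside, the per-image pipeline this reader applies below (BGR to RGB, a non-aspect-preserving resize to 608x608, ImageNet mean/std normalization, HWC to CHW) can be restated as a compact standalone sketch; the constants are the ones used in the code:

```python
import cv2
import numpy as np

def preprocess_sketch(im, target_size=608):
    """Compact restatement of the reader's per-image preprocessing."""
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    # both axes are scaled to target_size independently (aspect ratio not
    # kept); interpolation=2 in the original is cv2.INTER_CUBIC
    im = cv2.resize(im, (target_size, target_size),
                    interpolation=cv2.INTER_CUBIC)
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    im = (im.astype(np.float32) / 255.0 - mean) / std
    return im.transpose((2, 0, 1))  # HWC -> CHW, same as the two swapaxes calls
```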
- for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file path.".format(img_path) - img = cv2.imread(img_path).astype('float32') - img_list.append(img) - if images is not None: - for img in images: - img_list.append(img) - - for im in img_list: - # im_size - im_shape = im.shape - im_size = np.array([im_shape[0], im_shape[1]], dtype=np.int32) - - # decode image - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - - # resize image - target_size = 608 - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - if float(im_size_min) == 0: - raise ZeroDivisionError('min size of image is 0') - - im_scale_x = float(target_size) / float(im_shape[1]) - im_scale_y = float(target_size) / float(im_shape[0]) - im = cv2.resize( - im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=2) - - # normalize image - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - im = im.astype(np.float32, copy=False) - mean = np.array(mean)[np.newaxis, np.newaxis, :] - std = np.array(std)[np.newaxis, np.newaxis, :] - im = im / 255.0 - im -= mean - im /= std - - # permute - im = np.swapaxes(im, 1, 2) - im = np.swapaxes(im, 1, 0) - - yield [im, im_size] diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_venus/module.py b/hub_module/modules/image/object_detection/yolov3_darknet53_venus/module.py deleted file mode 100644 index 29df4f4a66d1fa3232166b781257086f62940d1d..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_darknet53_venus/module.py +++ /dev/null @@ -1,125 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import - -import ast -import argparse -import os -from functools import partial - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix - -from yolov3_darknet53_venus.darknet import DarkNet -from yolov3_darknet53_venus.processor import load_label_info, postprocess, base64_to_cv2 -from yolov3_darknet53_venus.data_feed import reader -from yolov3_darknet53_venus.yolo_head import MultiClassNMS, YOLOv3Head - - -@moduleinfo( - name="yolov3_darknet53_venus", - version="1.0.0", - type="CV/object_detection", - summary= - "Baidu's YOLOv3 model for object detection, with backbone DarkNet53, trained with Baidu self-built dataset.", - author="paddlepaddle", - author_email="paddle-dev@baidu.com") -class YOLOv3DarkNet53Venus(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "yolov3_darknet53_model") - - def context(self, trainable=True, pretrained=True, get_prediction=False): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - get_prediction (bool): whether to get prediction. - - Returns: - inputs(dict): the input variables. - outputs(dict): the output variables. - context_prog (Program): the program to execute transfer learning. 
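        A hypothetical usage sketch (module name as registered above):

            import paddlehub as hub
            venus = hub.Module(name='yolov3_darknet53_venus')
            inputs, outputs, prog = venus.context(trainable=False,
                                                  get_prediction=True)
            # feed inputs['image'] / inputs['im_size']; read outputs['bbox_out']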
- """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - # image - image = fluid.layers.data( - name='image', shape=[3, 608, 608], dtype='float32') - # backbone - backbone = DarkNet(norm_type='bn', norm_decay=0., depth=53) - # body_feats - body_feats = backbone(image) - # im_size - im_size = fluid.layers.data( - name='im_size', shape=[2], dtype='int32') - # yolo_head - yolo_head = YOLOv3Head(num_classes=708) - # head_features - head_features, body_features = yolo_head._get_outputs( - body_feats, is_train=trainable) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - # var_prefix - var_prefix = '@HUB_{}@'.format(self.name) - # name of inputs - inputs = { - 'image': var_prefix + image.name, - 'im_size': var_prefix + im_size.name - } - # name of outputs - if get_prediction: - bbox_out = yolo_head.get_prediction(head_features, im_size) - outputs = {'bbox_out': [var_prefix + bbox_out.name]} - else: - outputs = { - 'head_features': - [var_prefix + var.name for var in head_features], - 'body_features': - [var_prefix + var.name for var in body_features] - } - # add_vars_prefix - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(fluid.default_startup_program(), var_prefix) - # inputs - inputs = { - key: context_prog.global_block().vars[value] - for key, value in inputs.items() - } - # outputs - outputs = { - key: [ - context_prog.global_block().vars[varname] - for varname in value - ] - for key, value in outputs.items() - } - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - # pretrained - if pretrained: - - def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) - else: - exe.run(startup_program) - - return inputs, outputs, context_prog diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_venus/processor.py b/hub_module/modules/image/object_detection/yolov3_darknet53_venus/processor.py deleted file mode 100644 index fa9ecf59451671e3435e150e2c0d3d79a6993d3f..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_darknet53_venus/processor.py +++ /dev/null @@ -1,180 +0,0 @@ -# coding=utf-8 -import base64 -import os - -import cv2 -import numpy as np -from PIL import Image, ImageDraw - -__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess'] - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def check_dir(dir_path): - if not os.path.exists(dir_path): - os.makedirs(dir_path) - elif os.path.isfile(dir_path): - os.remove(dir_path) - os.makedirs(dir_path) - - -def get_save_image_name(img, output_dir, image_path): - """Get save image name from source image path. 
- """ - image_name = os.path.split(image_path)[-1] - name, ext = os.path.splitext(image_name) - - if ext == '': - if img.format == 'PNG': - ext = '.png' - elif img.format == 'JPEG': - ext = '.jpg' - elif img.format == 'BMP': - ext = '.bmp' - else: - if img.mode == "RGB" or img.mode == "L": - ext = ".jpg" - elif img.mode == "RGBA" or img.mode == "P": - ext = '.png' - - return os.path.join(output_dir, "{}".format(name)) + ext - - -def draw_bounding_box_on_image(image_path, data_list, save_dir): - image = Image.open(image_path) - draw = ImageDraw.Draw(image) - for data in data_list: - left, right, top, bottom = data['left'], data['right'], data[ - 'top'], data['bottom'] - # draw bbox - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), - (left, top)], - width=2, - fill='red') - # draw label - if image.mode == 'RGB': - text = data['label'] + ": %.2f%%" % (100 * data['confidence']) - textsize_width, textsize_height = draw.textsize(text=text) - draw.rectangle( - xy=(left, top - (textsize_height + 5), - left + textsize_width + 10, top), - fill=(255, 255, 255)) - draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) - - save_name = get_save_image_name(image, save_dir, image_path) - if os.path.exists(save_name): - os.remove(save_name) - - image.save(save_name) - return save_name - - -def clip_bbox(bbox, img_width, img_height): - xmin = max(min(bbox[0], img_width), 0.) - ymin = max(min(bbox[1], img_height), 0.) - xmax = max(min(bbox[2], img_width), 0.) - ymax = max(min(bbox[3], img_height), 0.) - return xmin, ymin, xmax, ymax - - -def load_label_info(file_path): - with open(file_path, 'r') as fr: - text = fr.readlines() - label_names = [] - for info in text: - label_names.append(info.strip()) - return label_names - - -def postprocess(paths, - images, - data_out, - score_thresh, - label_names, - output_dir, - handle_id, - visualization=True): - """ - postprocess the lod_tensor produced by fluid.Executor.run - - Args: - paths (list[str]): The paths of images. - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - data_out (lod_tensor): data output of predictor. - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - output_dir (str): The path to store output images. - visualization (bool): Whether to save image or not. - score_thresh (float): the low limit of bounding box. - label_names (list[str]): label names. - handle_id (int): The number of images that have been handled. - - Returns: - res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of detection result; - confidence (float): The confidence of detection result. - save_path (str): The path to save output images. - """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() - - check_dir(output_dir) - - assert type(paths) is list, "type(paths) is not list." 
- if handle_id < len(paths): - unhandled_paths = paths[handle_id:] - unhandled_paths_num = len(unhandled_paths) - else: - unhandled_paths_num = 0 - - output = list() - for index in range(len(lod) - 1): - output_i = {'data': []} - if index < unhandled_paths_num: - org_img_path = unhandled_paths[index] - org_img = Image.open(org_img_path) - else: - org_img = images[index - unhandled_paths_num] - org_img = org_img.astype(np.uint8) - org_img = Image.fromarray(org_img[:, :, ::-1]) - if visualization: - org_img_path = get_save_image_name( - org_img, output_dir, 'image_numpy_{}'.format( - (handle_id + index))) - org_img.save(org_img_path) - org_img_height = org_img.height - org_img_width = org_img.width - result_i = results[lod[index]:lod[index + 1]] - for row in result_i: - if len(row) != 6: - continue - if row[1] < score_thresh: - continue - category_id = int(row[0]) - confidence = row[1] - bbox = row[2:] - dt = {} - dt['label'] = label_names[category_id] - dt['confidence'] = confidence - dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox( - bbox, org_img_width, org_img_height) - output_i['data'].append(dt) - - output.append(output_i) - if visualization: - output_i['save_path'] = draw_bounding_box_on_image( - org_img_path, output_i['data'], output_dir) - - return output diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_venus/yolo_head.py b/hub_module/modules/image/object_detection/yolov3_darknet53_venus/yolo_head.py deleted file mode 100644 index 7428fb4c281507c30918e12a04753d559346cf7b..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_darknet53_venus/yolo_head.py +++ /dev/null @@ -1,273 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['MultiClassNMS', 'YOLOv3Head'] - - -class MultiClassNMS(object): - # __op__ = fluid.layers.multiclass_nms - def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k, - normalized, score_threshold): - super(MultiClassNMS, self).__init__() - self.background_label = background_label - self.keep_top_k = keep_top_k - self.nms_threshold = nms_threshold - self.nms_top_k = nms_top_k - self.normalized = normalized - self.score_threshold = score_threshold - - -class YOLOv3Head(object): - """Head block for YOLOv3 network - - Args: - norm_decay (float): weight decay for normalization layer weights - num_classes (int): number of output classes - ignore_thresh (float): threshold to ignore confidence loss - label_smooth (bool): whether to use label smoothing - anchors (list): anchors - anchor_masks (list): anchor masks - nms (object): an instance of `MultiClassNMS` - """ - - def __init__(self, - norm_decay=0., - num_classes=80, - ignore_thresh=0.7, - label_smooth=True, - anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], - [59, 119], [116, 90], [156, 198], [373, 326]], - anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], - nms=MultiClassNMS( - background_label=-1, - keep_top_k=100, - nms_threshold=0.45, - nms_top_k=1000, - normalized=True, - score_threshold=0.01), - weight_prefix_name=''): - self.norm_decay = norm_decay - self.num_classes = num_classes - self.ignore_thresh = ignore_thresh - self.label_smooth = label_smooth - self.anchor_masks = anchor_masks - self._parse_anchors(anchors) - self.nms = nms - self.prefix_name = 
weight_prefix_name - - def _conv_bn(self, - input, - ch_out, - filter_size, - stride, - padding, - act='leaky', - is_test=True, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + ".conv.weights"), - bias_attr=False) - - bn_name = name + ".bn" - bn_param_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset') - out = fluid.layers.batch_norm( - input=conv, - act=None, - is_test=is_test, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + '.var') - - if act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - return out - - def _detection_block(self, input, channel, is_test=True, name=None): - assert channel % 2 == 0, \ - "channel {} cannot be divided by 2 in detection block {}" \ - .format(channel, name) - - conv = input - for j in range(2): - conv = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.{}.0'.format(name, j)) - conv = self._conv_bn( - conv, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.{}.1'.format(name, j)) - route = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.2'.format(name)) - tip = self._conv_bn( - route, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.tip'.format(name)) - return route, tip - - def _upsample(self, input, scale=2, name=None): - out = fluid.layers.resize_nearest( - input=input, scale=float(scale), name=name) - return out - - def _parse_anchors(self, anchors): - """ - Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors - - """ - self.anchors = [] - self.mask_anchors = [] - - assert len(anchors) > 0, "ANCHORS not set." - assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." 
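The loop that follows flattens the nine [w, h] anchor pairs and regroups them by mask; with the defaults above it is equivalent to this sketch:

```python
anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
           [59, 119], [116, 90], [156, 198], [373, 326]]
anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]

flat_anchors = [v for pair in anchors for v in pair]    # -> self.anchors
mask_anchors = [[v for m in masks for v in anchors[m]]  # -> self.mask_anchors
                for masks in anchor_masks]
assert mask_anchors[0] == [116, 90, 156, 198, 373, 326]  # coarsest scale first
```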
- - for anchor in anchors: - assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) - self.anchors.extend(anchor) - - anchor_num = len(anchors) - for masks in self.anchor_masks: - self.mask_anchors.append([]) - for mask in masks: - assert mask < anchor_num, "anchor mask index overflow" - self.mask_anchors[-1].extend(anchors[mask]) - - def _get_outputs(self, input, is_train=True): - """ - Get YOLOv3 head output - - Args: - input (list): List of Variables, output of backbone stages - is_train (bool): whether in train or test mode - - Returns: - outputs (list): Variables of each output layer - """ - - outputs = [] - - # get last out_layer_num blocks in reverse order - out_layer_num = len(self.anchor_masks) - if isinstance(input, OrderedDict): - blocks = list(input.values())[-1:-out_layer_num - 1:-1] - else: - blocks = input[-1:-out_layer_num - 1:-1] - route = None - for i, block in enumerate(blocks): - if i > 0: # perform concat in first 2 detection_block - block = fluid.layers.concat(input=[route, block], axis=1) - route, tip = self._detection_block( - block, - channel=512 // (2**i), - is_test=(not is_train), - name=self.prefix_name + "yolo_block.{}".format(i)) - - # out channel number = mask_num * (5 + class_num) - num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) - block_out = fluid.layers.conv2d( - input=tip, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=ParamAttr(name=self.prefix_name + - "yolo_output.{}.conv.weights".format(i)), - bias_attr=ParamAttr( - regularizer=L2Decay(0.), - name=self.prefix_name + - "yolo_output.{}.conv.bias".format(i))) - outputs.append(block_out) - - if i < len(blocks) - 1: - # do not perform upsample in the last detection_block - route = self._conv_bn( - input=route, - ch_out=256 // (2**i), - filter_size=1, - stride=1, - padding=0, - is_test=(not is_train), - name=self.prefix_name + "yolo_transition.{}".format(i)) - # upsample - route = self._upsample(route) - - return outputs, blocks - - def get_prediction(self, outputs, im_size): - """ - Get prediction result of YOLOv3 network - - Args: - outputs (list): list of Variables, return from _get_outputs - im_size (Variable): Variable of size([h, w]) of each image - - Returns: - pred (Variable): The prediction result after non-max suppress. 
- - """ - boxes = [] - scores = [] - downsample = 32 - for i, output in enumerate(outputs): - box, score = fluid.layers.yolo_box( - x=output, - img_size=im_size, - anchors=self.mask_anchors[i], - class_num=self.num_classes, - conf_thresh=self.nms.score_threshold, - downsample_ratio=downsample, - name=self.prefix_name + "yolo_box" + str(i)) - boxes.append(box) - scores.append(fluid.layers.transpose(score, perm=[0, 2, 1])) - - downsample //= 2 - - yolo_boxes = fluid.layers.concat(boxes, axis=1) - yolo_scores = fluid.layers.concat(scores, axis=2) - pred = fluid.layers.multiclass_nms( - bboxes=yolo_boxes, - scores=yolo_scores, - score_threshold=self.nms.score_threshold, - nms_top_k=self.nms.nms_top_k, - keep_top_k=self.nms.keep_top_k, - nms_threshold=self.nms.nms_threshold, - background_label=self.nms.background_label, - normalized=self.nms.normalized, - name="multiclass_nms") - return pred diff --git a/hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/data_feed.py b/hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/data_feed.py deleted file mode 100644 index d4d35611df9ade3fa6bbd60c04320bec05194d1d..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/data_feed.py +++ /dev/null @@ -1,71 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import os - -import cv2 -import numpy as np - -__all__ = ['reader'] - - -def reader(paths=[], images=None): - """ - data generator - - Args: - paths (list[str]): paths to images. - images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] - - Yield: - res (list): preprocessed image and the size of original image. - """ - img_list = [] - if paths: - assert type(paths) is list, "type(paths) is not list." 
- for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file path.".format(img_path) - img = cv2.imread(img_path).astype('float32') - img_list.append(img) - if images is not None: - for img in images: - img_list.append(img) - - for im in img_list: - # im_size - im_shape = im.shape - im_size = np.array([im_shape[0], im_shape[1]], dtype=np.int32) - - # decode image - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - - # resize image - target_size = 608 - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - if float(im_size_min) == 0: - raise ZeroDivisionError('min size of image is 0') - - im_scale_x = float(target_size) / float(im_shape[1]) - im_scale_y = float(target_size) / float(im_shape[0]) - im = cv2.resize( - im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=2) - - # normalize image - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - im = im.astype(np.float32, copy=False) - mean = np.array(mean)[np.newaxis, np.newaxis, :] - std = np.array(std)[np.newaxis, np.newaxis, :] - im = im / 255.0 - im -= mean - im /= std - - # permute - im = np.swapaxes(im, 1, 2) - im = np.swapaxes(im, 1, 0) - - yield [im, im_size] diff --git a/hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/mobilenet_v1.py b/hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/mobilenet_v1.py deleted file mode 100644 index 05f64c9382b8630e41bac0546f67dcb83d7d4822..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/mobilenet_v1.py +++ /dev/null @@ -1,194 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['MobileNet'] - - -class MobileNet(object): - """ - MobileNet v1, see https://arxiv.org/abs/1704.04861 - - Args: - norm_type (str): normalization type, 'bn' and 'sync_bn' are supported - norm_decay (float): weight decay for normalization layer weights - conv_group_scale (int): scaling factor for convolution groups - with_extra_blocks (bool): if extra blocks should be added - extra_block_filters (list): number of filter for each extra block - """ - __shared__ = ['norm_type', 'weight_prefix_name'] - - def __init__(self, - norm_type='bn', - norm_decay=0., - conv_group_scale=1, - conv_learning_rate=1.0, - with_extra_blocks=False, - extra_block_filters=[[256, 512], [128, 256], [128, 256], - [64, 128]], - weight_prefix_name=''): - self.norm_type = norm_type - self.norm_decay = norm_decay - self.conv_group_scale = conv_group_scale - self.conv_learning_rate = conv_learning_rate - self.with_extra_blocks = with_extra_blocks - self.extra_block_filters = extra_block_filters - self.prefix_name = weight_prefix_name - - def _conv_norm(self, - input, - filter_size, - num_filters, - stride, - padding, - num_groups=1, - act='relu', - use_cudnn=True, - name=None): - parameter_attr = ParamAttr( - learning_rate=self.conv_learning_rate, - initializer=fluid.initializer.MSRA(), - name=name + "_weights") - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - act=None, - use_cudnn=use_cudnn, - param_attr=parameter_attr, - bias_attr=False) - - bn_name = name + "_bn" - norm_decay = self.norm_decay - bn_param_attr = ParamAttr( - regularizer=L2Decay(norm_decay), 
name=bn_name + '_scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(norm_decay), name=bn_name + '_offset') - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def depthwise_separable(self, - input, - num_filters1, - num_filters2, - num_groups, - stride, - scale, - name=None): - depthwise_conv = self._conv_norm( - input=input, - filter_size=3, - num_filters=int(num_filters1 * scale), - stride=stride, - padding=1, - num_groups=int(num_groups * scale), - use_cudnn=False, - name=name + "_dw") - - pointwise_conv = self._conv_norm( - input=depthwise_conv, - filter_size=1, - num_filters=int(num_filters2 * scale), - stride=1, - padding=0, - name=name + "_sep") - return pointwise_conv - - def _extra_block(self, - input, - num_filters1, - num_filters2, - num_groups, - stride, - name=None): - pointwise_conv = self._conv_norm( - input=input, - filter_size=1, - num_filters=int(num_filters1), - stride=1, - num_groups=int(num_groups), - padding=0, - name=name + "_extra1") - normal_conv = self._conv_norm( - input=pointwise_conv, - filter_size=3, - num_filters=int(num_filters2), - stride=2, - num_groups=int(num_groups), - padding=1, - name=name + "_extra2") - return normal_conv - - def __call__(self, input): - scale = self.conv_group_scale - - blocks = [] - # input 1/1 - out = self._conv_norm( - input, 3, int(32 * scale), 2, 1, name=self.prefix_name + "conv1") - # 1/2 - out = self.depthwise_separable( - out, 32, 64, 32, 1, scale, name=self.prefix_name + "conv2_1") - out = self.depthwise_separable( - out, 64, 128, 64, 2, scale, name=self.prefix_name + "conv2_2") - # 1/4 - out = self.depthwise_separable( - out, 128, 128, 128, 1, scale, name=self.prefix_name + "conv3_1") - out = self.depthwise_separable( - out, 128, 256, 128, 2, scale, name=self.prefix_name + "conv3_2") - # 1/8 - blocks.append(out) - out = self.depthwise_separable( - out, 256, 256, 256, 1, scale, name=self.prefix_name + "conv4_1") - out = self.depthwise_separable( - out, 256, 512, 256, 2, scale, name=self.prefix_name + "conv4_2") - # 1/16 - blocks.append(out) - for i in range(5): - out = self.depthwise_separable( - out, - 512, - 512, - 512, - 1, - scale, - name=self.prefix_name + "conv5_" + str(i + 1)) - module11 = out - - out = self.depthwise_separable( - out, 512, 1024, 512, 2, scale, name=self.prefix_name + "conv5_6") - # 1/32 - out = self.depthwise_separable( - out, 1024, 1024, 1024, 1, scale, name=self.prefix_name + "conv6") - module13 = out - blocks.append(out) - if not self.with_extra_blocks: - return blocks - - num_filters = self.extra_block_filters - module14 = self._extra_block(module13, num_filters[0][0], - num_filters[0][1], 1, 2, - self.prefix_name + "conv7_1") - module15 = self._extra_block(module14, num_filters[1][0], - num_filters[1][1], 1, 2, - self.prefix_name + "conv7_2") - module16 = self._extra_block(module15, num_filters[2][0], - num_filters[2][1], 1, 2, - self.prefix_name + "conv7_3") - module17 = self._extra_block(module16, num_filters[3][0], - num_filters[3][1], 1, 2, - self.prefix_name + "conv7_4") - return module11, module13, module14, module15, module16, module17 diff --git a/hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/module.py b/hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/module.py deleted file mode 100644 index 659cde37ca481f524ebd33daedc80eb3adc24b3a..0000000000000000000000000000000000000000 --- 
a/hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/module.py +++ /dev/null @@ -1,317 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import - -import ast -import argparse -import os -from functools import partial - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix - -from yolov3_mobilenet_v1_coco2017.mobilenet_v1 import MobileNet -from yolov3_mobilenet_v1_coco2017.processor import load_label_info, postprocess, base64_to_cv2 -from yolov3_mobilenet_v1_coco2017.data_feed import reader -from yolov3_mobilenet_v1_coco2017.yolo_head import MultiClassNMS, YOLOv3Head - - -@moduleinfo( - name="yolov3_mobilenet_v1_coco2017", - version="1.0.1", - type="CV/object_detection", - summary= - "Baidu's YOLOv3 model for object detection with backbone MobileNet_V1, trained with dataset COCO2017.", - author="paddlepaddle", - author_email="paddle-dev@baidu.com") -class YOLOv3MobileNetV1Coco2017(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "yolov3_mobilenet_v1_model") - self.label_names = load_label_info( - os.path.join(self.directory, "label_file.txt")) - self._set_config() - - def _set_config(self): - """ - predictor config setting. - """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, trainable=True, pretrained=True, get_prediction=False): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - get_prediction (bool): whether to get prediction. - - Returns: - inputs(dict): the input variables. - outputs(dict): the output variables. - context_prog (Program): the program to execute transfer learning. 
- """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - # image - image = fluid.layers.data( - name='image', shape=[3, 608, 608], dtype='float32') - # backbone - backbone = MobileNet( - norm_type='sync_bn', - norm_decay=0., - conv_group_scale=1, - with_extra_blocks=False) - # body_feats - body_feats = backbone(image) - # im_size - im_size = fluid.layers.data( - name='im_size', shape=[2], dtype='int32') - # yolo_head - yolo_head = YOLOv3Head(num_classes=80) - # head_features - head_features, body_features = yolo_head._get_outputs( - body_feats, is_train=trainable) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(startup_program) - - # var_prefix - var_prefix = '@HUB_{}@'.format(self.name) - # name of inputs - inputs = { - 'image': var_prefix + image.name, - 'im_size': var_prefix + im_size.name - } - # name of outputs - if get_prediction: - bbox_out = yolo_head.get_prediction(head_features, im_size) - outputs = {'bbox_out': [var_prefix + bbox_out.name]} - else: - outputs = { - 'head_features': - [var_prefix + var.name for var in head_features], - 'body_features': - [var_prefix + var.name for var in body_features] - } - # add_vars_prefix - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(startup_program, var_prefix) - # inputs - inputs = { - key: context_prog.global_block().vars[value] - for key, value in inputs.items() - } - # outputs - outputs = { - key: [ - context_prog.global_block().vars[varname] - for varname in value - ] - for key, value in outputs.items() - } - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - # pretrained - if pretrained: - - def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) - else: - exe.run(startup_program) - - return inputs, outputs, context_prog - - def object_detection(self, - paths=None, - images=None, - batch_size=1, - use_gpu=False, - output_dir='detection_result', - score_thresh=0.5, - visualization=True): - """API of Object Detection. - - Args: - paths (list[str]): The paths of images. - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - output_dir (str): The path to store output images. - visualization (bool): Whether to save image or not. - score_thresh (float): threshold for object detecion. - - Returns: - res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of detection result; - confidence (float): The confidence of detection result. - save_path (str, optional): The path to save output images. 
- """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." - ) - - paths = paths if paths else list() - data_reader = partial(reader, paths, images) - batch_reader = fluid.io.batch(data_reader, batch_size=batch_size) - res = [] - for iter_id, feed_data in enumerate(batch_reader()): - feed_data = np.array(feed_data) - image_tensor = PaddleTensor(np.array(list(feed_data[:, 0]))) - im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1]))) - if use_gpu: - data_out = self.gpu_predictor.run( - [image_tensor, im_size_tensor]) - else: - data_out = self.cpu_predictor.run( - [image_tensor, im_size_tensor]) - - output = postprocess( - paths=paths, - images=images, - data_out=data_out, - score_thresh=score_thresh, - label_names=self.label_names, - output_dir=output_dir, - handle_id=iter_id * batch_size, - visualization=visualization) - res.extend(output) - return res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.object_detection(images=images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. - """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.object_detection( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization, - score_thresh=args.score_thresh) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='detection_result', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - - def add_module_input_arg(self): - """ - Add the command input options. 
- """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") - self.arg_input_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - self.arg_input_group.add_argument( - '--score_thresh', - type=ast.literal_eval, - default=0.5, - help="threshold for object detecion.") diff --git a/hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/processor.py b/hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/processor.py deleted file mode 100644 index a6a91b32724c10aaed9be4498eba4311fdc7a2e8..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/processor.py +++ /dev/null @@ -1,180 +0,0 @@ -# coding=utf-8 -import base64 -import os - -import cv2 -import numpy as np -from PIL import Image, ImageDraw - -__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess'] - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def check_dir(dir_path): - if not os.path.exists(dir_path): - os.makedirs(dir_path) - elif os.path.isfile(dir_path): - os.remove(dir_path) - os.makedirs(dir_path) - - -def get_save_image_name(img, output_dir, image_path): - """Get save image name from source image path. - """ - image_name = os.path.split(image_path)[-1] - name, ext = os.path.splitext(image_name) - - if ext == '': - if img.format == 'PNG': - ext = '.png' - elif img.format == 'JPEG': - ext = '.jpg' - elif img.format == 'BMP': - ext = '.bmp' - else: - if img.mode == "RGB" or img.mode == "L": - ext = ".jpg" - elif img.mode == "RGBA" or img.mode == "P": - ext = '.png' - - return os.path.join(output_dir, "{}".format(name)) + ext - - -def draw_bounding_box_on_image(image_path, data_list, save_dir): - image = Image.open(image_path) - draw = ImageDraw.Draw(image) - for data in data_list: - left, right, top, bottom = data['left'], data['right'], data[ - 'top'], data['bottom'] - # draw bbox - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), - (left, top)], - width=2, - fill='red') - # draw label - if image.mode == 'RGB': - text = data['label'] + ": %.2f%%" % (100 * data['confidence']) - textsize_width, textsize_height = draw.textsize(text=text) - draw.rectangle( - xy=(left, top - (textsize_height + 5), - left + textsize_width + 10, top), - fill=(255, 255, 255)) - draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) - - save_name = get_save_image_name(image, save_dir, image_path) - if os.path.exists(save_name): - os.remove(save_name) - - image.save(save_name) - return save_name - - -def clip_bbox(bbox, img_width, img_height): - xmin = max(min(bbox[0], img_width), 0.) - ymin = max(min(bbox[1], img_height), 0.) - xmax = max(min(bbox[2], img_width), 0.) - ymax = max(min(bbox[3], img_height), 0.) - return float(xmin), float(ymin), float(xmax), float(ymax) - - -def load_label_info(file_path): - with open(file_path, 'r') as fr: - text = fr.readlines() - label_names = [] - for info in text: - label_names.append(info.strip()) - return label_names - - -def postprocess(paths, - images, - data_out, - score_thresh, - label_names, - output_dir, - handle_id, - visualization=True): - """ - postprocess the lod_tensor produced by fluid.Executor.run - - Args: - paths (list[str]): The paths of images. - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - data_out (lod_tensor): data output of predictor. 
- batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - output_dir (str): The path to store output images. - visualization (bool): Whether to save image or not. - score_thresh (float): the low limit of bounding box. - label_names (list[str]): label names. - handle_id (int): The number of images that have been handled. - - Returns: - res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of detection result; - confidence (float): The confidence of detection result. - save_path (str): The path to save output images. - """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() - - check_dir(output_dir) - - assert type(paths) is list, "type(paths) is not list." - if handle_id < len(paths): - unhandled_paths = paths[handle_id:] - unhandled_paths_num = len(unhandled_paths) - else: - unhandled_paths_num = 0 - - output = list() - for index in range(len(lod) - 1): - output_i = {'data': []} - if index < unhandled_paths_num: - org_img_path = unhandled_paths[index] - org_img = Image.open(org_img_path) - else: - org_img = images[index - unhandled_paths_num] - org_img = org_img.astype(np.uint8) - org_img = Image.fromarray(org_img[:, :, ::-1]) - if visualization: - org_img_path = get_save_image_name( - org_img, output_dir, 'image_numpy_{}'.format( - (handle_id + index))) - org_img.save(org_img_path) - org_img_height = org_img.height - org_img_width = org_img.width - result_i = results[lod[index]:lod[index + 1]] - for row in result_i: - if len(row) != 6: - continue - if row[1] < score_thresh: - continue - category_id = int(row[0]) - confidence = row[1] - bbox = row[2:] - dt = {} - dt['label'] = label_names[category_id] - dt['confidence'] = float(confidence) - dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox( - bbox, org_img_width, org_img_height) - output_i['data'].append(dt) - - output.append(output_i) - if visualization: - output_i['save_path'] = draw_bounding_box_on_image( - org_img_path, output_i['data'], output_dir) - - return output diff --git a/hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/yolo_head.py b/hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/yolo_head.py deleted file mode 100644 index 7428fb4c281507c30918e12a04753d559346cf7b..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/yolo_head.py +++ /dev/null @@ -1,273 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['MultiClassNMS', 'YOLOv3Head'] - - -class MultiClassNMS(object): - # __op__ = fluid.layers.multiclass_nms - def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k, - normalized, score_threshold): - super(MultiClassNMS, self).__init__() - self.background_label = 
background_label - self.keep_top_k = keep_top_k - self.nms_threshold = nms_threshold - self.nms_top_k = nms_top_k - self.normalized = normalized - self.score_threshold = score_threshold - - -class YOLOv3Head(object): - """Head block for YOLOv3 network - - Args: - norm_decay (float): weight decay for normalization layer weights - num_classes (int): number of output classes - ignore_thresh (float): threshold to ignore confidence loss - label_smooth (bool): whether to use label smoothing - anchors (list): anchors - anchor_masks (list): anchor masks - nms (object): an instance of `MultiClassNMS` - """ - - def __init__(self, - norm_decay=0., - num_classes=80, - ignore_thresh=0.7, - label_smooth=True, - anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], - [59, 119], [116, 90], [156, 198], [373, 326]], - anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], - nms=MultiClassNMS( - background_label=-1, - keep_top_k=100, - nms_threshold=0.45, - nms_top_k=1000, - normalized=True, - score_threshold=0.01), - weight_prefix_name=''): - self.norm_decay = norm_decay - self.num_classes = num_classes - self.ignore_thresh = ignore_thresh - self.label_smooth = label_smooth - self.anchor_masks = anchor_masks - self._parse_anchors(anchors) - self.nms = nms - self.prefix_name = weight_prefix_name - - def _conv_bn(self, - input, - ch_out, - filter_size, - stride, - padding, - act='leaky', - is_test=True, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + ".conv.weights"), - bias_attr=False) - - bn_name = name + ".bn" - bn_param_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset') - out = fluid.layers.batch_norm( - input=conv, - act=None, - is_test=is_test, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + '.var') - - if act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - return out - - def _detection_block(self, input, channel, is_test=True, name=None): - assert channel % 2 == 0, \ - "channel {} cannot be divided by 2 in detection block {}" \ - .format(channel, name) - - conv = input - for j in range(2): - conv = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.{}.0'.format(name, j)) - conv = self._conv_bn( - conv, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.{}.1'.format(name, j)) - route = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.2'.format(name)) - tip = self._conv_bn( - route, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.tip'.format(name)) - return route, tip - - def _upsample(self, input, scale=2, name=None): - out = fluid.layers.resize_nearest( - input=input, scale=float(scale), name=name) - return out - - def _parse_anchors(self, anchors): - """ - Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors - - """ - self.anchors = [] - self.mask_anchors = [] - - assert len(anchors) > 0, "ANCHORS not set." - assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." 
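One arithmetic point worth making explicit, since `_get_outputs` (below) depends on it: each 1x1 output convolution predicts, per anchor in its mask, 4 box offsets, 1 objectness score and one score per class, hence with the COCO defaults:

```python
num_classes = 80
mask_num = 3                        # len(anchor_masks[i]) for every scale
num_filters = mask_num * (num_classes + 5)
assert num_filters == 255           # channel count of each yolo_output conv
```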
- - for anchor in anchors: - assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) - self.anchors.extend(anchor) - - anchor_num = len(anchors) - for masks in self.anchor_masks: - self.mask_anchors.append([]) - for mask in masks: - assert mask < anchor_num, "anchor mask index overflow" - self.mask_anchors[-1].extend(anchors[mask]) - - def _get_outputs(self, input, is_train=True): - """ - Get YOLOv3 head output - - Args: - input (list): List of Variables, output of backbone stages - is_train (bool): whether in train or test mode - - Returns: - outputs (list): Variables of each output layer - """ - - outputs = [] - - # get last out_layer_num blocks in reverse order - out_layer_num = len(self.anchor_masks) - if isinstance(input, OrderedDict): - blocks = list(input.values())[-1:-out_layer_num - 1:-1] - else: - blocks = input[-1:-out_layer_num - 1:-1] - route = None - for i, block in enumerate(blocks): - if i > 0: # perform concat in first 2 detection_block - block = fluid.layers.concat(input=[route, block], axis=1) - route, tip = self._detection_block( - block, - channel=512 // (2**i), - is_test=(not is_train), - name=self.prefix_name + "yolo_block.{}".format(i)) - - # out channel number = mask_num * (5 + class_num) - num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) - block_out = fluid.layers.conv2d( - input=tip, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=ParamAttr(name=self.prefix_name + - "yolo_output.{}.conv.weights".format(i)), - bias_attr=ParamAttr( - regularizer=L2Decay(0.), - name=self.prefix_name + - "yolo_output.{}.conv.bias".format(i))) - outputs.append(block_out) - - if i < len(blocks) - 1: - # do not perform upsample in the last detection_block - route = self._conv_bn( - input=route, - ch_out=256 // (2**i), - filter_size=1, - stride=1, - padding=0, - is_test=(not is_train), - name=self.prefix_name + "yolo_transition.{}".format(i)) - # upsample - route = self._upsample(route) - - return outputs, blocks - - def get_prediction(self, outputs, im_size): - """ - Get prediction result of YOLOv3 network - - Args: - outputs (list): list of Variables, return from _get_outputs - im_size (Variable): Variable of size([h, w]) of each image - - Returns: - pred (Variable): The prediction result after non-max suppress. 
- - """ - boxes = [] - scores = [] - downsample = 32 - for i, output in enumerate(outputs): - box, score = fluid.layers.yolo_box( - x=output, - img_size=im_size, - anchors=self.mask_anchors[i], - class_num=self.num_classes, - conf_thresh=self.nms.score_threshold, - downsample_ratio=downsample, - name=self.prefix_name + "yolo_box" + str(i)) - boxes.append(box) - scores.append(fluid.layers.transpose(score, perm=[0, 2, 1])) - - downsample //= 2 - - yolo_boxes = fluid.layers.concat(boxes, axis=1) - yolo_scores = fluid.layers.concat(scores, axis=2) - pred = fluid.layers.multiclass_nms( - bboxes=yolo_boxes, - scores=yolo_scores, - score_threshold=self.nms.score_threshold, - nms_top_k=self.nms.nms_top_k, - keep_top_k=self.nms.keep_top_k, - nms_threshold=self.nms.nms_threshold, - background_label=self.nms.background_label, - normalized=self.nms.normalized, - name="multiclass_nms") - return pred diff --git a/hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/data_feed.py b/hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/data_feed.py deleted file mode 100644 index d4d35611df9ade3fa6bbd60c04320bec05194d1d..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/data_feed.py +++ /dev/null @@ -1,71 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import os - -import cv2 -import numpy as np - -__all__ = ['reader'] - - -def reader(paths=[], images=None): - """ - data generator - - Args: - paths (list[str]): paths to images. - images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] - - Yield: - res (list): preprocessed image and the size of original image. - """ - img_list = [] - if paths: - assert type(paths) is list, "type(paths) is not list." 
- for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file path.".format(img_path) - img = cv2.imread(img_path).astype('float32') - img_list.append(img) - if images is not None: - for img in images: - img_list.append(img) - - for im in img_list: - # im_size - im_shape = im.shape - im_size = np.array([im_shape[0], im_shape[1]], dtype=np.int32) - - # decode image - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - - # resize image - target_size = 608 - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - if float(im_size_min) == 0: - raise ZeroDivisionError('min size of image is 0') - - im_scale_x = float(target_size) / float(im_shape[1]) - im_scale_y = float(target_size) / float(im_shape[0]) - im = cv2.resize( - im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=2) - - # normalize image - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - im = im.astype(np.float32, copy=False) - mean = np.array(mean)[np.newaxis, np.newaxis, :] - std = np.array(std)[np.newaxis, np.newaxis, :] - im = im / 255.0 - im -= mean - im /= std - - # permute - im = np.swapaxes(im, 1, 2) - im = np.swapaxes(im, 1, 0) - - yield [im, im_size] diff --git a/hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/module.py b/hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/module.py deleted file mode 100644 index 14e31fdd667d3fc9eaa78f185d0c57984fb7ebe0..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/module.py +++ /dev/null @@ -1,319 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import - -import ast -import argparse -import os -from functools import partial - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix - -from yolov3_resnet34_coco2017.resnet import ResNet -from yolov3_resnet34_coco2017.processor import load_label_info, postprocess, base64_to_cv2 -from yolov3_resnet34_coco2017.data_feed import reader -from yolov3_resnet34_coco2017.yolo_head import MultiClassNMS, YOLOv3Head - - -@moduleinfo( - name="yolov3_resnet34_coco2017", - version="1.0.1", - type="CV/object_detection", - summary= - "Baidu's YOLOv3 model for object detection with backbone ResNet34, trained with dataset coco2017.", - author="paddlepaddle", - author_email="paddle-dev@baidu.com") -class YOLOv3ResNet34Coco2017(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "yolov3_resnet34_model") - self.label_names = load_label_info( - os.path.join(self.directory, "label_file.txt")) - self._set_config() - - def _set_config(self): - """ - predictor config setting. 
- """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, trainable=True, pretrained=True, get_prediction=False): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - get_prediction (bool): whether to get prediction. - - Returns: - inputs(dict): the input variables. - outputs(dict): the output variables. - context_prog (Program): the program to execute transfer learning. - """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - # image - image = fluid.layers.data( - name='image', shape=[3, 608, 608], dtype='float32') - # backbone - backbone = ResNet( - norm_type='bn', - freeze_at=0, - freeze_norm=False, - norm_decay=0., - depth=34, - feature_maps=[3, 4, 5]) - # body_feats - body_feats = backbone(image) - # im_size - im_size = fluid.layers.data( - name='im_size', shape=[2], dtype='int32') - # yolo_head - yolo_head = YOLOv3Head(num_classes=80) - # head_features - head_features, body_features = yolo_head._get_outputs( - body_feats, is_train=trainable) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - # var_prefix - var_prefix = '@HUB_{}@'.format(self.name) - # name of inputs - inputs = { - 'image': var_prefix + image.name, - 'im_size': var_prefix + im_size.name - } - # name of outputs - if get_prediction: - bbox_out = yolo_head.get_prediction(head_features, im_size) - outputs = {'bbox_out': [var_prefix + bbox_out.name]} - else: - outputs = { - 'head_features': - [var_prefix + var.name for var in head_features], - 'body_features': - [var_prefix + var.name for var in body_features] - } - # add_vars_prefix - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(fluid.default_startup_program(), var_prefix) - # inputs - inputs = { - key: context_prog.global_block().vars[value] - for key, value in inputs.items() - } - # outputs - outputs = { - key: [ - context_prog.global_block().vars[varname] - for varname in value - ] - for key, value in outputs.items() - } - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - # pretrained - if pretrained: - - def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) - else: - exe.run(startup_program) - - return inputs, outputs, context_prog - - def object_detection(self, - paths=None, - images=None, - batch_size=1, - use_gpu=False, - output_dir='detection_result', - score_thresh=0.5, - visualization=True): - """API of Object Detection. - - Args: - paths (list[str]): The paths of images. - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - batch_size (int): batch size. 
- use_gpu (bool): Whether to use GPU.
- output_dir (str): The path to store output images.
- visualization (bool): Whether to save image or not.
- score_thresh (float): threshold for object detection.
-
- Returns:
- res (list[dict]): The result of COCO2017 detection. keys include 'data', 'save_path', the corresponding value is:
- data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
- left (float): The X coordinate of the upper left corner of the bounding box;
- top (float): The Y coordinate of the upper left corner of the bounding box;
- right (float): The X coordinate of the lower right corner of the bounding box;
- bottom (float): The Y coordinate of the lower right corner of the bounding box;
- label (str): The label of detection result;
- confidence (float): The confidence of detection result.
- save_path (str, optional): The path to save output images.
- """
- if use_gpu:
- try:
- _places = os.environ["CUDA_VISIBLE_DEVICES"]
- int(_places[0])
- except Exception:
- raise RuntimeError(
- "Environment variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as the CUDA device id."
- )
-
- paths = paths if paths else list()
- data_reader = partial(reader, paths, images)
- batch_reader = fluid.io.batch(data_reader, batch_size=batch_size)
- res = []
- for iter_id, feed_data in enumerate(batch_reader()):
- feed_data = np.array(feed_data)
- image_tensor = PaddleTensor(np.array(list(feed_data[:, 0])))
- im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1])))
- if use_gpu:
- data_out = self.gpu_predictor.run(
- [image_tensor, im_size_tensor])
- else:
- data_out = self.cpu_predictor.run(
- [image_tensor, im_size_tensor])
-
- output = postprocess(
- paths=paths,
- images=images,
- data_out=data_out,
- score_thresh=score_thresh,
- label_names=self.label_names,
- output_dir=output_dir,
- handle_id=iter_id * batch_size,
- visualization=visualization)
- res.extend(output)
- return res
-
- def save_inference_model(self,
- dirname,
- model_filename=None,
- params_filename=None,
- combined=True):
- if combined:
- model_filename = "__model__" if not model_filename else model_filename
- params_filename = "__params__" if not params_filename else params_filename
- place = fluid.CPUPlace()
- exe = fluid.Executor(place)
-
- program, feeded_var_names, target_vars = fluid.io.load_inference_model(
- dirname=self.default_pretrained_model_path, executor=exe)
-
- fluid.io.save_inference_model(
- dirname=dirname,
- main_program=program,
- executor=exe,
- feeded_var_names=feeded_var_names,
- target_vars=target_vars,
- model_filename=model_filename,
- params_filename=params_filename)
-
- @serving
- def serving_method(self, images, **kwargs):
- """
- Run as a service.
- """
- images_decode = [base64_to_cv2(image) for image in images]
- results = self.object_detection(images=images_decode, **kwargs)
- return results
-
- @runnable
- def run_cmd(self, argvs):
- """
- Run as a command.
- """
- self.parser = argparse.ArgumentParser(
- description="Run the {} module.".format(self.name),
- prog='hub run {}'.format(self.name),
- usage='%(prog)s',
- add_help=True)
- self.arg_input_group = self.parser.add_argument_group(
- title="Input options", description="Input data. Required")
- self.arg_config_group = self.parser.add_argument_group(
- title="Config options",
- description=
- "Run configuration for controlling module behavior, not required.")
- self.add_module_config_arg()
- self.add_module_input_arg()
- args = self.parser.parse_args(argvs)
- results = self.object_detection(
- paths=[args.input_path],
- batch_size=args.batch_size,
- use_gpu=args.use_gpu,
- output_dir=args.output_dir,
- visualization=args.visualization,
- score_thresh=args.score_thresh)
- return results
-
- def add_module_config_arg(self):
- """
- Add the command config options.
- """
- self.arg_config_group.add_argument(
- '--use_gpu',
- type=ast.literal_eval,
- default=False,
- help="whether to use GPU or not")
- self.arg_config_group.add_argument(
- '--output_dir',
- type=str,
- default='detection_result',
- help="The directory to save output images.")
- self.arg_config_group.add_argument(
- '--visualization',
- type=ast.literal_eval,
- default=False,
- help="whether to save output as images.")
-
- def add_module_input_arg(self):
- """
- Add the command input options.
- """
- self.arg_input_group.add_argument(
- '--input_path', type=str, help="path to image.")
- self.arg_input_group.add_argument(
- '--batch_size',
- type=ast.literal_eval,
- default=1,
- help="batch size.")
- self.arg_input_group.add_argument(
- '--score_thresh',
- type=ast.literal_eval,
- default=0.5,
- help="threshold for object detection.")
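The `object_detection` method above was this module's public entry point. Below is a minimal usage sketch of that API as defined by the deleted code; `test.jpg` is a hypothetical local image, and the module is assumed to be installed via `hub install yolov3_resnet34_coco2017`.

```python
# Hypothetical usage sketch of the deleted module's object_detection API.
import paddlehub as hub

module = hub.Module(name="yolov3_resnet34_coco2017")
results = module.object_detection(
    paths=["test.jpg"],   # or images=[ndarray], each [H, W, C] in BGR
    batch_size=1,
    use_gpu=False,        # True requires CUDA_VISIBLE_DEVICES to be set
    score_thresh=0.5,
    visualization=True)   # saves annotated images under output_dir

for res in results:
    for det in res["data"]:
        print(det["label"], det["confidence"],
              det["left"], det["top"], det["right"], det["bottom"])
```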
Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.object_detection( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization, - score_thresh=args.score_thresh) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='detection_result', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - - def add_module_input_arg(self): - """ - Add the command input options. - """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") - self.arg_input_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - self.arg_input_group.add_argument( - '--score_thresh', - type=ast.literal_eval, - default=0.5, - help="threshold for object detecion.") diff --git a/hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/nonlocal_helper.py b/hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/nonlocal_helper.py deleted file mode 100644 index 599b8dfa07b6372f25990649212b232c523e19de..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/nonlocal_helper.py +++ /dev/null @@ -1,154 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import paddle.fluid as fluid -from paddle.fluid import ParamAttr - -nonlocal_params = { - "use_zero_init_conv": False, - "conv_init_std": 0.01, - "no_bias": True, - "use_maxpool": False, - "use_softmax": True, - "use_bn": False, - "use_scale": True, # vital for the model prformance!!! 
- "use_affine": False, - "bn_momentum": 0.9, - "bn_epsilon": 1.0000001e-5, - "bn_init_gamma": 0.9, - "weight_decay_bn": 1.e-4, -} - - -def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner, - max_pool_stride=2): - cur = input - theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr=ParamAttr(name = prefix + '_theta' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if not nonlocal_params["no_bias"] else False, \ - name = prefix + '_theta') - theta_shape = theta.shape - theta_shape_op = fluid.layers.shape(theta) - theta_shape_op.stop_gradient = True - - if nonlocal_params["use_maxpool"]: - max_pool = fluid.layers.pool2d(input = cur, \ - pool_size = [max_pool_stride, max_pool_stride], \ - pool_type = 'max', \ - pool_stride = [max_pool_stride, max_pool_stride], \ - pool_padding = [0, 0], \ - name = prefix + '_pool') - else: - max_pool = cur - - phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_phi' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_phi') - phi_shape = phi.shape - - g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_g' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_g' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_g') - g_shape = g.shape - # we have to use explicit batch size (to support arbitrary spacetime size) - # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784) - theta = fluid.layers.reshape(theta, shape=(0, 0, -1)) - theta = fluid.layers.transpose(theta, [0, 2, 1]) - phi = fluid.layers.reshape(phi, [0, 0, -1]) - theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity') - g = fluid.layers.reshape(g, [0, 0, -1]) - - if nonlocal_params["use_softmax"]: - if nonlocal_params["use_scale"]: - theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5) - else: - theta_phi_sc = theta_phi - p = fluid.layers.softmax( - theta_phi_sc, name=prefix + '_affinity' + '_prob') - else: - # not clear about what is doing in xlw's code - p = None # not implemented - raise "Not implemented when not use softmax" - - # note g's axis[2] corresponds to p's axis[2] - # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1) - p = fluid.layers.transpose(p, [0, 2, 1]) - t = fluid.layers.matmul(g, p, name=prefix + '_y') - - # reshape back - # e.g. 
(8, 1024, 784) => (8, 1024, 4, 14, 14) - t_shape = t.shape - t_re = fluid.layers.reshape( - t, shape=list(theta_shape), actual_shape=theta_shape_op) - blob_out = t_re - blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \ - filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_out' + "_w", \ - initializer = fluid.initializer.Constant(value = 0.) \ - if nonlocal_params["use_zero_init_conv"] \ - else fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_out' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_out') - blob_out_shape = blob_out.shape - - if nonlocal_params["use_bn"]: - bn_name = prefix + "_bn" - blob_out = fluid.layers.batch_norm(blob_out, \ - # is_test = test_mode, \ - momentum = nonlocal_params["bn_momentum"], \ - epsilon = nonlocal_params["bn_epsilon"], \ - name = bn_name, \ - param_attr = ParamAttr(name = bn_name + "_s", \ - initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - bias_attr = ParamAttr(name = bn_name + "_b", \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - moving_mean_name = bn_name + "_rm", \ - moving_variance_name = bn_name + "_riv") # add bn - - if nonlocal_params["use_affine"]: - affine_scale = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_s'), \ - default_initializer = fluid.initializer.Constant(value = 1.)) - affine_bias = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_b'), \ - default_initializer = fluid.initializer.Constant(value = 0.)) - blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \ - bias = affine_bias, name = prefix + '_affine') # add affine - - return blob_out - - -def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner): - ''' - add_space_nonlocal: - Non-local Neural Networks: see https://arxiv.org/abs/1711.07971 - ''' - conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner) - output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum') - return output diff --git a/hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/processor.py b/hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/processor.py deleted file mode 100644 index a6a91b32724c10aaed9be4498eba4311fdc7a2e8..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/processor.py +++ /dev/null @@ -1,180 +0,0 @@ -# coding=utf-8 -import base64 -import os - -import cv2 -import numpy as np -from PIL import Image, ImageDraw - -__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess'] - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def check_dir(dir_path): - if not os.path.exists(dir_path): - os.makedirs(dir_path) - elif os.path.isfile(dir_path): - os.remove(dir_path) - os.makedirs(dir_path) - - -def get_save_image_name(img, output_dir, image_path): - """Get save image name from source image path. 
- """ - image_name = os.path.split(image_path)[-1] - name, ext = os.path.splitext(image_name) - - if ext == '': - if img.format == 'PNG': - ext = '.png' - elif img.format == 'JPEG': - ext = '.jpg' - elif img.format == 'BMP': - ext = '.bmp' - else: - if img.mode == "RGB" or img.mode == "L": - ext = ".jpg" - elif img.mode == "RGBA" or img.mode == "P": - ext = '.png' - - return os.path.join(output_dir, "{}".format(name)) + ext - - -def draw_bounding_box_on_image(image_path, data_list, save_dir): - image = Image.open(image_path) - draw = ImageDraw.Draw(image) - for data in data_list: - left, right, top, bottom = data['left'], data['right'], data[ - 'top'], data['bottom'] - # draw bbox - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), - (left, top)], - width=2, - fill='red') - # draw label - if image.mode == 'RGB': - text = data['label'] + ": %.2f%%" % (100 * data['confidence']) - textsize_width, textsize_height = draw.textsize(text=text) - draw.rectangle( - xy=(left, top - (textsize_height + 5), - left + textsize_width + 10, top), - fill=(255, 255, 255)) - draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) - - save_name = get_save_image_name(image, save_dir, image_path) - if os.path.exists(save_name): - os.remove(save_name) - - image.save(save_name) - return save_name - - -def clip_bbox(bbox, img_width, img_height): - xmin = max(min(bbox[0], img_width), 0.) - ymin = max(min(bbox[1], img_height), 0.) - xmax = max(min(bbox[2], img_width), 0.) - ymax = max(min(bbox[3], img_height), 0.) - return float(xmin), float(ymin), float(xmax), float(ymax) - - -def load_label_info(file_path): - with open(file_path, 'r') as fr: - text = fr.readlines() - label_names = [] - for info in text: - label_names.append(info.strip()) - return label_names - - -def postprocess(paths, - images, - data_out, - score_thresh, - label_names, - output_dir, - handle_id, - visualization=True): - """ - postprocess the lod_tensor produced by fluid.Executor.run - - Args: - paths (list[str]): The paths of images. - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - data_out (lod_tensor): data output of predictor. - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - output_dir (str): The path to store output images. - visualization (bool): Whether to save image or not. - score_thresh (float): the low limit of bounding box. - label_names (list[str]): label names. - handle_id (int): The number of images that have been handled. - - Returns: - res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of detection result; - confidence (float): The confidence of detection result. - save_path (str): The path to save output images. - """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() - - check_dir(output_dir) - - assert type(paths) is list, "type(paths) is not list." 
- if handle_id < len(paths): - unhandled_paths = paths[handle_id:] - unhandled_paths_num = len(unhandled_paths) - else: - unhandled_paths_num = 0 - - output = list() - for index in range(len(lod) - 1): - output_i = {'data': []} - if index < unhandled_paths_num: - org_img_path = unhandled_paths[index] - org_img = Image.open(org_img_path) - else: - org_img = images[index - unhandled_paths_num] - org_img = org_img.astype(np.uint8) - org_img = Image.fromarray(org_img[:, :, ::-1]) - if visualization: - org_img_path = get_save_image_name( - org_img, output_dir, 'image_numpy_{}'.format( - (handle_id + index))) - org_img.save(org_img_path) - org_img_height = org_img.height - org_img_width = org_img.width - result_i = results[lod[index]:lod[index + 1]] - for row in result_i: - if len(row) != 6: - continue - if row[1] < score_thresh: - continue - category_id = int(row[0]) - confidence = row[1] - bbox = row[2:] - dt = {} - dt['label'] = label_names[category_id] - dt['confidence'] = float(confidence) - dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox( - bbox, org_img_width, org_img_height) - output_i['data'].append(dt) - - output.append(output_i) - if visualization: - output_i['save_path'] = draw_bounding_box_on_image( - org_img_path, output_i['data'], output_dir) - - return output diff --git a/hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/resnet.py b/hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/resnet.py deleted file mode 100644 index 4bd6fb61e9a51c70b6197b25a7cf788c2ca5b790..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/resnet.py +++ /dev/null @@ -1,447 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -from collections import OrderedDict -from numbers import Integral - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.framework import Variable -from paddle.fluid.regularizer import L2Decay -from paddle.fluid.initializer import Constant - -from .nonlocal_helper import add_space_nonlocal -from .name_adapter import NameAdapter - -__all__ = ['ResNet', 'ResNetC5'] - - -class ResNet(object): - """ - Residual Network, see https://arxiv.org/abs/1512.03385 - Args: - depth (int): ResNet depth, should be 34, 50. 
- freeze_at (int): freeze the backbone at which stage - norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel' - freeze_norm (bool): freeze normalization layers - norm_decay (float): weight decay for normalization layer weights - variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently - feature_maps (list): index of stages whose feature maps are returned - dcn_v2_stages (list): index of stages who select deformable conv v2 - nonlocal_stages (list): index of stages who select nonlocal networks - """ - __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name'] - - def __init__(self, - depth=50, - freeze_at=0, - norm_type='sync_bn', - freeze_norm=False, - norm_decay=0., - variant='b', - feature_maps=[3, 4, 5], - dcn_v2_stages=[], - weight_prefix_name='', - nonlocal_stages=[], - get_prediction=False, - class_dim=1000): - super(ResNet, self).__init__() - - if isinstance(feature_maps, Integral): - feature_maps = [feature_maps] - - assert depth in [34, 50], \ - "depth {} not in [34, 50]" - assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant" - assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" - assert len(feature_maps) > 0, "need one or more feature maps" - assert norm_type in ['bn', 'sync_bn', 'affine_channel'] - assert not (len(nonlocal_stages)>0 and depth<50), \ - "non-local is not supported for resnet18 or resnet34" - - self.depth = depth - self.freeze_at = freeze_at - self.norm_type = norm_type - self.norm_decay = norm_decay - self.freeze_norm = freeze_norm - self.variant = variant - self._model_type = 'ResNet' - self.feature_maps = feature_maps - self.dcn_v2_stages = dcn_v2_stages - self.depth_cfg = { - 34: ([3, 4, 6, 3], self.basicblock), - 50: ([3, 4, 6, 3], self.bottleneck), - } - self.stage_filters = [64, 128, 256, 512] - self._c1_out_chan_num = 64 - self.na = NameAdapter(self) - self.prefix_name = weight_prefix_name - - self.nonlocal_stages = nonlocal_stages - self.nonlocal_mod_cfg = { - 50: 2, - 101: 5, - 152: 8, - 200: 12, - } - self.get_prediction = get_prediction - self.class_dim = class_dim - - def _conv_offset(self, - input, - filter_size, - stride, - padding, - act=None, - name=None): - out_channel = filter_size * filter_size * 3 - out = fluid.layers.conv2d( - input, - num_filters=out_channel, - filter_size=filter_size, - stride=stride, - padding=padding, - param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"), - bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"), - act=act, - name=name) - return out - - def _conv_norm(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - dcn_v2=False): - _name = self.prefix_name + name if self.prefix_name != '' else name - if not dcn_v2: - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + '.conv2d.output.1') - else: - # select deformable conv" - offset_mask = self._conv_offset( - input=input, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - act=None, - name=_name + "_conv_offset") - offset_channel = filter_size**2 * 2 - mask_channel = filter_size**2 - offset, mask = fluid.layers.split( - input=offset_mask, - num_or_sections=[offset_channel, mask_channel], - dim=1) - mask = fluid.layers.sigmoid(mask) - conv = fluid.layers.deformable_conv( - input=input, - offset=offset, - 
mask=mask, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - deformable_groups=1, - im2col_step=1, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + ".conv2d.output.1") - - bn_name = self.na.fix_conv_norm_name(name) - bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name - - norm_lr = 0. if self.freeze_norm else 1. - norm_decay = self.norm_decay - pattr = ParamAttr( - name=bn_name + '_scale', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - battr = ParamAttr( - name=bn_name + '_offset', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - - if self.norm_type in ['bn', 'sync_bn']: - global_stats = True if self.freeze_norm else False - out = fluid.layers.batch_norm( - input=conv, - act=act, - name=bn_name + '.output.1', - param_attr=pattr, - bias_attr=battr, - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', - use_global_stats=global_stats) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif self.norm_type == 'affine_channel': - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=pattr, - default_initializer=fluid.initializer.Constant(1.)) - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=battr, - default_initializer=fluid.initializer.Constant(0.)) - out = fluid.layers.affine_channel( - x=conv, scale=scale, bias=bias, act=act) - if self.freeze_norm: - scale.stop_gradient = True - bias.stop_gradient = True - return out - - def _shortcut(self, input, ch_out, stride, is_first, name): - max_pooling_in_short_cut = self.variant == 'd' - ch_in = input.shape[1] - # the naming rule is same as pretrained weight - name = self.na.fix_shortcut_name(name) - std_senet = getattr(self, 'std_senet', False) - if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first): - if std_senet: - if is_first: - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return self._conv_norm(input, ch_out, 3, stride, name=name) - if max_pooling_in_short_cut and not is_first: - input = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - ceil_mode=True, - pool_type='avg') - return self._conv_norm(input, ch_out, 1, 1, name=name) - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - if self.variant == 'a': - stride1, stride2 = stride, 1 - else: - stride1, stride2 = 1, stride - - # ResNeXt - groups = getattr(self, 'groups', 1) - group_width = getattr(self, 'group_width', -1) - if groups == 1: - expand = 4 - elif (groups * group_width) == 256: - expand = 1 - else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d - num_filters = num_filters // 2 - expand = 2 - - conv_name1, conv_name2, conv_name3, \ - shortcut_name = self.na.fix_bottleneck_name(name) - std_senet = getattr(self, 'std_senet', False) - if std_senet: - conv_def = [[ - int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1 - ], [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - else: - conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], - [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - - residual = input - for i, (c, k, s, act, g, _name) 
in enumerate(conv_def): - residual = self._conv_norm( - input=residual, - num_filters=c, - filter_size=k, - stride=s, - act=act, - groups=g, - name=_name, - dcn_v2=(i == 1 and dcn_v2)) - short = self._shortcut( - input, - num_filters * expand, - stride, - is_first=is_first, - name=shortcut_name) - # Squeeze-and-Excitation - if callable(getattr(self, '_squeeze_excitation', None)): - residual = self._squeeze_excitation( - input=residual, num_channels=num_filters, name='fc' + name) - return fluid.layers.elementwise_add( - x=short, y=residual, act='relu', name=name + ".add.output.5") - - def basicblock(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - assert dcn_v2 is False, "Not implemented yet." - conv0 = self._conv_norm( - input=input, - num_filters=num_filters, - filter_size=3, - act='relu', - stride=stride, - name=name + "_branch2a") - conv1 = self._conv_norm( - input=conv0, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - short = self._shortcut( - input, num_filters, stride, is_first, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - - def layer_warp(self, input, stage_num): - """ - Args: - input (Variable): input variable. - stage_num (int): the stage number, should be 2, 3, 4, 5 - - Returns: - The last variable in endpoint-th stage. - """ - assert stage_num in [2, 3, 4, 5] - - stages, block_func = self.depth_cfg[self.depth] - count = stages[stage_num - 2] - - ch_out = self.stage_filters[stage_num - 2] - is_first = False if stage_num != 2 else True - dcn_v2 = True if stage_num in self.dcn_v2_stages else False - - nonlocal_mod = 1000 - if stage_num in self.nonlocal_stages: - nonlocal_mod = self.nonlocal_mod_cfg[ - self.depth] if stage_num == 4 else 2 - - # Make the layer name and parameter name consistent - # with ImageNet pre-trained model - conv = input - for i in range(count): - conv_name = self.na.fix_layer_warp_name(stage_num, count, i) - if self.depth < 50: - is_first = True if i == 0 and stage_num == 2 else False - conv = block_func( - input=conv, - num_filters=ch_out, - stride=2 if i == 0 and stage_num != 2 else 1, - is_first=is_first, - name=conv_name, - dcn_v2=dcn_v2) - - # add non local model - dim_in = conv.shape[1] - nonlocal_name = "nonlocal_conv{}".format(stage_num) - if i % nonlocal_mod == nonlocal_mod - 1: - conv = add_space_nonlocal(conv, dim_in, dim_in, - nonlocal_name + '_{}'.format(i), - int(dim_in / 2)) - return conv - - def c1_stage(self, input): - out_chan = self._c1_out_chan_num - - conv1_name = self.na.fix_c1_stage_name() - - if self.variant in ['c', 'd']: - conv_def = [ - [out_chan // 2, 3, 2, "conv1_1"], - [out_chan // 2, 3, 1, "conv1_2"], - [out_chan, 3, 1, "conv1_3"], - ] - else: - conv_def = [[out_chan, 7, 2, conv1_name]] - - for (c, k, s, _name) in conv_def: - input = self._conv_norm( - input=input, - num_filters=c, - filter_size=k, - stride=s, - act='relu', - name=_name) - - output = fluid.layers.pool2d( - input=input, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - return output - - def __call__(self, input): - assert isinstance(input, Variable) - assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ - "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) - - res_endpoints = [] - - res = input - feature_maps = self.feature_maps - severed_head = getattr(self, 'severed_head', False) - if not severed_head: - res = self.c1_stage(res) - feature_maps = range(2, max(self.feature_maps) + 1) - - for i in feature_maps: 
- res = self.layer_warp(res, i) - if i in self.feature_maps: - res_endpoints.append(res) - if self.freeze_at >= i: - res.stop_gradient = True - if self.get_prediction: - pool = fluid.layers.pool2d( - input=res, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - - out = fluid.layers.fc( - input=pool, - size=self.class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - out = fluid.layers.softmax(out) - return out - return OrderedDict([('res{}_sum'.format(self.feature_maps[idx]), feat) - for idx, feat in enumerate(res_endpoints)]) - - -class ResNetC5(ResNet): - def __init__(self, - depth=50, - freeze_at=2, - norm_type='affine_channel', - freeze_norm=True, - norm_decay=0., - variant='b', - feature_maps=[5], - weight_prefix_name=''): - super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm, - norm_decay, variant, feature_maps) - self.severed_head = True diff --git a/hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/yolo_head.py b/hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/yolo_head.py deleted file mode 100644 index 7428fb4c281507c30918e12a04753d559346cf7b..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/yolo_head.py +++ /dev/null @@ -1,273 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['MultiClassNMS', 'YOLOv3Head'] - - -class MultiClassNMS(object): - # __op__ = fluid.layers.multiclass_nms - def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k, - normalized, score_threshold): - super(MultiClassNMS, self).__init__() - self.background_label = background_label - self.keep_top_k = keep_top_k - self.nms_threshold = nms_threshold - self.nms_top_k = nms_top_k - self.normalized = normalized - self.score_threshold = score_threshold - - -class YOLOv3Head(object): - """Head block for YOLOv3 network - - Args: - norm_decay (float): weight decay for normalization layer weights - num_classes (int): number of output classes - ignore_thresh (float): threshold to ignore confidence loss - label_smooth (bool): whether to use label smoothing - anchors (list): anchors - anchor_masks (list): anchor masks - nms (object): an instance of `MultiClassNMS` - """ - - def __init__(self, - norm_decay=0., - num_classes=80, - ignore_thresh=0.7, - label_smooth=True, - anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], - [59, 119], [116, 90], [156, 198], [373, 326]], - anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], - nms=MultiClassNMS( - background_label=-1, - keep_top_k=100, - nms_threshold=0.45, - nms_top_k=1000, - normalized=True, - score_threshold=0.01), - weight_prefix_name=''): - self.norm_decay = norm_decay - self.num_classes = num_classes - self.ignore_thresh = ignore_thresh - self.label_smooth = label_smooth - self.anchor_masks = anchor_masks - self._parse_anchors(anchors) - self.nms = nms - self.prefix_name = weight_prefix_name - - def _conv_bn(self, - input, - ch_out, - filter_size, - stride, - padding, - act='leaky', - is_test=True, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + ".conv.weights"), - 
bias_attr=False) - - bn_name = name + ".bn" - bn_param_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset') - out = fluid.layers.batch_norm( - input=conv, - act=None, - is_test=is_test, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + '.var') - - if act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - return out - - def _detection_block(self, input, channel, is_test=True, name=None): - assert channel % 2 == 0, \ - "channel {} cannot be divided by 2 in detection block {}" \ - .format(channel, name) - - conv = input - for j in range(2): - conv = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.{}.0'.format(name, j)) - conv = self._conv_bn( - conv, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.{}.1'.format(name, j)) - route = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.2'.format(name)) - tip = self._conv_bn( - route, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.tip'.format(name)) - return route, tip - - def _upsample(self, input, scale=2, name=None): - out = fluid.layers.resize_nearest( - input=input, scale=float(scale), name=name) - return out - - def _parse_anchors(self, anchors): - """ - Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors - - """ - self.anchors = [] - self.mask_anchors = [] - - assert len(anchors) > 0, "ANCHORS not set." - assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." - - for anchor in anchors: - assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) - self.anchors.extend(anchor) - - anchor_num = len(anchors) - for masks in self.anchor_masks: - self.mask_anchors.append([]) - for mask in masks: - assert mask < anchor_num, "anchor mask index overflow" - self.mask_anchors[-1].extend(anchors[mask]) - - def _get_outputs(self, input, is_train=True): - """ - Get YOLOv3 head output - - Args: - input (list): List of Variables, output of backbone stages - is_train (bool): whether in train or test mode - - Returns: - outputs (list): Variables of each output layer - """ - - outputs = [] - - # get last out_layer_num blocks in reverse order - out_layer_num = len(self.anchor_masks) - if isinstance(input, OrderedDict): - blocks = list(input.values())[-1:-out_layer_num - 1:-1] - else: - blocks = input[-1:-out_layer_num - 1:-1] - route = None - for i, block in enumerate(blocks): - if i > 0: # perform concat in first 2 detection_block - block = fluid.layers.concat(input=[route, block], axis=1) - route, tip = self._detection_block( - block, - channel=512 // (2**i), - is_test=(not is_train), - name=self.prefix_name + "yolo_block.{}".format(i)) - - # out channel number = mask_num * (5 + class_num) - num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) - block_out = fluid.layers.conv2d( - input=tip, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=ParamAttr(name=self.prefix_name + - "yolo_output.{}.conv.weights".format(i)), - bias_attr=ParamAttr( - regularizer=L2Decay(0.), - name=self.prefix_name + - "yolo_output.{}.conv.bias".format(i))) - outputs.append(block_out) - - if i < len(blocks) - 1: - # do not perform upsample in the last detection_block - route = self._conv_bn( - input=route, - 
ch_out=256 // (2**i), - filter_size=1, - stride=1, - padding=0, - is_test=(not is_train), - name=self.prefix_name + "yolo_transition.{}".format(i)) - # upsample - route = self._upsample(route) - - return outputs, blocks - - def get_prediction(self, outputs, im_size): - """ - Get prediction result of YOLOv3 network - - Args: - outputs (list): list of Variables, return from _get_outputs - im_size (Variable): Variable of size([h, w]) of each image - - Returns: - pred (Variable): The prediction result after non-max suppress. - - """ - boxes = [] - scores = [] - downsample = 32 - for i, output in enumerate(outputs): - box, score = fluid.layers.yolo_box( - x=output, - img_size=im_size, - anchors=self.mask_anchors[i], - class_num=self.num_classes, - conf_thresh=self.nms.score_threshold, - downsample_ratio=downsample, - name=self.prefix_name + "yolo_box" + str(i)) - boxes.append(box) - scores.append(fluid.layers.transpose(score, perm=[0, 2, 1])) - - downsample //= 2 - - yolo_boxes = fluid.layers.concat(boxes, axis=1) - yolo_scores = fluid.layers.concat(scores, axis=2) - pred = fluid.layers.multiclass_nms( - bboxes=yolo_boxes, - scores=yolo_scores, - score_threshold=self.nms.score_threshold, - nms_top_k=self.nms.nms_top_k, - keep_top_k=self.nms.keep_top_k, - nms_threshold=self.nms.nms_threshold, - background_label=self.nms.background_label, - normalized=self.nms.normalized, - name="multiclass_nms") - return pred diff --git a/hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/data_feed.py b/hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/data_feed.py deleted file mode 100644 index d4d35611df9ade3fa6bbd60c04320bec05194d1d..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/data_feed.py +++ /dev/null @@ -1,71 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import os - -import cv2 -import numpy as np - -__all__ = ['reader'] - - -def reader(paths=[], images=None): - """ - data generator - - Args: - paths (list[str]): paths to images. - images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] - - Yield: - res (list): preprocessed image and the size of original image. - """ - img_list = [] - if paths: - assert type(paths) is list, "type(paths) is not list." 
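- # Both inputs feed the same pipeline below: paths are read with
- # cv2.imread (BGR layout), and user-supplied ndarrays are appended
- # unchanged, so they are likewise expected as BGR [H, W, C] arrays.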
- for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file path.".format(img_path) - img = cv2.imread(img_path).astype('float32') - img_list.append(img) - if images is not None: - for img in images: - img_list.append(img) - - for im in img_list: - # im_size - im_shape = im.shape - im_size = np.array([im_shape[0], im_shape[1]], dtype=np.int32) - - # decode image - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - - # resize image - target_size = 608 - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - if float(im_size_min) == 0: - raise ZeroDivisionError('min size of image is 0') - - im_scale_x = float(target_size) / float(im_shape[1]) - im_scale_y = float(target_size) / float(im_shape[0]) - im = cv2.resize( - im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=2) - - # normalize image - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - im = im.astype(np.float32, copy=False) - mean = np.array(mean)[np.newaxis, np.newaxis, :] - std = np.array(std)[np.newaxis, np.newaxis, :] - im = im / 255.0 - im -= mean - im /= std - - # permute - im = np.swapaxes(im, 1, 2) - im = np.swapaxes(im, 1, 0) - - yield [im, im_size] diff --git a/hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/module.py b/hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/module.py deleted file mode 100644 index 5c25f17cd37df1a7e67dba76ade582ddbc0b89e5..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/module.py +++ /dev/null @@ -1,321 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import - -import ast -import argparse -import os -from functools import partial - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix - -from yolov3_resnet50_vd_coco2017.resnet import ResNet -from yolov3_resnet50_vd_coco2017.processor import load_label_info, postprocess, base64_to_cv2 -from yolov3_resnet50_vd_coco2017.data_feed import reader -from yolov3_resnet50_vd_coco2017.yolo_head import MultiClassNMS, YOLOv3Head - - -@moduleinfo( - name="yolov3_resnet50_vd_coco2017", - version="1.0.1", - type="CV/object_detection", - summary= - "Baidu's YOLOv3 model for object detection with backbone ResNet50, trained with dataset coco2017.", - author="paddlepaddle", - author_email="paddle-dev@baidu.com") -class YOLOv3ResNet50Coco2017(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "yolov3_resnet50_model") - self.label_names = load_label_info( - os.path.join(self.directory, "label_file.txt")) - self._set_config() - - def _set_config(self): - """ - predictor config setting. 
- """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, trainable=True, pretrained=True, get_prediction=False): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - get_prediction (bool): whether to get prediction. - - Returns: - inputs(dict): the input variables. - outputs(dict): the output variables. - context_prog (Program): the program to execute transfer learning. - """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - # image - image = fluid.layers.data( - name='image', shape=[3, 608, 608], dtype='float32') - # backbone - backbone = ResNet( - norm_type='sync_bn', - freeze_at=0, - freeze_norm=False, - norm_decay=0., - dcn_v2_stages=[5], - depth=50, - variant='d', - feature_maps=[3, 4, 5]) - # body_feats - body_feats = backbone(image) - # im_size - im_size = fluid.layers.data( - name='im_size', shape=[2], dtype='int32') - # yolo_head - yolo_head = YOLOv3Head(num_classes=80) - # head_features - head_features, body_features = yolo_head._get_outputs( - body_feats, is_train=trainable) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - # var_prefix - var_prefix = '@HUB_{}@'.format(self.name) - # name of inputs - inputs = { - 'image': var_prefix + image.name, - 'im_size': var_prefix + im_size.name - } - # name of outputs - if get_prediction: - bbox_out = yolo_head.get_prediction(head_features, im_size) - outputs = {'bbox_out': [var_prefix + bbox_out.name]} - else: - outputs = { - 'head_features': - [var_prefix + var.name for var in head_features], - 'body_features': - [var_prefix + var.name for var in body_features] - } - # add_vars_prefix - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(fluid.default_startup_program(), var_prefix) - # inputs - inputs = { - key: context_prog.global_block().vars[value] - for key, value in inputs.items() - } - # outputs - outputs = { - key: [ - context_prog.global_block().vars[varname] - for varname in value - ] - for key, value in outputs.items() - } - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - # pretrained - if pretrained: - - def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) - else: - exe.run(startup_program) - - return inputs, outputs, context_prog - - def object_detection(self, - paths=None, - images=None, - batch_size=1, - use_gpu=False, - output_dir='detection_result', - score_thresh=0.5, - visualization=True): - """API of Object Detection. - - Args: - paths (list[str]): The paths of images. 
- images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
- batch_size (int): batch size.
- use_gpu (bool): Whether to use GPU.
- output_dir (str): The path to store output images.
- visualization (bool): Whether to save image or not.
- score_thresh (float): threshold for object detection.
-
- Returns:
- res (list[dict]): The result of COCO2017 detection. keys include 'data', 'save_path', the corresponding value is:
- data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
- left (float): The X coordinate of the upper left corner of the bounding box;
- top (float): The Y coordinate of the upper left corner of the bounding box;
- right (float): The X coordinate of the lower right corner of the bounding box;
- bottom (float): The Y coordinate of the lower right corner of the bounding box;
- label (str): The label of detection result;
- confidence (float): The confidence of detection result.
- save_path (str, optional): The path to save output images.
- """
- if use_gpu:
- try:
- _places = os.environ["CUDA_VISIBLE_DEVICES"]
- int(_places[0])
- except Exception:
- raise RuntimeError(
- "Environment variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as the CUDA device id."
- )
-
- paths = paths if paths else list()
- data_reader = partial(reader, paths, images)
- batch_reader = fluid.io.batch(data_reader, batch_size=batch_size)
- res = []
- for iter_id, feed_data in enumerate(batch_reader()):
- feed_data = np.array(feed_data)
- image_tensor = PaddleTensor(np.array(list(feed_data[:, 0])))
- im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1])))
- if use_gpu:
- data_out = self.gpu_predictor.run(
- [image_tensor, im_size_tensor])
- else:
- data_out = self.cpu_predictor.run(
- [image_tensor, im_size_tensor])
-
- output = postprocess(
- paths=paths,
- images=images,
- data_out=data_out,
- score_thresh=score_thresh,
- label_names=self.label_names,
- output_dir=output_dir,
- handle_id=iter_id * batch_size,
- visualization=visualization)
- res.extend(output)
- return res
-
- def save_inference_model(self,
- dirname,
- model_filename=None,
- params_filename=None,
- combined=True):
- if combined:
- model_filename = "__model__" if not model_filename else model_filename
- params_filename = "__params__" if not params_filename else params_filename
- place = fluid.CPUPlace()
- exe = fluid.Executor(place)
-
- program, feeded_var_names, target_vars = fluid.io.load_inference_model(
- dirname=self.default_pretrained_model_path, executor=exe)
-
- fluid.io.save_inference_model(
- dirname=dirname,
- main_program=program,
- executor=exe,
- feeded_var_names=feeded_var_names,
- target_vars=target_vars,
- model_filename=model_filename,
- params_filename=params_filename)
-
- @serving
- def serving_method(self, images, **kwargs):
- """
- Run as a service.
- """
- images_decode = [base64_to_cv2(image) for image in images]
- results = self.object_detection(images=images_decode, **kwargs)
- return results
-
- @runnable
- def run_cmd(self, argvs):
- """
- Run as a command.
- """
- self.parser = argparse.ArgumentParser(
- description="Run the {} module.".format(self.name),
- prog='hub run {}'.format(self.name),
- usage='%(prog)s',
- add_help=True)
- self.arg_input_group = self.parser.add_argument_group(
- title="Input options", description="Input data. Required")
- self.arg_config_group = self.parser.add_argument_group(
- title="Config options",
- description=
- "Run configuration for controlling module behavior, not required.")
- self.add_module_config_arg()
- self.add_module_input_arg()
- args = self.parser.parse_args(argvs)
- results = self.object_detection(
- paths=[args.input_path],
- batch_size=args.batch_size,
- use_gpu=args.use_gpu,
- output_dir=args.output_dir,
- visualization=args.visualization,
- score_thresh=args.score_thresh)
- return results
-
- def add_module_config_arg(self):
- """
- Add the command config options.
- """
- self.arg_config_group.add_argument(
- '--use_gpu',
- type=ast.literal_eval,
- default=False,
- help="whether to use GPU or not")
- self.arg_config_group.add_argument(
- '--output_dir',
- type=str,
- default='detection_result',
- help="The directory to save output images.")
- self.arg_config_group.add_argument(
- '--visualization',
- type=ast.literal_eval,
- default=False,
- help="whether to save output as images.")
-
- def add_module_input_arg(self):
- """
- Add the command input options.
- """
- self.arg_input_group.add_argument(
- '--input_path', type=str, help="path to image.")
- self.arg_input_group.add_argument(
- '--batch_size',
- type=ast.literal_eval,
- default=1,
- help="batch size.")
- self.arg_input_group.add_argument(
- '--score_thresh',
- type=ast.literal_eval,
- default=0.5,
- help="threshold for object detection.")
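The `@serving` entry point above accepts base64-encoded images, which `base64_to_cv2` in processor.py decodes back to ndarrays. A hypothetical client sketch follows; it assumes a server started with `hub serving start -m yolov3_resnet50_vd_coco2017` on the default port 8866, and the exact URL pattern may vary across PaddleHub versions.

```python
# Hypothetical serving client for the module removed above.
import base64
import json

import cv2
import requests

def cv2_to_base64(image):
    # Inverse of base64_to_cv2 in processor.py: encode an ndarray as JPEG.
    data = cv2.imencode(".jpg", image)[1]
    return base64.b64encode(data.tobytes()).decode("utf8")

img = cv2.imread("test.jpg")  # hypothetical local image
payload = {"images": [cv2_to_base64(img)]}
headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:8866/predict/yolov3_resnet50_vd_coco2017"
r = requests.post(url=url, headers=headers, data=json.dumps(payload))
print(r.json())
```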
Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.object_detection( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization, - score_thresh=args.score_thresh) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='detection_result', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - - def add_module_input_arg(self): - """ - Add the command input options. - """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") - self.arg_input_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - self.arg_input_group.add_argument( - '--score_thresh', - type=ast.literal_eval, - default=0.5, - help="threshold for object detecion.") diff --git a/hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/nonlocal_helper.py b/hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/nonlocal_helper.py deleted file mode 100644 index 599b8dfa07b6372f25990649212b232c523e19de..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/nonlocal_helper.py +++ /dev/null @@ -1,154 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import paddle.fluid as fluid -from paddle.fluid import ParamAttr - -nonlocal_params = { - "use_zero_init_conv": False, - "conv_init_std": 0.01, - "no_bias": True, - "use_maxpool": False, - "use_softmax": True, - "use_bn": False, - "use_scale": True, # vital for the model prformance!!! 
- "use_affine": False, - "bn_momentum": 0.9, - "bn_epsilon": 1.0000001e-5, - "bn_init_gamma": 0.9, - "weight_decay_bn": 1.e-4, -} - - -def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner, - max_pool_stride=2): - cur = input - theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr=ParamAttr(name = prefix + '_theta' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if not nonlocal_params["no_bias"] else False, \ - name = prefix + '_theta') - theta_shape = theta.shape - theta_shape_op = fluid.layers.shape(theta) - theta_shape_op.stop_gradient = True - - if nonlocal_params["use_maxpool"]: - max_pool = fluid.layers.pool2d(input = cur, \ - pool_size = [max_pool_stride, max_pool_stride], \ - pool_type = 'max', \ - pool_stride = [max_pool_stride, max_pool_stride], \ - pool_padding = [0, 0], \ - name = prefix + '_pool') - else: - max_pool = cur - - phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_phi' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_phi') - phi_shape = phi.shape - - g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_g' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_g' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_g') - g_shape = g.shape - # we have to use explicit batch size (to support arbitrary spacetime size) - # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784) - theta = fluid.layers.reshape(theta, shape=(0, 0, -1)) - theta = fluid.layers.transpose(theta, [0, 2, 1]) - phi = fluid.layers.reshape(phi, [0, 0, -1]) - theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity') - g = fluid.layers.reshape(g, [0, 0, -1]) - - if nonlocal_params["use_softmax"]: - if nonlocal_params["use_scale"]: - theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5) - else: - theta_phi_sc = theta_phi - p = fluid.layers.softmax( - theta_phi_sc, name=prefix + '_affinity' + '_prob') - else: - # not clear about what is doing in xlw's code - p = None # not implemented - raise "Not implemented when not use softmax" - - # note g's axis[2] corresponds to p's axis[2] - # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1) - p = fluid.layers.transpose(p, [0, 2, 1]) - t = fluid.layers.matmul(g, p, name=prefix + '_y') - - # reshape back - # e.g. 
(8, 1024, 784) => (8, 1024, 4, 14, 14) - t_shape = t.shape - t_re = fluid.layers.reshape( - t, shape=list(theta_shape), actual_shape=theta_shape_op) - blob_out = t_re - blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \ - filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_out' + "_w", \ - initializer = fluid.initializer.Constant(value = 0.) \ - if nonlocal_params["use_zero_init_conv"] \ - else fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_out' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_out') - blob_out_shape = blob_out.shape - - if nonlocal_params["use_bn"]: - bn_name = prefix + "_bn" - blob_out = fluid.layers.batch_norm(blob_out, \ - # is_test = test_mode, \ - momentum = nonlocal_params["bn_momentum"], \ - epsilon = nonlocal_params["bn_epsilon"], \ - name = bn_name, \ - param_attr = ParamAttr(name = bn_name + "_s", \ - initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - bias_attr = ParamAttr(name = bn_name + "_b", \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - moving_mean_name = bn_name + "_rm", \ - moving_variance_name = bn_name + "_riv") # add bn - - if nonlocal_params["use_affine"]: - affine_scale = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_s'), \ - default_initializer = fluid.initializer.Constant(value = 1.)) - affine_bias = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_b'), \ - default_initializer = fluid.initializer.Constant(value = 0.)) - blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \ - bias = affine_bias, name = prefix + '_affine') # add affine - - return blob_out - - -def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner): - ''' - add_space_nonlocal: - Non-local Neural Networks: see https://arxiv.org/abs/1711.07971 - ''' - conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner) - output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum') - return output diff --git a/hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/processor.py b/hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/processor.py deleted file mode 100644 index a6a91b32724c10aaed9be4498eba4311fdc7a2e8..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/processor.py +++ /dev/null @@ -1,180 +0,0 @@ -# coding=utf-8 -import base64 -import os - -import cv2 -import numpy as np -from PIL import Image, ImageDraw - -__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess'] - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def check_dir(dir_path): - if not os.path.exists(dir_path): - os.makedirs(dir_path) - elif os.path.isfile(dir_path): - os.remove(dir_path) - os.makedirs(dir_path) - - -def get_save_image_name(img, output_dir, image_path): - """Get save image name from source image path. 
-def get_save_image_name(img, output_dir, image_path):
-    """Get save image name from source image path.
-    """
-    image_name = os.path.split(image_path)[-1]
-    name, ext = os.path.splitext(image_name)
-
-    if ext == '':
-        if img.format == 'PNG':
-            ext = '.png'
-        elif img.format == 'JPEG':
-            ext = '.jpg'
-        elif img.format == 'BMP':
-            ext = '.bmp'
-        else:
-            if img.mode == "RGB" or img.mode == "L":
-                ext = ".jpg"
-            elif img.mode == "RGBA" or img.mode == "P":
-                ext = '.png'
-
-    return os.path.join(output_dir, "{}".format(name)) + ext
-
-
-def draw_bounding_box_on_image(image_path, data_list, save_dir):
-    image = Image.open(image_path)
-    draw = ImageDraw.Draw(image)
-    for data in data_list:
-        left, right, top, bottom = data['left'], data['right'], data[
-            'top'], data['bottom']
-        # draw bbox
-        draw.line([(left, top), (left, bottom), (right, bottom), (right, top),
-                   (left, top)],
-                  width=2,
-                  fill='red')
-        # draw label
-        if image.mode == 'RGB':
-            text = data['label'] + ": %.2f%%" % (100 * data['confidence'])
-            textsize_width, textsize_height = draw.textsize(text=text)
-            draw.rectangle(
-                xy=(left, top - (textsize_height + 5),
-                    left + textsize_width + 10, top),
-                fill=(255, 255, 255))
-            draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0))
-
-    save_name = get_save_image_name(image, save_dir, image_path)
-    if os.path.exists(save_name):
-        os.remove(save_name)
-
-    image.save(save_name)
-    return save_name
-
-
-def clip_bbox(bbox, img_width, img_height):
-    xmin = max(min(bbox[0], img_width), 0.)
-    ymin = max(min(bbox[1], img_height), 0.)
-    xmax = max(min(bbox[2], img_width), 0.)
-    ymax = max(min(bbox[3], img_height), 0.)
-    return float(xmin), float(ymin), float(xmax), float(ymax)
-
-
-def load_label_info(file_path):
-    with open(file_path, 'r') as fr:
-        text = fr.readlines()
-        label_names = []
-        for info in text:
-            label_names.append(info.strip())
-        return label_names
-
-
-def postprocess(paths,
-                images,
-                data_out,
-                score_thresh,
-                label_names,
-                output_dir,
-                handle_id,
-                visualization=True):
-    """
-    Postprocess the lod_tensor produced by fluid.Executor.run.
-
-    Args:
-        paths (list[str]): The paths of images.
-        images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
-        data_out (lod_tensor): data output of predictor.
-        score_thresh (float): the lower confidence bound; boxes scoring below it are dropped.
-        label_names (list[str]): label names.
-        output_dir (str): The path to store output images.
-        handle_id (int): The number of images that have been handled.
-        visualization (bool): Whether to save images or not.
-
-    Returns:
-        res (list[dict]): The result of object detection. Keys include 'data' and 'save_path'; the corresponding values are:
-            data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
-                left (float): The X coordinate of the upper left corner of the bounding box;
-                top (float): The Y coordinate of the upper left corner of the bounding box;
-                right (float): The X coordinate of the lower right corner of the bounding box;
-                bottom (float): The Y coordinate of the lower right corner of the bounding box;
-                label (str): The label of detection result;
-                confidence (float): The confidence of detection result.
-            save_path (str): The path to save output images.
-    """
-    lod_tensor = data_out[0]
-    lod = lod_tensor.lod[0]
-    results = lod_tensor.as_ndarray()
-
-    check_dir(output_dir)
-
-    assert type(paths) is list, "type(paths) is not list."
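-    # NOTE: `lod` holds per-image row offsets into `results`. With hypothetical
-    # values lod == [0, 3, 7], image 0 owns rows 0-2 and image 1 owns rows 3-6;
-    # each row is [category_id, score, xmin, ymin, xmax, ymax], which is why the
-    # loop below slices results[lod[index]:lod[index + 1]].
-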
- if handle_id < len(paths): - unhandled_paths = paths[handle_id:] - unhandled_paths_num = len(unhandled_paths) - else: - unhandled_paths_num = 0 - - output = list() - for index in range(len(lod) - 1): - output_i = {'data': []} - if index < unhandled_paths_num: - org_img_path = unhandled_paths[index] - org_img = Image.open(org_img_path) - else: - org_img = images[index - unhandled_paths_num] - org_img = org_img.astype(np.uint8) - org_img = Image.fromarray(org_img[:, :, ::-1]) - if visualization: - org_img_path = get_save_image_name( - org_img, output_dir, 'image_numpy_{}'.format( - (handle_id + index))) - org_img.save(org_img_path) - org_img_height = org_img.height - org_img_width = org_img.width - result_i = results[lod[index]:lod[index + 1]] - for row in result_i: - if len(row) != 6: - continue - if row[1] < score_thresh: - continue - category_id = int(row[0]) - confidence = row[1] - bbox = row[2:] - dt = {} - dt['label'] = label_names[category_id] - dt['confidence'] = float(confidence) - dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox( - bbox, org_img_width, org_img_height) - output_i['data'].append(dt) - - output.append(output_i) - if visualization: - output_i['save_path'] = draw_bounding_box_on_image( - org_img_path, output_i['data'], output_dir) - - return output diff --git a/hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/resnet.py b/hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/resnet.py deleted file mode 100644 index 4bd6fb61e9a51c70b6197b25a7cf788c2ca5b790..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/resnet.py +++ /dev/null @@ -1,447 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -from collections import OrderedDict -from numbers import Integral - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.framework import Variable -from paddle.fluid.regularizer import L2Decay -from paddle.fluid.initializer import Constant - -from .nonlocal_helper import add_space_nonlocal -from .name_adapter import NameAdapter - -__all__ = ['ResNet', 'ResNetC5'] - - -class ResNet(object): - """ - Residual Network, see https://arxiv.org/abs/1512.03385 - Args: - depth (int): ResNet depth, should be 34, 50. 
- freeze_at (int): freeze the backbone at which stage - norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel' - freeze_norm (bool): freeze normalization layers - norm_decay (float): weight decay for normalization layer weights - variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently - feature_maps (list): index of stages whose feature maps are returned - dcn_v2_stages (list): index of stages who select deformable conv v2 - nonlocal_stages (list): index of stages who select nonlocal networks - """ - __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name'] - - def __init__(self, - depth=50, - freeze_at=0, - norm_type='sync_bn', - freeze_norm=False, - norm_decay=0., - variant='b', - feature_maps=[3, 4, 5], - dcn_v2_stages=[], - weight_prefix_name='', - nonlocal_stages=[], - get_prediction=False, - class_dim=1000): - super(ResNet, self).__init__() - - if isinstance(feature_maps, Integral): - feature_maps = [feature_maps] - - assert depth in [34, 50], \ - "depth {} not in [34, 50]" - assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant" - assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" - assert len(feature_maps) > 0, "need one or more feature maps" - assert norm_type in ['bn', 'sync_bn', 'affine_channel'] - assert not (len(nonlocal_stages)>0 and depth<50), \ - "non-local is not supported for resnet18 or resnet34" - - self.depth = depth - self.freeze_at = freeze_at - self.norm_type = norm_type - self.norm_decay = norm_decay - self.freeze_norm = freeze_norm - self.variant = variant - self._model_type = 'ResNet' - self.feature_maps = feature_maps - self.dcn_v2_stages = dcn_v2_stages - self.depth_cfg = { - 34: ([3, 4, 6, 3], self.basicblock), - 50: ([3, 4, 6, 3], self.bottleneck), - } - self.stage_filters = [64, 128, 256, 512] - self._c1_out_chan_num = 64 - self.na = NameAdapter(self) - self.prefix_name = weight_prefix_name - - self.nonlocal_stages = nonlocal_stages - self.nonlocal_mod_cfg = { - 50: 2, - 101: 5, - 152: 8, - 200: 12, - } - self.get_prediction = get_prediction - self.class_dim = class_dim - - def _conv_offset(self, - input, - filter_size, - stride, - padding, - act=None, - name=None): - out_channel = filter_size * filter_size * 3 - out = fluid.layers.conv2d( - input, - num_filters=out_channel, - filter_size=filter_size, - stride=stride, - padding=padding, - param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"), - bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"), - act=act, - name=name) - return out - - def _conv_norm(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - dcn_v2=False): - _name = self.prefix_name + name if self.prefix_name != '' else name - if not dcn_v2: - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + '.conv2d.output.1') - else: - # select deformable conv" - offset_mask = self._conv_offset( - input=input, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - act=None, - name=_name + "_conv_offset") - offset_channel = filter_size**2 * 2 - mask_channel = filter_size**2 - offset, mask = fluid.layers.split( - input=offset_mask, - num_or_sections=[offset_channel, mask_channel], - dim=1) - mask = fluid.layers.sigmoid(mask) - conv = fluid.layers.deformable_conv( - input=input, - offset=offset, - 
mask=mask, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - deformable_groups=1, - im2col_step=1, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + ".conv2d.output.1") - - bn_name = self.na.fix_conv_norm_name(name) - bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name - - norm_lr = 0. if self.freeze_norm else 1. - norm_decay = self.norm_decay - pattr = ParamAttr( - name=bn_name + '_scale', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - battr = ParamAttr( - name=bn_name + '_offset', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - - if self.norm_type in ['bn', 'sync_bn']: - global_stats = True if self.freeze_norm else False - out = fluid.layers.batch_norm( - input=conv, - act=act, - name=bn_name + '.output.1', - param_attr=pattr, - bias_attr=battr, - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', - use_global_stats=global_stats) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif self.norm_type == 'affine_channel': - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=pattr, - default_initializer=fluid.initializer.Constant(1.)) - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=battr, - default_initializer=fluid.initializer.Constant(0.)) - out = fluid.layers.affine_channel( - x=conv, scale=scale, bias=bias, act=act) - if self.freeze_norm: - scale.stop_gradient = True - bias.stop_gradient = True - return out - - def _shortcut(self, input, ch_out, stride, is_first, name): - max_pooling_in_short_cut = self.variant == 'd' - ch_in = input.shape[1] - # the naming rule is same as pretrained weight - name = self.na.fix_shortcut_name(name) - std_senet = getattr(self, 'std_senet', False) - if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first): - if std_senet: - if is_first: - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return self._conv_norm(input, ch_out, 3, stride, name=name) - if max_pooling_in_short_cut and not is_first: - input = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - ceil_mode=True, - pool_type='avg') - return self._conv_norm(input, ch_out, 1, 1, name=name) - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - if self.variant == 'a': - stride1, stride2 = stride, 1 - else: - stride1, stride2 = 1, stride - - # ResNeXt - groups = getattr(self, 'groups', 1) - group_width = getattr(self, 'group_width', -1) - if groups == 1: - expand = 4 - elif (groups * group_width) == 256: - expand = 1 - else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d - num_filters = num_filters // 2 - expand = 2 - - conv_name1, conv_name2, conv_name3, \ - shortcut_name = self.na.fix_bottleneck_name(name) - std_senet = getattr(self, 'std_senet', False) - if std_senet: - conv_def = [[ - int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1 - ], [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - else: - conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], - [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - - residual = input - for i, (c, k, s, act, g, _name) 
in enumerate(conv_def): - residual = self._conv_norm( - input=residual, - num_filters=c, - filter_size=k, - stride=s, - act=act, - groups=g, - name=_name, - dcn_v2=(i == 1 and dcn_v2)) - short = self._shortcut( - input, - num_filters * expand, - stride, - is_first=is_first, - name=shortcut_name) - # Squeeze-and-Excitation - if callable(getattr(self, '_squeeze_excitation', None)): - residual = self._squeeze_excitation( - input=residual, num_channels=num_filters, name='fc' + name) - return fluid.layers.elementwise_add( - x=short, y=residual, act='relu', name=name + ".add.output.5") - - def basicblock(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - assert dcn_v2 is False, "Not implemented yet." - conv0 = self._conv_norm( - input=input, - num_filters=num_filters, - filter_size=3, - act='relu', - stride=stride, - name=name + "_branch2a") - conv1 = self._conv_norm( - input=conv0, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - short = self._shortcut( - input, num_filters, stride, is_first, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - - def layer_warp(self, input, stage_num): - """ - Args: - input (Variable): input variable. - stage_num (int): the stage number, should be 2, 3, 4, 5 - - Returns: - The last variable in endpoint-th stage. - """ - assert stage_num in [2, 3, 4, 5] - - stages, block_func = self.depth_cfg[self.depth] - count = stages[stage_num - 2] - - ch_out = self.stage_filters[stage_num - 2] - is_first = False if stage_num != 2 else True - dcn_v2 = True if stage_num in self.dcn_v2_stages else False - - nonlocal_mod = 1000 - if stage_num in self.nonlocal_stages: - nonlocal_mod = self.nonlocal_mod_cfg[ - self.depth] if stage_num == 4 else 2 - - # Make the layer name and parameter name consistent - # with ImageNet pre-trained model - conv = input - for i in range(count): - conv_name = self.na.fix_layer_warp_name(stage_num, count, i) - if self.depth < 50: - is_first = True if i == 0 and stage_num == 2 else False - conv = block_func( - input=conv, - num_filters=ch_out, - stride=2 if i == 0 and stage_num != 2 else 1, - is_first=is_first, - name=conv_name, - dcn_v2=dcn_v2) - - # add non local model - dim_in = conv.shape[1] - nonlocal_name = "nonlocal_conv{}".format(stage_num) - if i % nonlocal_mod == nonlocal_mod - 1: - conv = add_space_nonlocal(conv, dim_in, dim_in, - nonlocal_name + '_{}'.format(i), - int(dim_in / 2)) - return conv - - def c1_stage(self, input): - out_chan = self._c1_out_chan_num - - conv1_name = self.na.fix_c1_stage_name() - - if self.variant in ['c', 'd']: - conv_def = [ - [out_chan // 2, 3, 2, "conv1_1"], - [out_chan // 2, 3, 1, "conv1_2"], - [out_chan, 3, 1, "conv1_3"], - ] - else: - conv_def = [[out_chan, 7, 2, conv1_name]] - - for (c, k, s, _name) in conv_def: - input = self._conv_norm( - input=input, - num_filters=c, - filter_size=k, - stride=s, - act='relu', - name=_name) - - output = fluid.layers.pool2d( - input=input, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - return output - - def __call__(self, input): - assert isinstance(input, Variable) - assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ - "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) - - res_endpoints = [] - - res = input - feature_maps = self.feature_maps - severed_head = getattr(self, 'severed_head', False) - if not severed_head: - res = self.c1_stage(res) - feature_maps = range(2, max(self.feature_maps) + 1) - - for i in feature_maps: 
- res = self.layer_warp(res, i) - if i in self.feature_maps: - res_endpoints.append(res) - if self.freeze_at >= i: - res.stop_gradient = True - if self.get_prediction: - pool = fluid.layers.pool2d( - input=res, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - - out = fluid.layers.fc( - input=pool, - size=self.class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - out = fluid.layers.softmax(out) - return out - return OrderedDict([('res{}_sum'.format(self.feature_maps[idx]), feat) - for idx, feat in enumerate(res_endpoints)]) - - -class ResNetC5(ResNet): - def __init__(self, - depth=50, - freeze_at=2, - norm_type='affine_channel', - freeze_norm=True, - norm_decay=0., - variant='b', - feature_maps=[5], - weight_prefix_name=''): - super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm, - norm_decay, variant, feature_maps) - self.severed_head = True diff --git a/hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/yolo_head.py b/hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/yolo_head.py deleted file mode 100644 index 7428fb4c281507c30918e12a04753d559346cf7b..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/yolo_head.py +++ /dev/null @@ -1,273 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['MultiClassNMS', 'YOLOv3Head'] - - -class MultiClassNMS(object): - # __op__ = fluid.layers.multiclass_nms - def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k, - normalized, score_threshold): - super(MultiClassNMS, self).__init__() - self.background_label = background_label - self.keep_top_k = keep_top_k - self.nms_threshold = nms_threshold - self.nms_top_k = nms_top_k - self.normalized = normalized - self.score_threshold = score_threshold - - -class YOLOv3Head(object): - """Head block for YOLOv3 network - - Args: - norm_decay (float): weight decay for normalization layer weights - num_classes (int): number of output classes - ignore_thresh (float): threshold to ignore confidence loss - label_smooth (bool): whether to use label smoothing - anchors (list): anchors - anchor_masks (list): anchor masks - nms (object): an instance of `MultiClassNMS` - """ - - def __init__(self, - norm_decay=0., - num_classes=80, - ignore_thresh=0.7, - label_smooth=True, - anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], - [59, 119], [116, 90], [156, 198], [373, 326]], - anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], - nms=MultiClassNMS( - background_label=-1, - keep_top_k=100, - nms_threshold=0.45, - nms_top_k=1000, - normalized=True, - score_threshold=0.01), - weight_prefix_name=''): - self.norm_decay = norm_decay - self.num_classes = num_classes - self.ignore_thresh = ignore_thresh - self.label_smooth = label_smooth - self.anchor_masks = anchor_masks - self._parse_anchors(anchors) - self.nms = nms - self.prefix_name = weight_prefix_name - - def _conv_bn(self, - input, - ch_out, - filter_size, - stride, - padding, - act='leaky', - is_test=True, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + 
".conv.weights"), - bias_attr=False) - - bn_name = name + ".bn" - bn_param_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset') - out = fluid.layers.batch_norm( - input=conv, - act=None, - is_test=is_test, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + '.var') - - if act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - return out - - def _detection_block(self, input, channel, is_test=True, name=None): - assert channel % 2 == 0, \ - "channel {} cannot be divided by 2 in detection block {}" \ - .format(channel, name) - - conv = input - for j in range(2): - conv = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.{}.0'.format(name, j)) - conv = self._conv_bn( - conv, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.{}.1'.format(name, j)) - route = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.2'.format(name)) - tip = self._conv_bn( - route, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.tip'.format(name)) - return route, tip - - def _upsample(self, input, scale=2, name=None): - out = fluid.layers.resize_nearest( - input=input, scale=float(scale), name=name) - return out - - def _parse_anchors(self, anchors): - """ - Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors - - """ - self.anchors = [] - self.mask_anchors = [] - - assert len(anchors) > 0, "ANCHORS not set." - assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." - - for anchor in anchors: - assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) - self.anchors.extend(anchor) - - anchor_num = len(anchors) - for masks in self.anchor_masks: - self.mask_anchors.append([]) - for mask in masks: - assert mask < anchor_num, "anchor mask index overflow" - self.mask_anchors[-1].extend(anchors[mask]) - - def _get_outputs(self, input, is_train=True): - """ - Get YOLOv3 head output - - Args: - input (list): List of Variables, output of backbone stages - is_train (bool): whether in train or test mode - - Returns: - outputs (list): Variables of each output layer - """ - - outputs = [] - - # get last out_layer_num blocks in reverse order - out_layer_num = len(self.anchor_masks) - if isinstance(input, OrderedDict): - blocks = list(input.values())[-1:-out_layer_num - 1:-1] - else: - blocks = input[-1:-out_layer_num - 1:-1] - route = None - for i, block in enumerate(blocks): - if i > 0: # perform concat in first 2 detection_block - block = fluid.layers.concat(input=[route, block], axis=1) - route, tip = self._detection_block( - block, - channel=512 // (2**i), - is_test=(not is_train), - name=self.prefix_name + "yolo_block.{}".format(i)) - - # out channel number = mask_num * (5 + class_num) - num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) - block_out = fluid.layers.conv2d( - input=tip, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=ParamAttr(name=self.prefix_name + - "yolo_output.{}.conv.weights".format(i)), - bias_attr=ParamAttr( - regularizer=L2Decay(0.), - name=self.prefix_name + - "yolo_output.{}.conv.bias".format(i))) - outputs.append(block_out) - - if i < len(blocks) - 1: - # do not perform upsample in the last detection_block - route = self._conv_bn( - 
input=route, - ch_out=256 // (2**i), - filter_size=1, - stride=1, - padding=0, - is_test=(not is_train), - name=self.prefix_name + "yolo_transition.{}".format(i)) - # upsample - route = self._upsample(route) - - return outputs, blocks - - def get_prediction(self, outputs, im_size): - """ - Get prediction result of YOLOv3 network - - Args: - outputs (list): list of Variables, return from _get_outputs - im_size (Variable): Variable of size([h, w]) of each image - - Returns: - pred (Variable): The prediction result after non-max suppress. - - """ - boxes = [] - scores = [] - downsample = 32 - for i, output in enumerate(outputs): - box, score = fluid.layers.yolo_box( - x=output, - img_size=im_size, - anchors=self.mask_anchors[i], - class_num=self.num_classes, - conf_thresh=self.nms.score_threshold, - downsample_ratio=downsample, - name=self.prefix_name + "yolo_box" + str(i)) - boxes.append(box) - scores.append(fluid.layers.transpose(score, perm=[0, 2, 1])) - - downsample //= 2 - - yolo_boxes = fluid.layers.concat(boxes, axis=1) - yolo_scores = fluid.layers.concat(scores, axis=2) - pred = fluid.layers.multiclass_nms( - bboxes=yolo_boxes, - scores=yolo_scores, - score_threshold=self.nms.score_threshold, - nms_top_k=self.nms.nms_top_k, - keep_top_k=self.nms.keep_top_k, - nms_threshold=self.nms.nms_threshold, - background_label=self.nms.background_label, - normalized=self.nms.normalized, - name="multiclass_nms") - return pred diff --git a/hub_module/modules/image/semantic_segmentation/ace2p/data_feed.py b/hub_module/modules/image/semantic_segmentation/ace2p/data_feed.py deleted file mode 100644 index 3ec33777a111d73976080d2e509cec9d39f0856f..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/semantic_segmentation/ace2p/data_feed.py +++ /dev/null @@ -1,101 +0,0 @@ -# coding=utf-8 -import os -import time -from collections import OrderedDict - -import cv2 -import numpy as np - -from ace2p.processor import get_direction, get_3rd_point, get_affine_transform - -__all__ = ['reader'] - - -def _box2cs(box, aspect_ratio): - x, y, w, h = box[:4] - return _xywh2cs(x, y, w, h, aspect_ratio) - - -def _xywh2cs(x, y, w, h, aspect_ratio, pixel_std=200): - center = np.zeros((2), dtype=np.float32) - center[0] = x + w * 0.5 - center[1] = y + h * 0.5 - if w > aspect_ratio * h: - h = w * 1.0 / aspect_ratio - elif w < aspect_ratio * h: - w = h * aspect_ratio - scale = np.array([w * 1.0 / pixel_std, h * 1.0 / pixel_std], - dtype=np.float32) - return center, scale - - -def preprocess(org_im, scale, rotation): - image = org_im.copy() - image_height, image_width, _ = image.shape - - aspect_ratio = scale[1] * 1.0 / scale[0] - image_center, image_scale = _box2cs( - [0, 0, image_width - 1, image_height - 1], aspect_ratio) - - trans = get_affine_transform(image_center, image_scale, rotation, scale) - image = cv2.warpAffine( - image, - trans, (int(scale[1]), int(scale[0])), - flags=cv2.INTER_LINEAR, - borderMode=cv2.BORDER_CONSTANT, - borderValue=(0, 0, 0)) - - img_mean = np.array([0.406, 0.456, 0.485]).reshape((1, 1, 3)) - img_std = np.array([0.225, 0.224, 0.229]).reshape((1, 1, 3)) - image = image.astype(np.float) - image = (image / 255.0 - img_mean) / img_std - image = image.transpose(2, 0, 1).astype(np.float32) - - image_info = { - 'image_center': image_center, - 'image_height': image_height, - 'image_width': image_width, - 'image_scale': image_scale, - 'rotation': rotation, - 'scale': scale - } - - return image, image_info - - -def reader(images, paths, scale, rotation): - """ - Preprocess to yield 
image. - - Args: - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - paths (list[str]): paths to images. - scale (tuple): size of preprocessed image. - rotation (int): rotation angle, used for obtaining affine matrix in preprocess. - - Yield: - element (collections.OrderedDict): info of original image and preprocessed image. - """ - component = list() - if paths: - for im_path in paths: - each = OrderedDict() - assert os.path.isfile( - im_path), "The {} isn't a valid file path.".format(im_path) - im = cv2.imread(im_path) - each['org_im'] = im - each['org_im_path'] = im_path - component.append(each) - if images is not None: - assert type(images) is list, "images should be a list." - for im in images: - each = OrderedDict() - each['org_im'] = im - each['org_im_path'] = 'ndarray_time={}.jpg'.format( - round(time.time(), 6) * 1e6) - component.append(each) - - for element in component: - element['image'], element['image_info'] = preprocess( - element['org_im'], scale, rotation) - yield element diff --git a/hub_module/modules/image/semantic_segmentation/ace2p/module.py b/hub_module/modules/image/semantic_segmentation/ace2p/module.py deleted file mode 100644 index d8908525ddd5c0c1d98a0f9ce0dddff453a9c128..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/semantic_segmentation/ace2p/module.py +++ /dev/null @@ -1,228 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division - -import ast -import argparse -import os - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving - -from ace2p.processor import get_palette, postprocess, base64_to_cv2, cv2_to_base64 -from ace2p.data_feed import reader - - -@moduleinfo( - name="ace2p", - type="CV/semantic-segmentation", - author="baidu-idl", - author_email="", - summary="ACE2P is an image segmentation model for human parsing solution.", - version="1.1.0") -class ACE2P(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "ace2p_human_parsing") - # label list - label_list_file = os.path.join(self.directory, 'label_list.txt') - with open(label_list_file, "r") as file: - content = file.read() - self.label_list = content.split("\n") - # palette used in postprocess - self.palette = get_palette(len(self.label_list)) - self._set_config() - - def _set_config(self): - """ - predictor config setting - """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu( - memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def segmentation(self, - images=None, - paths=None, - data=None, - batch_size=1, - use_gpu=False, - output_dir='ace2p_output', - visualization=False): - """ - API for human parsing. - - Args: - images (list[numpy.ndarray]): images data, shape of each is [H, W, C], color space is BGR. - paths (list[str]): The paths of images. - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. 
- output_dir (str): The path to store output images. - visualization (bool): Whether to save output images or not. - - Returns: - res (list[dict]): The result of human parsing and original path of images. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." - ) - - # compatibility with older versions - if data and 'image' in data: - if paths is None: - paths = [] - paths += data['image'] - - # get all data - all_data = [] - scale = (473, 473) # size of preprocessed image. - rotation = 0 # rotation angle, used for obtaining affine matrix in preprocess. - for yield_data in reader(images, paths, scale, rotation): - all_data.append(yield_data) - - total_num = len(all_data) - loop_num = int(np.ceil(total_num / batch_size)) - - res = [] - for iter_id in range(loop_num): - batch_data = list() - handle_id = iter_id * batch_size - for image_id in range(batch_size): - try: - batch_data.append(all_data[handle_id + image_id]) - except: - pass - # feed batch image - batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.astype('float32')) - data_out = self.gpu_predictor.run([ - batch_image - ]) if use_gpu else self.cpu_predictor.run([batch_image]) - # postprocess one by one - for i in range(len(batch_data)): - out = postprocess( - data_out=data_out[0].as_ndarray()[i], - org_im=batch_data[i]['org_im'], - org_im_path=batch_data[i]['org_im_path'], - image_info=batch_data[i]['image_info'], - output_dir=output_dir, - visualization=visualization, - palette=self.palette) - res.append(out) - return res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.segmentation(images_decode, **kwargs) - results = [{ - 'data': cv2_to_base64(result['data']) - } for result in results] - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. - """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. 
Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.segmentation( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='ace2p_output', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - self.arg_config_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - - def add_module_input_arg(self): - """ - Add the command input options. - """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") diff --git a/hub_module/modules/image/semantic_segmentation/ace2p/processor.py b/hub_module/modules/image/semantic_segmentation/ace2p/processor.py deleted file mode 100644 index dd036b76ed50a5e35d9f5ab6eca759ef9ec3009f..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/semantic_segmentation/ace2p/processor.py +++ /dev/null @@ -1,193 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time - -import base64 -import cv2 -import numpy as np -from PIL import Image - -__all__ = ['cv2_to_base64', 'base64_to_cv2', 'get_palette', 'postprocess'] - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def check_dir(dir_path): - """ - Create directory to save processed image. - - Args: - dir_path (str): directory path to save images. - """ - if not os.path.exists(dir_path): - os.makedirs(dir_path) - elif os.path.isfile(dir_path): - os.remove(dir_path) - os.makedirs(dir_path) - - -def get_save_image_name(org_im, org_im_path, output_dir): - """ - Get save image name from source image path. 
- """ - # name prefix of orginal image - org_im_name = os.path.split(org_im_path)[-1] - im_prefix = os.path.splitext(org_im_name)[0] - ext = '.png' - # save image path - save_im_path = os.path.join(output_dir, im_prefix + ext) - if os.path.exists(save_im_path): - save_im_path = os.path.join( - output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) - - return save_im_path - - -def get_direction(src_point, rot_rad): - sn, cs = np.sin(rot_rad), np.cos(rot_rad) - src_result = [0, 0] - src_result[0] = src_point[0] * cs - src_point[1] * sn - src_result[1] = src_point[0] * sn + src_point[1] * cs - return src_result - - -def get_3rd_point(a, b): - direct = a - b - return b + np.array([-direct[1], direct[0]], dtype=np.float32) - - -def get_affine_transform(center, - scale, - rot, - output_size, - shift=np.array([0, 0], dtype=np.float32), - inv=0): - if not isinstance(scale, np.ndarray) and not isinstance( - scale, list) and not isinstance(scale, tuple): - print(scale) - scale = np.array([scale, scale]) - - scale_tmp = scale * 200.0 - src_w = scale_tmp[0] - dst_w = output_size[1] - dst_h = output_size[0] - rot_rad = np.pi * rot / 180 - src_direction = get_direction([0, src_w * -0.5], rot_rad) - dst_direction = np.array([0, (dst_w - 1) * -0.5], np.float32) - src = np.zeros((3, 2), dtype=np.float32) - dst = np.zeros((3, 2), dtype=np.float32) - src[0, :] = center + scale_tmp * shift - src[1, :] = center + src_direction + scale_tmp * shift - dst[0, :] = [(dst_w - 1) * 0.5, (dst_h - 1) * 0.5] - dst[1, :] = np.array([(dst_w - 1) * 0.5, (dst_h - 1) * 0.5]) + dst_direction - src[2:, :] = get_3rd_point(src[0, :], src[1, :]) - dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) - - if inv: - trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) - else: - trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) - return trans - - -def transform_logits(logits, center, scale, width, height, input_size): - trans = get_affine_transform(center, scale, 0, input_size, inv=1) - channel = logits.shape[2] - target_logits = [] - for i in range(channel): - target_logit = cv2.warpAffine( - logits[:, :, i], - trans, (int(width), int(height)), - flags=cv2.INTER_LINEAR, - borderMode=cv2.BORDER_CONSTANT, - borderValue=(0)) - target_logits.append(target_logit) - target_logits = np.stack(target_logits, axis=2) - return target_logits - - -def get_palette(num_cls): - """ - Returns the color map for visualizing the segmentation mask. - - Args: - num_cls: Number of classes - - Returns: - The color map - """ - n = num_cls - palette = [0] * (n * 3) - for j in range(0, n): - lab = j - palette[j * 3 + 0] = 0 - palette[j * 3 + 1] = 0 - palette[j * 3 + 2] = 0 - i = 0 - while lab: - palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) - palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) - palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) - i += 1 - lab >>= 3 - return palette - - -def postprocess(data_out, org_im, org_im_path, image_info, output_dir, - visualization, palette): - """ - Postprocess output of network. one image at a time. - - Args: - data_out (numpy.ndarray): output of neural network. - org_im (numpy.ndarray): orginal image. - org_im_path (str): path of original image. - image_info (dict): info about the preprocessed image. - output_dir (str): output directory to store image. - visualization (bool): whether to save image or not. - palette (list): The palette to draw. - - Returns: - res (list[dict]): keys contain 'path', 'data', the corresponding value is: - path (str): The path of original image. 
- data (numpy.ndarray): The postprocessed image data, only the alpha channel. - """ - result = dict() - result['path'] = org_im_path - - image_center = image_info['image_center'] - image_scale = image_info['image_scale'] - image_width = image_info['image_width'] - image_height = image_info['image_height'] - scale = image_info['scale'] - - data_out = np.squeeze(data_out) - data_out = np.transpose(data_out, [1, 2, 0]) - logits_result = transform_logits(data_out, image_center, image_scale, - image_width, image_height, scale) - parsing = np.argmax(logits_result, axis=2) - parsing_im = np.asarray(parsing, dtype=np.uint8) - result['data'] = parsing_im - - if visualization: - check_dir(output_dir) - save_im_path = get_save_image_name(org_im, org_im_path, output_dir) - parsing_im = Image.fromarray(parsing_im) - parsing_im.putpalette(palette) - parsing_im.save(save_im_path) - - return result diff --git a/hub_module/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/data_feed.py b/hub_module/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/data_feed.py deleted file mode 100644 index 9972d332bda281e56b67c36862ed6e573bd52ff2..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/data_feed.py +++ /dev/null @@ -1,52 +0,0 @@ -# coding=utf-8 -import os -import time -from collections import OrderedDict - -import cv2 -import numpy as np -from PIL import Image - -__all__ = ['reader'] - - -def reader(images=None, paths=None): - """ - Preprocess to yield image. - - Args: - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - paths (list[str]): paths to images. - - Yield: - each (collections.OrderedDict): info of original image, preprocessed image. - """ - component = list() - if paths: - for im_path in paths: - each = OrderedDict() - assert os.path.isfile( - im_path), "The {} isn't a valid file path.".format(im_path) - im = cv2.imread(im_path).astype('float32') - each['org_im'] = im - each['org_im_path'] = im_path - each['org_im_shape'] = im.shape - component.append(each) - if images is not None: - assert type(images) is list, "images should be a list." 
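-        # Each entry must be a BGR ndarray as returned by cv2.imread; e.g. (a
-        # sketch, 'person.jpg' being a hypothetical file):
-        #     reader(images=[cv2.imread('person.jpg')], paths=None)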
- for im in images: - each = OrderedDict() - each['org_im'] = im - each['org_im_path'] = 'ndarray_time={}'.format( - round(time.time(), 6) * 1e6) - each['org_im_shape'] = im.shape - component.append(each) - - for element in component: - img = element['org_im'].copy() - img = cv2.resize(img, (513, 513)).astype(np.float32) - img -= np.array([104.008, 116.669, 122.675]) - img /= np.array([1.0, 1.0, 1.0]) - img = img.transpose((2, 0, 1)) - element['image'] = img - yield element diff --git a/hub_module/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/module.py b/hub_module/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/module.py deleted file mode 100644 index f2d5977247c8ad8ec865afc61eee0214d2dd46c9..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/module.py +++ /dev/null @@ -1,230 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division - -import ast -import os -import argparse - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving - -from deeplabv3p_xception65_humanseg.processor import postprocess, base64_to_cv2, cv2_to_base64 -from deeplabv3p_xception65_humanseg.data_feed import reader - - -@moduleinfo( - name="deeplabv3p_xception65_humanseg", - type="CV/semantic_segmentation", - author="baidu-vis", - author_email="", - summary="DeepLabv3+ is a semantic segmentation model.", - version="1.1.1") -class DeeplabV3pXception65HumanSeg(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "deeplabv3p_xception65_humanseg_model") - self._set_config() - - def _set_config(self): - """ - predictor config setting - """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu( - memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def segmentation(self, - images=None, - paths=None, - data=None, - batch_size=1, - use_gpu=False, - visualization=False, - output_dir='humanseg_output'): - """ - API for human segmentation. - - Args: - images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR. - paths (list[str]): The paths of images. - data (dict): key is 'image', the corresponding value is the path to image. - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. - visualization (bool): Whether to save image or not. - output_dir (str): The path to store output images. - - Returns: - res (list[dict]): each element in the list is a dict, the keys and values are: - save_path (str, optional): the path to save images. (Exists only if visualization is True) - data (numpy.ndarray): data of post processed image. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. 
If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." - ) - - # compatibility with older versions - if data and 'image' in data: - if paths is None: - paths = list() - paths += data['image'] - - all_data = list() - for yield_data in reader(images, paths): - all_data.append(yield_data) - - total_num = len(all_data) - loop_num = int(np.ceil(total_num / batch_size)) - - res = list() - for iter_id in range(loop_num): - batch_data = list() - handle_id = iter_id * batch_size - for image_id in range(batch_size): - try: - batch_data.append(all_data[handle_id + image_id]) - except: - pass - # feed batch image - batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.copy()) - output = self.gpu_predictor.run([ - batch_image - ]) if use_gpu else self.cpu_predictor.run([batch_image]) - output = np.expand_dims(output[0].as_ndarray(), axis=1) - # postprocess one by one - for i in range(len(batch_data)): - out = postprocess( - data_out=output[i], - org_im=batch_data[i]['org_im'], - org_im_shape=batch_data[i]['org_im_shape'], - org_im_path=batch_data[i]['org_im_path'], - output_dir=output_dir, - visualization=visualization) - res.append(out) - return res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.segmentation(images=images_decode, **kwargs) - results = [{ - 'data': cv2_to_base64(result['data']) - } for result in results] - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. - """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - results = self.segmentation( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization) - return results - - def add_module_config_arg(self): - """ - Add the command config options. 
-        """
-        self.arg_config_group.add_argument(
-            '--use_gpu',
-            type=ast.literal_eval,
-            default=False,
-            help="whether to use GPU or not")
-        self.arg_config_group.add_argument(
-            '--output_dir',
-            type=str,
-            default='humanseg_output',
-            help="The directory to save output images.")
-        self.arg_config_group.add_argument(
-            '--visualization',
-            type=ast.literal_eval,
-            default=False,
-            help="whether to save output as images.")
-        self.arg_config_group.add_argument(
-            '--batch_size',
-            type=ast.literal_eval,
-            default=1,
-            help="batch size.")
-
-    def add_module_input_arg(self):
-        """
-        Add the command input options.
-        """
-        self.arg_input_group.add_argument(
-            '--input_path', type=str, help="path to image.")
-
-
-if __name__ == "__main__":
-    m = DeeplabV3pXception65HumanSeg()
-    import cv2
-    img = cv2.imread('./meditation.jpg')
-    res = m.segmentation(images=[img])
-    print(res[0]['data'])
diff --git a/hub_module/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/processor.py b/hub_module/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/processor.py
deleted file mode 100644
index ce07041294861513ee28fa7503ac139f2f7555a5..0000000000000000000000000000000000000000
--- a/hub_module/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/processor.py
+++ /dev/null
@@ -1,93 +0,0 @@
-# coding=utf-8
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import time
-from collections import OrderedDict
-
-import base64
-import cv2
-import numpy as np
-
-__all__ = ['cv2_to_base64', 'base64_to_cv2', 'postprocess']
-
-
-def cv2_to_base64(image):
-    data = cv2.imencode('.jpg', image)[1]
-    # ndarray.tostring is a deprecated alias of tobytes
-    return base64.b64encode(data.tobytes()).decode('utf8')
-
-
-def base64_to_cv2(b64str):
-    data = base64.b64decode(b64str.encode('utf8'))
-    # np.fromstring is deprecated for binary input; frombuffer is the drop-in replacement
-    data = np.frombuffer(data, np.uint8)
-    data = cv2.imdecode(data, cv2.IMREAD_COLOR)
-    return data
-
-
-def postprocess(data_out,
-                org_im,
-                org_im_shape,
-                org_im_path,
-                output_dir,
-                visualization,
-                thresh=120):
-    """
-    Postprocess output of network, one image at a time.
-
-    Args:
-        data_out (numpy.ndarray): output of network.
-        org_im (numpy.ndarray): original image.
-        org_im_shape (list): shape of original image.
-        org_im_path (str): path of original image.
-        output_dir (str): output directory to store image.
-        visualization (bool): whether to save image or not.
-        thresh (float): gray-level threshold; matte values at or below it are clipped to background.
-
-    Returns:
-        result (dict): The data of processed image.
-    """
-    result = dict()
-    for logit in data_out:
-        logit = logit[1] * 255
-        logit = cv2.resize(logit, (org_im_shape[1], org_im_shape[0]))
-        logit -= thresh
-        logit[logit < 0] = 0
-        logit = 255 * logit / (255 - thresh)
-        rgba = np.concatenate((org_im, np.expand_dims(logit, axis=2)), axis=2)
-
-        if visualization:
-            check_dir(output_dir)
-            save_im_path = get_save_image_name(org_im, org_im_path, output_dir)
-            cv2.imwrite(save_im_path, rgba)
-            result['save_path'] = save_im_path
-        result['data'] = rgba[:, :, 3]
-    return result
-
-
-def check_dir(dir_path):
-    if not os.path.exists(dir_path):
-        os.makedirs(dir_path)
-    elif os.path.isfile(dir_path):
-        os.remove(dir_path)
-        os.makedirs(dir_path)
-
-
-def get_save_image_name(org_im, org_im_path, output_dir):
-    """
-    Get save image name from source image path.
-    """
-    # name prefix of original image
-    org_im_name = os.path.split(org_im_path)[-1]
-    im_prefix = os.path.splitext(org_im_name)[0]
-    ext = '.png'
-    # save image path
-    save_im_path = os.path.join(output_dir, im_prefix + ext)
-    if os.path.exists(save_im_path):
-        save_im_path = os.path.join(
-            output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext)
-
-    return save_im_path
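The thresholded matte that `postprocess` returns in `result['data']` is a single-channel alpha map aligned with the original image. A minimal compositing sketch (the file names are hypothetical, and the random array merely stands in for a real matte):

```python
import cv2
import numpy as np

# Stand-ins: a real caller would use the original BGR image and the matte
# returned by postprocess() in result['data'] (values in [0, 255], shape [H, W]).
org_im = cv2.imread('person.jpg')  # hypothetical input file
matte = np.random.randint(0, 256, org_im.shape[:2]).astype(np.float32)

# Blend the person over a plain white background using the matte as alpha.
alpha = (matte / 255.0)[:, :, np.newaxis]  # [H, W, 1] in [0, 1]
background = np.full_like(org_im, 255)     # white canvas
blended = org_im.astype(np.float32) * alpha + background.astype(np.float32) * (1 - alpha)
cv2.imwrite('person_on_white.png', blended.astype(np.uint8))
```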
diff --git a/hub_module/modules/image/style_transfer/msgnet/module.py b/hub_module/modules/image/style_transfer/msgnet/module.py
deleted file mode 100644
index aa2f2d5b3d41e76fdf174bceb0289114ed3c8e01..0000000000000000000000000000000000000000
--- a/hub_module/modules/image/style_transfer/msgnet/module.py
+++ /dev/null
@@ -1,349 +0,0 @@
-import os
-
-import paddle
-import paddle.nn as nn
-import numpy as np
-import paddle.nn.functional as F
-
-from paddlehub.env import MODULE_HOME
-from paddlehub.module.module import moduleinfo
-from paddlehub.process.transforms import Compose, Resize, CenterCrop, SetType
-from paddlehub.module.cv_module import StyleTransferModule
-
-
-class GramMatrix(nn.Layer):
-    """Calculate gram matrix"""
-    def forward(self, y):
-        (b, ch, h, w) = y.shape
-        features = y.reshape((b, ch, w * h))
-        features_t = features.transpose((0, 2, 1))
-        gram = features.bmm(features_t) / (ch * h * w)
-        return gram
-
-
-class ConvLayer(nn.Layer):
-    """Basic conv layer with reflection padding layer"""
-    def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride: int):
-        super(ConvLayer, self).__init__()
-        pad = int(np.floor(kernel_size / 2))
-        self.reflection_pad = nn.Pad2D([pad, pad, pad, pad], mode='reflect')
-        self.conv2d = nn.Conv2D(in_channels, out_channels, kernel_size, stride)
-
-    def forward(self, x: paddle.Tensor):
-        out = self.reflection_pad(x)
-        out = self.conv2d(out)
-        return out
-
-
-class UpsampleConvLayer(nn.Layer):
-    """
-    Upsamples the input and then does a convolution. This method gives better results compared to ConvTranspose2d.
-    ref: http://distill.pub/2016/deconv-checkerboard/
-
-    Args:
-        in_channels(int): Number of input channels.
-        out_channels(int): Number of output channels.
-        kernel_size(int): Size of the convolution kernel.
-        stride(int): Stride of the convolution.
-        upsample(int): Scale factor for the upsample layer, default is None.
-
-    Return:
-        img(paddle.Tensor): UpsampleConvLayer output.
-    """
-    def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride: int, upsample=None):
-        super(UpsampleConvLayer, self).__init__()
-        self.upsample = upsample
-        if upsample:
-            self.upsample_layer = nn.Upsample(scale_factor=upsample)
-        self.pad = int(np.floor(kernel_size / 2))
-        if self.pad != 0:
-            self.reflection_pad = nn.Pad2D([self.pad, self.pad, self.pad, self.pad], mode='reflect')
-        self.conv2d = nn.Conv2D(in_channels, out_channels, kernel_size, stride)
-
-    def forward(self, x):
-        if self.upsample:
-            x = self.upsample_layer(x)
-        if self.pad != 0:
-            x = self.reflection_pad(x)
-        out = self.conv2d(x)
-        return out
-
-
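-# Shape sketch for UpsampleConvLayer (values are illustrative, not from the
-# paper): with in_channels=64, out_channels=32, kernel_size=3, stride=1,
-# upsample=2, a (1, 64, 16, 16) input is first nearest-neighbor upsampled to
-# (1, 64, 32, 32), reflection-padded by 1, and convolved to (1, 32, 32, 32).
-
-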
-class Bottleneck(nn.Layer):
-    """ Pre-activation residual block
-        Identity Mapping in Deep Residual Networks
-        ref https://arxiv.org/abs/1603.05027
-
-    Args:
-        inplanes(int): Number of input channels.
-        planes(int): Number of output channels.
-        stride(int): Stride of the block.
-        downsample: If not None, a 1x1 conv projection is applied to the identity branch; default is None.
-        norm_layer(nn.Layer): Batch norm layer, default is nn.BatchNorm2D.
-
-    Return:
-        img(paddle.Tensor): Bottleneck output.
-    """
-    def __init__(self,
-                 inplanes: int,
-                 planes: int,
-                 stride: int = 1,
-                 downsample: int = None,
-                 norm_layer: nn.Layer = nn.BatchNorm2D):
-        super(Bottleneck, self).__init__()
-        self.expansion = 4
-        self.downsample = downsample
-        if self.downsample is not None:
-            self.residual_layer = nn.Conv2D(inplanes, planes * self.expansion, kernel_size=1, stride=stride)
-        conv_block = (norm_layer(inplanes), nn.ReLU(), nn.Conv2D(inplanes, planes, kernel_size=1, stride=1),
-                      norm_layer(planes), nn.ReLU(), ConvLayer(planes, planes, kernel_size=3, stride=stride),
-                      norm_layer(planes), nn.ReLU(), nn.Conv2D(planes, planes * self.expansion, kernel_size=1,
-                                                               stride=1))
-        self.conv_block = nn.Sequential(*conv_block)
-
-    def forward(self, x: paddle.Tensor):
-        if self.downsample is not None:
-            residual = self.residual_layer(x)
-        else:
-            residual = x
-        # run the conv block once and reuse the result instead of calling it twice
-        out = self.conv_block(x)
-        return residual + out
-
-
-class UpBottleneck(nn.Layer):
-    """ Up-sample residual block (from MSG-Net paper)
-        Enables passing identity all the way through the generator
-        ref https://arxiv.org/abs/1703.06953
-
-    Args:
-        inplanes(int): Number of input channels.
-        planes(int): Number of output channels.
-        stride(int): Upsampling factor of the block, default is 2.
-        norm_layer(nn.Layer): Batch norm layer, default is nn.BatchNorm2D.
-
-    Return:
-        img(paddle.Tensor): UpBottleneck output.
-    """
-    def __init__(self, inplanes: int, planes: int, stride: int = 2, norm_layer: nn.Layer = nn.BatchNorm2D):
-        super(UpBottleneck, self).__init__()
-        self.expansion = 4
-        self.residual_layer = UpsampleConvLayer(inplanes,
-                                                planes * self.expansion,
-                                                kernel_size=1,
-                                                stride=1,
-                                                upsample=stride)
-        conv_block = []
-        conv_block += [norm_layer(inplanes), nn.ReLU(), nn.Conv2D(inplanes, planes, kernel_size=1, stride=1)]
-        conv_block += [
-            norm_layer(planes),
-            nn.ReLU(),
-            UpsampleConvLayer(planes, planes, kernel_size=3, stride=1, upsample=stride)
-        ]
-        conv_block += [
-            norm_layer(planes),
-            nn.ReLU(),
-            nn.Conv2D(planes, planes * self.expansion, kernel_size=1, stride=1)
-        ]
-        self.conv_block = nn.Sequential(*conv_block)
-
-    def forward(self, x: paddle.Tensor):
-        return self.residual_layer(x) + self.conv_block(x)
-
-
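-# Shape sketch for GramMatrix / Inspiration (illustrative values): a feature
-# map y of shape (1, 64, 32, 32) is flattened to (1, 64, 1024), and
-# GramMatrix()(y) = y_flat @ y_flat^T / (64 * 32 * 32) has shape (1, 64, 64).
-# Inspiration(C=64) stores such a (1, 64, 64) target Gram matrix via
-# setTarget() and re-weights incoming feature maps with it in forward().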
- """ - def __init__(self, C: int, B: int = 1): - super(Inspiration, self).__init__() - - self.weight = self.weight = paddle.create_parameter(shape=[1, C, C], dtype='float32') - # non-parameter buffer - self.G = paddle.to_tensor(np.random.rand(B, C, C)) - self.C = C - - def setTarget(self, target: paddle.Tensor): - self.G = target - - def forward(self, X: paddle.Tensor): - # input X is a 3D feature map - self.P = paddle.bmm(self.weight.expand_as(self.G), self.G) - - x = paddle.bmm( - self.P.transpose((0, 2, 1)).expand((X.shape[0], self.C, self.C)), X.reshape( - (X.shape[0], X.shape[1], -1))).reshape(X.shape) - return x - - def __repr__(self): - return self.__class__.__name__ + '(' \ - + 'N x ' + str(self.C) + ')' - - -class Vgg16(nn.Layer): - """ First four layers from Vgg16.""" - def __init__(self): - super(Vgg16, self).__init__() - self.conv1_1 = nn.Conv2D(3, 64, kernel_size=3, stride=1, padding=1) - self.conv1_2 = nn.Conv2D(64, 64, kernel_size=3, stride=1, padding=1) - - self.conv2_1 = nn.Conv2D(64, 128, kernel_size=3, stride=1, padding=1) - self.conv2_2 = nn.Conv2D(128, 128, kernel_size=3, stride=1, padding=1) - - self.conv3_1 = nn.Conv2D(128, 256, kernel_size=3, stride=1, padding=1) - self.conv3_2 = nn.Conv2D(256, 256, kernel_size=3, stride=1, padding=1) - self.conv3_3 = nn.Conv2D(256, 256, kernel_size=3, stride=1, padding=1) - - self.conv4_1 = nn.Conv2D(256, 512, kernel_size=3, stride=1, padding=1) - self.conv4_2 = nn.Conv2D(512, 512, kernel_size=3, stride=1, padding=1) - self.conv4_3 = nn.Conv2D(512, 512, kernel_size=3, stride=1, padding=1) - - self.conv5_1 = nn.Conv2D(512, 512, kernel_size=3, stride=1, padding=1) - self.conv5_2 = nn.Conv2D(512, 512, kernel_size=3, stride=1, padding=1) - self.conv5_3 = nn.Conv2D(512, 512, kernel_size=3, stride=1, padding=1) - - checkpoint = os.path.join(MODULE_HOME, 'msgnet', 'vgg16.pdparams') - if not os.path.exists(checkpoint): - os.system('wget https://bj.bcebos.com/paddlehub/model/image/image_editing/vgg_paddle.pdparams -O ' + - checkpoint) - model_dict = paddle.load(checkpoint) - self.set_dict(model_dict) - print("load pretrained vgg16 checkpoint success") - - def forward(self, X): - h = F.relu(self.conv1_1(X)) - h = F.relu(self.conv1_2(h)) - relu1_2 = h - h = F.max_pool2d(h, kernel_size=2, stride=2) - - h = F.relu(self.conv2_1(h)) - h = F.relu(self.conv2_2(h)) - relu2_2 = h - h = F.max_pool2d(h, kernel_size=2, stride=2) - - h = F.relu(self.conv3_1(h)) - h = F.relu(self.conv3_2(h)) - h = F.relu(self.conv3_3(h)) - relu3_3 = h - h = F.max_pool2d(h, kernel_size=2, stride=2) - - h = F.relu(self.conv4_1(h)) - h = F.relu(self.conv4_2(h)) - h = F.relu(self.conv4_3(h)) - relu4_3 = h - - return [relu1_2, relu2_2, relu3_3, relu4_3] - - -@moduleinfo( - name="msgnet", - type="CV/image_editing", - author="baidu-vis", - author_email="", - summary="Msgnet is a image colorization style transfer model, this module is trained with COCO2014 dataset.", - version="1.0.0", - meta=StyleTransferModule) -class MSGNet(nn.Layer): - """ MSGNet (from MSG-Net paper) - Enables passing identity all the way through the generator - ref https://arxiv.org/abs/1703.06953 - - Args: - input_nc(int): Number of input channels, default is 3. - output_nc(int): Number of output channels, default is 3. - ngf(int): Number of input channel for middle layer, default is 128. - n_blocks(int): Block number, default is 6. - norm_layer(nn.Layer): Batch norm layer, default is nn.InstanceNorm2D. - load_checkpoint(str): Pretrained checkpoint path, default is None. 
- - Return: - img(paddle.Tensor): MSGNet output. - """ - def __init__(self, - input_nc=3, - output_nc=3, - ngf=128, - n_blocks=6, - norm_layer=nn.InstanceNorm2D, - load_checkpoint=None): - super(MSGNet, self).__init__() - self.gram = GramMatrix() - block = Bottleneck - upblock = UpBottleneck - expansion = 4 - - model1 = [ - ConvLayer(input_nc, 64, kernel_size=7, stride=1), - norm_layer(64), - nn.ReLU(), - block(64, 32, 2, 1, norm_layer), - block(32 * expansion, ngf, 2, 1, norm_layer) - ] - - self.model1 = nn.Sequential(*tuple(model1)) - - model = [] - model += model1 - - self.ins = Inspiration(ngf * expansion) - model.append(self.ins) - for i in range(n_blocks): - model += [block(ngf * expansion, ngf, 1, None, norm_layer)] - - model += [ - upblock(ngf * expansion, 32, 2, norm_layer), - upblock(32 * expansion, 16, 2, norm_layer), - norm_layer(16 * expansion), - nn.ReLU(), - ConvLayer(16 * expansion, output_nc, kernel_size=7, stride=1) - ] - model = tuple(model) - self.model = nn.Sequential(*model) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint) - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'style_paddle.pdparams') - if not os.path.exists(checkpoint): - os.system('wget https://bj.bcebos.com/paddlehub/model/image/image_editing/style_paddle.pdparams -O ' + - checkpoint) - model_dict = paddle.load(checkpoint) - model_dict_clone = model_dict.copy() - for key, value in model_dict_clone.items(): - if key.endswith(("scale")): - name = key.rsplit('.', 1)[0] + '.bias' - model_dict[name] = paddle.zeros(shape=model_dict[name].shape, dtype='float32') - model_dict[key] = paddle.ones(shape=model_dict[key].shape, dtype='float32') - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - self._vgg = None - - def transform(self, path: str): - transform = Compose([Resize( - (256, 256), interp='LINEAR'), CenterCrop(crop_size=256)], SetType(datatype='float32')) - return transform(path) - - def setTarget(self, Xs: paddle.Tensor): - """Calculate feature gram matrix""" - F = self.model1(Xs) - G = self.gram(F) - self.ins.setTarget(G) - - def getFeature(self, input: paddle.Tensor): - if not self._vgg: - self._vgg = Vgg16() - return self._vgg(input) - - def forward(self, input: paddle.Tensor): - return self.model(input) diff --git a/hub_module/modules/image/style_transfer/stylepro_artistic/data_feed.py b/hub_module/modules/image/style_transfer/stylepro_artistic/data_feed.py deleted file mode 100644 index 061cde3de93928e8f465039b1fae7c3857aa40a8..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/style_transfer/stylepro_artistic/data_feed.py +++ /dev/null @@ -1,85 +0,0 @@ -# coding=utf-8 -import os -import time -from collections import OrderedDict - -import cv2 -import numpy as np -from PIL import Image - -__all__ = ['reader'] - - -def reader(images=None, paths=None): - """ - Preprocess to get image data. - - Args: - images (list): list of dict objects, each dict contains key: - content(str): value is a numpy.ndarry with shape [H, W, C], content data. - styles(str): value is a list of numpy.ndarray with shape [H, W, C], styles data. - weights(str, optional): value is the interpolation weights correspond to styles. - paths (list): list of dict objects, each dict contains key: - content(str): value is the path to content. - styles(str): value is the paths to styles. - weights(str, optional): value is the interpolation weights correspond to styles. 
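-
-        Example (illustrative input; the array names are placeholders):
-            images = [{'content': content_arr,
-                       'styles': [style_arr_0, style_arr_1],
-                       'weights': [0.5, 0.5]}]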
- Yield: - im (numpy.ndarray): preprocessed data, with shape (1, 3, 512, 512). - """ - pipeline_list = list() - # images - for key, data in [('im_arr', images), ('im_path', paths)]: - if data is not None: - for component in data: - each_res = OrderedDict() - # content_arr - each_res['content_arr'], w, h = _handle_single( - **{key: component['content']}) - # styles_arr_list - styles_list = component['styles'] - styles_num = len(styles_list) - each_res['styles_arr_list'] = [] - for i, style_arr in enumerate(styles_list): - each_res['styles_arr_list'].append( - _handle_single(**{key: style_arr})[0]) - # style_interpolation_weights - if 'weights' in component: - assert len( - component['weights'] - ) == styles_num, "The number of weights must be equal to the number of styles." - each_res['style_interpolation_weights'] = component[ - 'weights'] - else: - each_res['style_interpolation_weights'] = np.ones( - styles_num) - each_res['style_interpolation_weights'] = [ - each_res['style_interpolation_weights'][j] / sum( - each_res['style_interpolation_weights']) - for j in range(styles_num) - ] - pipeline_list.append([each_res, w, h]) - - # yield - for element in pipeline_list: - yield element - - -def _handle_single(im_path=None, im_arr=None): - """ - Preprocess to get image data. - Args: - im_path (str): path to image. - im_arr (numpy.ndarray): image data, with shape (H, W, 3). - Returns: - im (numpy.ndarray): preprocessed data, with shape (1, 3, 512, 512). - """ - if im_path is not None: - im = cv2.imread(im_path)[:, :, ::-1].astype(np.float32) - if im_arr is not None: - im = im_arr[:, :, ::-1].astype(np.float32) - w, h = im.shape[1], im.shape[0] - im = cv2.resize(im, (512, 512), interpolation=cv2.INTER_LINEAR) - im = im.transpose((2, 0, 1)) - im = np.expand_dims(im, axis=0) - im /= 255.0 - return im, w, h diff --git a/hub_module/modules/image/style_transfer/stylepro_artistic/decoder_network.py b/hub_module/modules/image/style_transfer/stylepro_artistic/decoder_network.py deleted file mode 100644 index ff96701b50b378e9a66015089c7cb8ff441f9bbc..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/style_transfer/stylepro_artistic/decoder_network.py +++ /dev/null @@ -1,198 +0,0 @@ -# coding=utf-8 -from paddle.fluid.initializer import Constant -from paddle.fluid.param_attr import ParamAttr -import paddle.fluid as fluid - - -def decoder_net(): - x2paddle_22 = fluid.layers.create_parameter( - dtype='float32', - shape=[4], - name='x2paddle_22', - attr='x2paddle_22', - default_initializer=Constant(0.0)) - x2paddle_36 = fluid.layers.create_parameter( - dtype='float32', - shape=[4], - name='x2paddle_36', - attr='x2paddle_36', - default_initializer=Constant(0.0)) - x2paddle_44 = fluid.layers.create_parameter( - dtype='float32', - shape=[4], - name='x2paddle_44', - attr='x2paddle_44', - default_initializer=Constant(0.0)) - x2paddle_input_1 = fluid.layers.data( - dtype='float32', - shape=[1, 512, 64, 64], - name='x2paddle_input_1', - append_batch_size=False) - x2paddle_19 = fluid.layers.pad2d( - x2paddle_input_1, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_19') - x2paddle_20 = fluid.layers.conv2d( - x2paddle_19, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_1', - name='x2paddle_20', - bias_attr='x2paddle_2') - x2paddle_21 = fluid.layers.relu(x2paddle_20, name='x2paddle_21') - x2paddle_23 = fluid.layers.resize_nearest( - x2paddle_21, name='x2paddle_23', out_shape=[128, 128]) - 
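-    # The decoder mirrors the VGG-style encoder: nearest-neighbour resizes grow
-    # the feature map (64 -> 128 -> 256 -> 512 px) while the reflect-pad + 3x3
-    # conv + relu stages shrink the channels (512 -> 256 -> 128 -> 64 -> 3).
-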
x2paddle_24 = fluid.layers.pad2d( - x2paddle_23, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_24') - x2paddle_25 = fluid.layers.conv2d( - x2paddle_24, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_3', - name='x2paddle_25', - bias_attr='x2paddle_4') - x2paddle_26 = fluid.layers.relu(x2paddle_25, name='x2paddle_26') - x2paddle_27 = fluid.layers.pad2d( - x2paddle_26, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_27') - x2paddle_28 = fluid.layers.conv2d( - x2paddle_27, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_5', - name='x2paddle_28', - bias_attr='x2paddle_6') - x2paddle_29 = fluid.layers.relu(x2paddle_28, name='x2paddle_29') - x2paddle_30 = fluid.layers.pad2d( - x2paddle_29, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_30') - x2paddle_31 = fluid.layers.conv2d( - x2paddle_30, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_7', - name='x2paddle_31', - bias_attr='x2paddle_8') - x2paddle_32 = fluid.layers.relu(x2paddle_31, name='x2paddle_32') - x2paddle_33 = fluid.layers.pad2d( - x2paddle_32, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_33') - x2paddle_34 = fluid.layers.conv2d( - x2paddle_33, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_9', - name='x2paddle_34', - bias_attr='x2paddle_10') - x2paddle_35 = fluid.layers.relu(x2paddle_34, name='x2paddle_35') - x2paddle_37 = fluid.layers.resize_nearest( - x2paddle_35, name='x2paddle_37', out_shape=[256, 256]) - x2paddle_38 = fluid.layers.pad2d( - x2paddle_37, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_38') - x2paddle_39 = fluid.layers.conv2d( - x2paddle_38, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_11', - name='x2paddle_39', - bias_attr='x2paddle_12') - x2paddle_40 = fluid.layers.relu(x2paddle_39, name='x2paddle_40') - x2paddle_41 = fluid.layers.pad2d( - x2paddle_40, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_41') - x2paddle_42 = fluid.layers.conv2d( - x2paddle_41, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_13', - name='x2paddle_42', - bias_attr='x2paddle_14') - x2paddle_43 = fluid.layers.relu(x2paddle_42, name='x2paddle_43') - x2paddle_45 = fluid.layers.resize_nearest( - x2paddle_43, name='x2paddle_45', out_shape=[512, 512]) - x2paddle_46 = fluid.layers.pad2d( - x2paddle_45, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_46') - x2paddle_47 = fluid.layers.conv2d( - x2paddle_46, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_15', - name='x2paddle_47', - bias_attr='x2paddle_16') - x2paddle_48 = fluid.layers.relu(x2paddle_47, name='x2paddle_48') - x2paddle_49 = fluid.layers.pad2d( - x2paddle_48, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_49') - x2paddle_50 = fluid.layers.conv2d( - x2paddle_49, - num_filters=3, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - 
param_attr='x2paddle_17', - name='x2paddle_50', - bias_attr='x2paddle_18') - return x2paddle_input_1, x2paddle_50 diff --git a/hub_module/modules/image/style_transfer/stylepro_artistic/encoder_network.py b/hub_module/modules/image/style_transfer/stylepro_artistic/encoder_network.py deleted file mode 100644 index af9391b1ed3ec60a8f18bdbb0f3896b8f79e3dad..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/style_transfer/stylepro_artistic/encoder_network.py +++ /dev/null @@ -1,213 +0,0 @@ -# coding=utf-8 -from paddle.fluid.initializer import Constant -from paddle.fluid.param_attr import ParamAttr -import paddle.fluid as fluid - - -def encoder_net(): - x2paddle_0 = fluid.layers.data( - dtype='float32', - shape=[1, 3, 512, 512], - name='x2paddle_0', - append_batch_size=False) - x2paddle_21 = fluid.layers.conv2d( - x2paddle_0, - num_filters=3, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_1', - name='x2paddle_21', - bias_attr='x2paddle_2') - x2paddle_22 = fluid.layers.pad2d( - x2paddle_21, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_22') - x2paddle_23 = fluid.layers.conv2d( - x2paddle_22, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_3', - name='x2paddle_23', - bias_attr='x2paddle_4') - x2paddle_24 = fluid.layers.relu(x2paddle_23, name='x2paddle_24') - x2paddle_25 = fluid.layers.pad2d( - x2paddle_24, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_25') - x2paddle_26 = fluid.layers.conv2d( - x2paddle_25, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_5', - name='x2paddle_26', - bias_attr='x2paddle_6') - x2paddle_27 = fluid.layers.relu(x2paddle_26, name='x2paddle_27') - x2paddle_28 = fluid.layers.pool2d( - x2paddle_27, - pool_size=[2, 2], - pool_type='max', - pool_stride=[2, 2], - pool_padding=[0, 0], - ceil_mode=False, - name='x2paddle_28', - exclusive=False) - x2paddle_29 = fluid.layers.pad2d( - x2paddle_28, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_29') - x2paddle_30 = fluid.layers.conv2d( - x2paddle_29, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_7', - name='x2paddle_30', - bias_attr='x2paddle_8') - x2paddle_31 = fluid.layers.relu(x2paddle_30, name='x2paddle_31') - x2paddle_32 = fluid.layers.pad2d( - x2paddle_31, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_32') - x2paddle_33 = fluid.layers.conv2d( - x2paddle_32, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_9', - name='x2paddle_33', - bias_attr='x2paddle_10') - x2paddle_34 = fluid.layers.relu(x2paddle_33, name='x2paddle_34') - x2paddle_35 = fluid.layers.pool2d( - x2paddle_34, - pool_size=[2, 2], - pool_type='max', - pool_stride=[2, 2], - pool_padding=[0, 0], - ceil_mode=False, - name='x2paddle_35', - exclusive=False) - x2paddle_36 = fluid.layers.pad2d( - x2paddle_35, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_36') - x2paddle_37 = fluid.layers.conv2d( - x2paddle_36, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_11', - name='x2paddle_37', - bias_attr='x2paddle_12') - x2paddle_38 = 
fluid.layers.relu(x2paddle_37, name='x2paddle_38') - x2paddle_39 = fluid.layers.pad2d( - x2paddle_38, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_39') - x2paddle_40 = fluid.layers.conv2d( - x2paddle_39, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_13', - name='x2paddle_40', - bias_attr='x2paddle_14') - x2paddle_41 = fluid.layers.relu(x2paddle_40, name='x2paddle_41') - x2paddle_42 = fluid.layers.pad2d( - x2paddle_41, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_42') - x2paddle_43 = fluid.layers.conv2d( - x2paddle_42, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_15', - name='x2paddle_43', - bias_attr='x2paddle_16') - x2paddle_44 = fluid.layers.relu(x2paddle_43, name='x2paddle_44') - x2paddle_45 = fluid.layers.pad2d( - x2paddle_44, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_45') - x2paddle_46 = fluid.layers.conv2d( - x2paddle_45, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_17', - name='x2paddle_46', - bias_attr='x2paddle_18') - x2paddle_47 = fluid.layers.relu(x2paddle_46, name='x2paddle_47') - x2paddle_48 = fluid.layers.pool2d( - x2paddle_47, - pool_size=[2, 2], - pool_type='max', - pool_stride=[2, 2], - pool_padding=[0, 0], - ceil_mode=False, - name='x2paddle_48', - exclusive=False) - x2paddle_49 = fluid.layers.pad2d( - x2paddle_48, - pad_value=0.0, - mode='reflect', - paddings=[1, 1, 1, 1], - name='x2paddle_49') - x2paddle_50 = fluid.layers.conv2d( - x2paddle_49, - num_filters=512, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_19', - name='x2paddle_50', - bias_attr='x2paddle_20') - x2paddle_51 = fluid.layers.relu(x2paddle_50, name='x2paddle_51') - return x2paddle_0, x2paddle_51 diff --git a/hub_module/modules/image/style_transfer/stylepro_artistic/module.py b/hub_module/modules/image/style_transfer/stylepro_artistic/module.py deleted file mode 100644 index 7fc1461246899d8f21a2d12d8dce6dd4aa65f331..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/style_transfer/stylepro_artistic/module.py +++ /dev/null @@ -1,293 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division - -import ast -import copy -import time -import os -import argparse - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving - -from stylepro_artistic.encoder_network import encoder_net -from stylepro_artistic.decoder_network import decoder_net -from stylepro_artistic.processor import postprocess, fr, cv2_to_base64, base64_to_cv2 -from stylepro_artistic.data_feed import reader - - -@moduleinfo( - name="stylepro_artistic", - version="1.0.1", - type="cv/style_transfer", - summary= - "StylePro Artistic is an algorithm for Arbitrary image style, which is parameter-free, fast yet effective.", - author="baidu-bdl", - author_email="") -class StyleProjection(hub.Module): - def _initialize(self): - self.pretrained_encoder_net = os.path.join(self.directory, - "style_projection_enc") - self.pretrained_decoder_net = os.path.join(self.directory, - "style_projection_dec") - self._set_config() - - 
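-    # Note: the encoder and decoder run as two separate inference predictors;
-    # the style-projection feature rearrangement between them (processor.fr)
-    # runs on the CPU with NumPy.
-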
def _set_config(self): - """ - predictor config setting - """ - # encoder - cpu_config_enc = AnalysisConfig(self.pretrained_encoder_net) - cpu_config_enc.disable_glog_info() - cpu_config_enc.disable_gpu() - self.cpu_predictor_enc = create_paddle_predictor(cpu_config_enc) - # decoder - cpu_config_dec = AnalysisConfig(self.pretrained_decoder_net) - cpu_config_dec.disable_glog_info() - cpu_config_dec.disable_gpu() - self.cpu_predictor_dec = create_paddle_predictor(cpu_config_dec) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - # encoder - gpu_config_enc = AnalysisConfig(self.pretrained_encoder_net) - gpu_config_enc.disable_glog_info() - gpu_config_enc.enable_use_gpu( - memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor_enc = create_paddle_predictor(gpu_config_enc) - # decoder - gpu_config_dec = AnalysisConfig(self.pretrained_decoder_net) - gpu_config_dec.disable_glog_info() - gpu_config_dec.enable_use_gpu( - memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor_dec = create_paddle_predictor(gpu_config_dec) - - def style_transfer(self, - images=None, - paths=None, - alpha=1, - use_gpu=False, - output_dir='transfer_result', - visualization=False): - """ - API for image style transfer. - - Args: - images (list): list of dict objects, each dict contains key: - content(str): value is a numpy.ndarry with shape [H, W, C], content data. - styles(str): value is a list of numpy.ndarray with shape [H, W, C], styles data. - weights(str, optional): value is the interpolation weights correspond to styles. - paths (list): list of dict objects, each dict contains key: - content(str): value is the path to content. - styles(str): value is the paths to styles. - weights(str, optional): value is the interpolation weights correspond to styles. - alpha (float): The weight that controls the degree of stylization. Should be between 0 and 1. - use_gpu (bool): whether to use gpu. - output_dir (str): the path to store output images. - visualization (bool): whether to save image or not. - - Returns: - im_output (list[dict()]): list of output images and save path of images. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
- ) - - im_output = [] - for component, w, h in reader(images, paths): - content = PaddleTensor(component['content_arr'].copy()) - content_feats = self.gpu_predictor_enc.run( - [content]) if use_gpu else self.cpu_predictor_enc.run([content]) - accumulate = np.zeros((3, 512, 512)) - for idx, style_arr in enumerate(component['styles_arr_list']): - style = PaddleTensor(style_arr.copy()) - # encode - style_feats = self.gpu_predictor_enc.run( - [style]) if use_gpu else self.cpu_predictor_enc.run([style]) - fr_feats = fr(content_feats[0].as_ndarray(), - style_feats[0].as_ndarray(), alpha) - fr_feats = PaddleTensor(fr_feats.copy()) - # decode - predict_outputs = self.gpu_predictor_dec.run([ - fr_feats - ]) if use_gpu else self.cpu_predictor_dec.run([fr_feats]) - # interpolation - accumulate += predict_outputs[0].as_ndarray( - )[0] * component['style_interpolation_weights'][idx] - # postprocess - save_im_name = 'ndarray_{}.jpg'.format(time.time()) - result = postprocess( - accumulate, - output_dir, - save_im_name, - visualization, - size=(w, h)) - im_output.append(result) - return im_output - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - encode_dirname = os.path.join(dirname, 'encoder') - decode_dirname = os.path.join(dirname, 'decoder') - self._save_encode_model(encode_dirname, model_filename, params_filename, - combined) - self._save_decode_model(decode_dirname, model_filename, params_filename, - combined) - - def _save_encode_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - encode_program, encode_feeded_var_names, encode_target_vars = fluid.io.load_inference_model( - dirname=self.pretrained_encoder_net, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=encode_program, - executor=exe, - feeded_var_names=encode_feeded_var_names, - target_vars=encode_target_vars, - model_filename=model_filename, - params_filename=params_filename) - - def _save_decode_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - decode_program, decode_feeded_var_names, decode_target_vars = fluid.io.load_inference_model( - dirname=self.pretrained_decoder_net, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=decode_program, - executor=exe, - feeded_var_names=decode_feeded_var_names, - target_vars=decode_target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = copy.deepcopy(images) - for image in images_decode: - image['content'] = base64_to_cv2(image['content']) - image['styles'] = [ - base64_to_cv2(style) for style in image['styles'] - ] - results = self.style_transfer(images_decode, **kwargs) - results = [{ - 'data': cv2_to_base64(result['data']) - } for result in results] - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command. 
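-
-        Example (assumed invocation shape, based on the arguments parsed below):
-            hub run stylepro_artistic --content content.jpg --styles s1.jpg,s2.jpg --weights "[0.6, 0.4]"
-            hub run stylepro_artistic --content content.jpg --styles style.jpg --alpha 0.8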
- """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - if args.weights is None: - paths = [{ - 'content': args.content, - 'styles': args.styles.split(',') - }] - else: - paths = [{ - 'content': args.content, - 'styles': args.styles.split(','), - 'weights': list(args.weights) - }] - results = self.style_transfer( - paths=paths, - alpha=args.alpha, - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=True) - return results - - def add_module_config_arg(self): - """ - Add the command config options. - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='transfer_result', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=True, - help="whether to save output as images.") - - def add_module_input_arg(self): - """ - Add the command input options. - """ - self.arg_input_group.add_argument( - '--content', type=str, help="path to content.") - self.arg_input_group.add_argument( - '--styles', type=str, help="path to styles.") - self.arg_input_group.add_argument( - '--weights', - type=ast.literal_eval, - default=None, - help="interpolation weights of styles.") - self.arg_config_group.add_argument( - '--alpha', - type=ast.literal_eval, - default=1, - help="The parameter to control the tranform degree.") diff --git a/hub_module/modules/image/style_transfer/stylepro_artistic/processor.py b/hub_module/modules/image/style_transfer/stylepro_artistic/processor.py deleted file mode 100644 index a75e04fbbe22a67da44155ce5d984e2066530eae..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/style_transfer/stylepro_artistic/processor.py +++ /dev/null @@ -1,113 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -import base64 -import cv2 -import numpy as np - -__all__ = ['postprocess', 'fr'] - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def postprocess(im, output_dir, save_im_name, visualization, size): - im = np.multiply(im, 255.0) + 0.5 - im = np.clip(im, 0, 255) - im = im.astype(np.uint8) - im = im.transpose((1, 2, 0)) - im = im[:, :, ::-1] - im = cv2.resize(im, (size[0], size[1]), interpolation=cv2.INTER_LINEAR) - result = {'data': im} - if visualization: - if not os.path.exists(output_dir): - os.makedirs(output_dir) - elif os.path.isfile(output_dir): - os.remove(output_dir) - os.makedirs(output_dir) - # save image - save_path = os.path.join(output_dir, save_im_name) - cv2.imwrite(save_path, im) - result['save_path'] = save_path - return result - - -def 
fr(content_feat, style_feat, alpha): - content_feat = np.reshape(content_feat, (512, -1)) - style_feat = np.reshape(style_feat, (512, -1)) - - content_feat_index = np.argsort(content_feat, axis=1) - style_feat = np.sort(style_feat, axis=1) - - fr_feat = scatter_numpy(dim=1, index=content_feat_index, src=style_feat) - fr_feat = fr_feat * alpha + content_feat * (1 - alpha) - fr_feat = np.reshape(fr_feat, (1, 512, 64, 64)) - return fr_feat - - -def scatter_numpy(dim, index, src): - """ - Writes all values from the Tensor src into dst at the indices specified in the index Tensor. - - :param dim: The axis along which to index - :param index: The indices of elements to scatter - :param src: The source element(s) to scatter - :return: dst - """ - dst = src.copy() - idx_xsection_shape = index.shape[:dim] + index.shape[dim + 1:] - dst_xsection_shape = dst.shape[:dim] + dst.shape[dim + 1:] - if idx_xsection_shape != dst_xsection_shape: - raise ValueError( - "Except for dimension " + str(dim) + - ", all dimensions of index and output should be the same size") - if (index >= dst.shape[dim]).any() or (index < 0).any(): - raise IndexError("The values of index must be between 0 and {}.".format( - dst.shape[dim] - 1)) - - def make_slice(arr, dim, i): - slc = [slice(None)] * arr.ndim - slc[dim] = i - return tuple(slc) - - # We use index and dim parameters to create idx - # idx is in a form that can be used as a NumPy advanced index for scattering of src param. - idx = [[ - *np.indices(idx_xsection_shape).reshape(index.ndim - 1, -1), - index[make_slice(index, dim, i)].reshape(1, -1)[0] - ] for i in range(index.shape[dim])] - idx = list(np.concatenate(idx, axis=1)) - idx.insert(dim, idx.pop()) - - if not np.isscalar(src): - if index.shape[dim] > src.shape[dim]: - raise IndexError("Dimension " + str(dim) + - "of index can not be bigger than that of src ") - src_xsection_shape = src.shape[:dim] + src.shape[dim + 1:] - if idx_xsection_shape != src_xsection_shape: - raise ValueError( - "Except for dimension " + str(dim) + - ", all dimensions of index and src should be the same size") - # src_idx is a NumPy advanced index for indexing of elements in the src - src_idx = list(idx) - src_idx.pop(dim) - src_idx.insert( - dim, - np.repeat(np.arange(index.shape[dim]), np.prod(idx_xsection_shape))) - dst[tuple(idx)] = src[tuple(src_idx)] - else: - dst[idx] = src - return dst diff --git a/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/character.py b/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/character.py deleted file mode 100644 index 8e5f10211ba441a7dd9b4948413b79c8721eab07..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/character.py +++ /dev/null @@ -1,168 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
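-
-"""Utilities for converting between text labels and text indices for CTC and attention decoding."""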
- -import numpy as np -import string - - -class CharacterOps(object): - """ Convert between text-label and text-index """ - - def __init__(self, config): - self.character_type = config['character_type'] - self.loss_type = config['loss_type'] - if self.character_type == "en": - self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" - dict_character = list(self.character_str) - elif self.character_type == "ch": - character_dict_path = config['character_dict_path'] - self.character_str = "" - with open(character_dict_path, "rb") as fin: - lines = fin.readlines() - for line in lines: - line = line.decode('utf-8').strip("\n") - self.character_str += line - dict_character = list(self.character_str) - elif self.character_type == "en_sensitive": - # same with ASTER setting (use 94 char). - self.character_str = string.printable[:-6] - dict_character = list(self.character_str) - else: - self.character_str = None - assert self.character_str is not None, \ - "Nonsupport type of the character: {}".format(self.character_str) - self.beg_str = "sos" - self.end_str = "eos" - if self.loss_type == "attention": - dict_character = [self.beg_str, self.end_str] + dict_character - self.dict = {} - for i, char in enumerate(dict_character): - self.dict[char] = i - self.character = dict_character - - def encode(self, text): - """convert text-label into text-index. - input: - text: text labels of each image. [batch_size] - - output: - text: concatenated text index for CTCLoss. - [sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)] - length: length of each text. [batch_size] - """ - if self.character_type == "en": - text = text.lower() - - text_list = [] - for char in text: - if char not in self.dict: - continue - text_list.append(self.dict[char]) - text = np.array(text_list) - return text - - def decode(self, text_index, is_remove_duplicate=False): - """ convert text-index into text-label. 
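-
-        Example (illustrative, with the "en" + CTC configuration above, where
-        the CTC blank index is 36): decode([1, 1, 36, 2], is_remove_duplicate=True)
-        returns "12".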
""" - char_list = [] - char_num = self.get_char_num() - - if self.loss_type == "attention": - beg_idx = self.get_beg_end_flag_idx("beg") - end_idx = self.get_beg_end_flag_idx("end") - ignored_tokens = [beg_idx, end_idx] - else: - ignored_tokens = [char_num] - - for idx in range(len(text_index)): - if text_index[idx] in ignored_tokens: - continue - if is_remove_duplicate: - if idx > 0 and text_index[idx - 1] == text_index[idx]: - continue - char_list.append(self.character[text_index[idx]]) - text = ''.join(char_list) - return text - - def get_char_num(self): - return len(self.character) - - def get_beg_end_flag_idx(self, beg_or_end): - if self.loss_type == "attention": - if beg_or_end == "beg": - idx = np.array(self.dict[self.beg_str]) - elif beg_or_end == "end": - idx = np.array(self.dict[self.end_str]) - else: - assert False, "Unsupport type %s in get_beg_end_flag_idx"\ - % beg_or_end - return idx - else: - err = "error in get_beg_end_flag_idx when using the loss %s"\ - % (self.loss_type) - assert False, err - - -def cal_predicts_accuracy(char_ops, - preds, - preds_lod, - labels, - labels_lod, - is_remove_duplicate=False): - acc_num = 0 - img_num = 0 - for ino in range(len(labels_lod) - 1): - beg_no = preds_lod[ino] - end_no = preds_lod[ino + 1] - preds_text = preds[beg_no:end_no].reshape(-1) - preds_text = char_ops.decode(preds_text, is_remove_duplicate) - - beg_no = labels_lod[ino] - end_no = labels_lod[ino + 1] - labels_text = labels[beg_no:end_no].reshape(-1) - labels_text = char_ops.decode(labels_text, is_remove_duplicate) - img_num += 1 - - if preds_text == labels_text: - acc_num += 1 - acc = acc_num * 1.0 / img_num - return acc, acc_num, img_num - - -def convert_rec_attention_infer_res(preds): - img_num = preds.shape[0] - target_lod = [0] - convert_ids = [] - for ino in range(img_num): - end_pos = np.where(preds[ino, :] == 1)[0] - if len(end_pos) <= 1: - text_list = preds[ino, 1:] - else: - text_list = preds[ino, 1:end_pos[1]] - target_lod.append(target_lod[ino] + len(text_list)) - convert_ids = convert_ids + list(text_list) - convert_ids = np.array(convert_ids) - convert_ids = convert_ids.reshape((-1, 1)) - return convert_ids, target_lod - - -def convert_rec_label_to_lod(ori_labels): - img_num = len(ori_labels) - target_lod = [0] - convert_ids = [] - for ino in range(img_num): - target_lod.append(target_lod[ino] + len(ori_labels[ino])) - convert_ids = convert_ids + list(ori_labels[ino]) - convert_ids = np.array(convert_ids) - convert_ids = convert_ids.reshape((-1, 1)) - return convert_ids, target_lod diff --git a/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py b/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py deleted file mode 100644 index 490bba80765e5c15099001ce5abe65f8e504ab1a..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py +++ /dev/null @@ -1,434 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import ast -import copy -import math -import os -import time - -from paddle.fluid.core import AnalysisConfig, create_paddle_predictor, PaddleTensor -from paddlehub.common.logger import logger -from paddlehub.module.module import moduleinfo, runnable, serving -from PIL import Image -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -from chinese_ocr_db_crnn_mobile.character import CharacterOps -from 
chinese_ocr_db_crnn_mobile.utils import base64_to_cv2, draw_ocr, get_image_ext, sorted_boxes - - -@moduleinfo( - name="chinese_ocr_db_crnn_mobile", - version="1.0.3", - summary= - "The module can recognize the chinese texts in an image. Firstly, it will detect the text box positions based on the differentiable_binarization_chn module. Then it recognizes the chinese texts. ", - author="paddle-dev", - author_email="paddle-dev@baidu.com", - type="cv/text_recognition") -class ChineseOCRDBCRNN(hub.Module): - def _initialize(self, text_detector_module=None): - """ - initialize with the necessary elements - """ - self.character_dict_path = os.path.join(self.directory, 'assets', - 'ppocr_keys_v1.txt') - char_ops_params = { - 'character_type': 'ch', - 'character_dict_path': self.character_dict_path, - 'loss_type': 'ctc' - } - self.char_ops = CharacterOps(char_ops_params) - self.rec_image_shape = [3, 32, 320] - self._text_detector_module = text_detector_module - self.font_file = os.path.join(self.directory, 'assets', 'simfang.ttf') - self.pretrained_model_path = os.path.join(self.directory, - 'inference_model') - self._set_config() - - def _set_config(self): - """ - predictor config setting - """ - model_file_path = os.path.join(self.pretrained_model_path, 'model') - params_file_path = os.path.join(self.pretrained_model_path, 'params') - - config = AnalysisConfig(model_file_path, params_file_path) - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - - if use_gpu: - config.enable_use_gpu(8000, 0) - else: - config.disable_gpu() - - config.disable_glog_info() - - # use zero copy - config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") - config.switch_use_feed_fetch_ops(False) - self.predictor = create_paddle_predictor(config) - input_names = self.predictor.get_input_names() - self.input_tensor = self.predictor.get_input_tensor(input_names[0]) - output_names = self.predictor.get_output_names() - self.output_tensors = [] - for output_name in output_names: - output_tensor = self.predictor.get_output_tensor(output_name) - self.output_tensors.append(output_tensor) - - @property - def text_detector_module(self): - """ - text detect module - """ - if not self._text_detector_module: - self._text_detector_module = hub.Module( - name='chinese_text_detection_db_mobile') - return self._text_detector_module - - def read_images(self, paths=[]): - images = [] - for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file.".format(img_path) - img = cv2.imread(img_path) - if img is None: - logger.info("error in loading image:{}".format(img_path)) - continue - images.append(img) - return images - - def get_rotate_crop_image(self, img, points): - img_height, img_width = img.shape[0:2] - left = int(np.min(points[:, 0])) - right = int(np.max(points[:, 0])) - top = int(np.min(points[:, 1])) - bottom = int(np.max(points[:, 1])) - img_crop = img[top:bottom, left:right, :].copy() - points[:, 0] = points[:, 0] - left - points[:, 1] = points[:, 1] - top - img_crop_width = int(np.linalg.norm(points[0] - points[1])) - img_crop_height = int(np.linalg.norm(points[0] - points[3])) - pts_std = np.float32([[0, 0], [img_crop_width, 0],\ - [img_crop_width, img_crop_height], [0, img_crop_height]]) - M = cv2.getPerspectiveTransform(points, pts_std) - dst_img = cv2.warpPerspective( - img_crop, - M, (img_crop_width, img_crop_height), - borderMode=cv2.BORDER_REPLICATE) - dst_img_height, dst_img_width = dst_img.shape[0:2] - if dst_img_height * 
1.0 / dst_img_width >= 1.5:
-            dst_img = np.rot90(dst_img)
-        return dst_img
-
-    def resize_norm_img(self, img, max_wh_ratio):
-        imgC, imgH, imgW = self.rec_image_shape
-        # the target width grows with the widest image in the batch
-        imgW = int(32 * max_wh_ratio)
-        h = img.shape[0]
-        w = img.shape[1]
-        ratio = w / float(h)
-        if math.ceil(imgH * ratio) > imgW:
-            resized_w = imgW
-        else:
-            resized_w = int(math.ceil(imgH * ratio))
-        resized_image = cv2.resize(img, (resized_w, imgH))
-        resized_image = resized_image.astype('float32')
-        # scale to [0, 1], then normalize to [-1, 1]
-        resized_image = resized_image.transpose((2, 0, 1)) / 255
-        resized_image -= 0.5
-        resized_image /= 0.5
-        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
-        padding_im[:, :, 0:resized_w] = resized_image
-        return padding_im
-
-    def recognize_text(self,
-                       images=[],
-                       paths=[],
-                       use_gpu=False,
-                       output_dir='ocr_result',
-                       visualization=False,
-                       box_thresh=0.5,
-                       text_thresh=0.5):
-        """
-        Get the Chinese texts in the predicted images.
-        Args:
-            images (list(numpy.ndarray)): images data, shape of each is [H, W, C]; exclusive with paths.
-            paths (list[str]): the paths of images; exclusive with images.
-            use_gpu (bool): whether to use GPU.
-            output_dir (str): the directory to store output images.
-            visualization (bool): whether to save the visualized result image or not.
-            box_thresh (float): confidence threshold for the detected text boxes.
-            text_thresh (float): confidence threshold for the recognized Chinese texts.
-        Returns:
-            res (list): the recognized Chinese texts and the save paths of the result images.
-        """
-        if use_gpu:
-            try:
-                _places = os.environ["CUDA_VISIBLE_DEVICES"]
-                int(_places[0])
-            except:
-                raise RuntimeError(
-                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id."
-                )
-
-        self.use_gpu = use_gpu
-
-        if images != [] and isinstance(images, list) and paths == []:
-            predicted_data = images
-        elif images == [] and isinstance(paths, list) and paths != []:
-            predicted_data = self.read_images(paths)
-        else:
-            raise TypeError("The input data is inconsistent with expectations.")
-
-        assert predicted_data != [], "There is no image to predict. Please check the input data."
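-
-        # Two-stage pipeline: first detect the text box positions, then crop
-        # and rotate each box and run CRNN recognition on the crops.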
-        detection_results = self.text_detector_module.detect_text(
-            images=predicted_data, use_gpu=self.use_gpu, box_thresh=box_thresh)
-        boxes = [
-            np.array(item['data']).astype(np.float32)
-            for item in detection_results
-        ]
-        all_results = []
-        for index, img_boxes in enumerate(boxes):
-            original_image = predicted_data[index].copy()
-            result = {'save_path': ''}
-            if img_boxes is None:
-                result['data'] = []
-            else:
-                img_crop_list = []
-                # sort boxes top-to-bottom, left-to-right before cropping
-                sorted_box_list = sorted_boxes(img_boxes)
-                for num_box in range(len(sorted_box_list)):
-                    tmp_box = copy.deepcopy(sorted_box_list[num_box])
-                    img_crop = self.get_rotate_crop_image(
-                        original_image, tmp_box)
-                    img_crop_list.append(img_crop)
-
-                rec_results = self._recognize_text(img_crop_list)
-                # if the recognized text confidence score is lower than text_thresh, then drop it
-                rec_res_final = []
-                for rno, res in enumerate(rec_results):
-                    text, score = res
-                    if score >= text_thresh:
-                        rec_res_final.append({
-                            'text': text,
-                            'confidence': float(score),
-                            'text_box_position':
-                            sorted_box_list[rno].astype(int).tolist()
-                        })
-                result['data'] = rec_res_final
-
-                if visualization and result['data']:
-                    result['save_path'] = self.save_result_image(
-                        original_image, sorted_box_list, rec_results,
-                        output_dir, text_thresh)
-            all_results.append(result)
-
-        return all_results
-
-    @serving
-    def serving_method(self, images, **kwargs):
-        """
-        Run as a service.
-        """
-        images_decode = [base64_to_cv2(image) for image in images]
-        results = self.recognize_text(images_decode, **kwargs)
-        return results
-
-    def save_result_image(self,
-                          original_image,
-                          detection_boxes,
-                          rec_results,
-                          output_dir='ocr_result',
-                          text_thresh=0.5):
-        image = Image.fromarray(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB))
-        txts = [item[0] for item in rec_results]
-        scores = [item[1] for item in rec_results]
-        draw_img = draw_ocr(
-            image,
-            detection_boxes,
-            txts,
-            scores,
-            font_file=self.font_file,
-            draw_txt=True,
-            drop_score=text_thresh)
-
-        if not os.path.exists(output_dir):
-            os.makedirs(output_dir)
-        ext = get_image_ext(original_image)
-        saved_name = 'ndarray_{}{}'.format(time.time(), ext)
-        save_file_path = os.path.join(output_dir, saved_name)
-        cv2.imwrite(save_file_path, draw_img[:, :, ::-1])
-        return save_file_path
-
-    def _recognize_text(self, image_list):
-        img_num = len(image_list)
-        batch_num = 30
-        rec_res = []
-        for beg_img_no in range(0, img_num, batch_num):
-            end_img_no = min(img_num, beg_img_no + batch_num)
-            norm_img_batch = []
-            max_wh_ratio = 0
-            for ino in range(beg_img_no, end_img_no):
-                h, w = image_list[ino].shape[0:2]
-                wh_ratio = w / h
-                max_wh_ratio = max(max_wh_ratio, wh_ratio)
-            for ino in range(beg_img_no, end_img_no):
-                norm_img = self.resize_norm_img(image_list[ino], max_wh_ratio)
-                norm_img = norm_img[np.newaxis, :]
-                norm_img_batch.append(norm_img)
-            norm_img_batch = np.concatenate(norm_img_batch)
-            norm_img_batch = norm_img_batch.copy()
-            self.input_tensor.copy_from_cpu(norm_img_batch)
-            self.predictor.zero_copy_run()
-            rec_idx_batch = self.output_tensors[0].copy_to_cpu()
-            rec_idx_lod = self.output_tensors[0].lod()[0]
-            predict_batch = self.output_tensors[1].copy_to_cpu()
-            predict_lod = self.output_tensors[1].lod()[0]
-
-            for rno in range(len(rec_idx_lod) - 1):
-                beg = rec_idx_lod[rno]
-                end = rec_idx_lod[rno + 1]
-                rec_idx_tmp = rec_idx_batch[beg:end, 0]
-                preds_text = self.char_ops.decode(rec_idx_tmp)
-                beg = predict_lod[rno]
-                end = predict_lod[rno + 1]
-                probs = predict_batch[beg:end, :]
-                ind = np.argmax(probs, axis=1)
-                blank = probs.shape[1]
-                valid_ind =
np.where(ind != (blank - 1))[0] - if len(valid_ind) == 0: - continue - score = np.mean(probs[valid_ind, ind[valid_ind]]) - rec_res.append([preds_text, score]) - - return rec_res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - detector_dir = os.path.join(dirname, 'text_detector') - recognizer_dir = os.path.join(dirname, 'text_recognizer') - self._save_detector_model(detector_dir, model_filename, params_filename, - combined) - self._save_recognizer_model(recognizer_dir, model_filename, - params_filename, combined) - logger.info("The inference model has been saved in the path {}".format( - os.path.realpath(dirname))) - - def _save_detector_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - self.text_detector_module.save_inference_model( - dirname, model_filename, params_filename, combined) - - def _save_recognizer_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - model_file_path = os.path.join(self.pretrained_model_path, 'model') - params_file_path = os.path.join(self.pretrained_model_path, 'params') - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.pretrained_model_path, - model_filename=model_file_path, - params_filename=params_file_path, - executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @runnable - def run_cmd(self, argvs): - """ - Run as a command - """ - self.parser = argparse.ArgumentParser( - description="Run the %s module." % self.name, - prog='hub run %s' % self.name, - usage='%(prog)s', - add_help=True) - - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. 
Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - - self.add_module_config_arg() - self.add_module_input_arg() - - args = self.parser.parse_args(argvs) - results = self.recognize_text( - paths=[args.input_path], - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization) - return results - - def add_module_config_arg(self): - """ - Add the command config options - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='ocr_result', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - - def add_module_input_arg(self): - """ - Add the command input options - """ - self.arg_input_group.add_argument( - '--input_path', type=str, default=None, help="diretory to image") - - -if __name__ == '__main__': - ocr = ChineseOCRDBCRNN() - image_path = [ - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/11.jpg', - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg', - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/test_image.jpg' - ] - res = ocr.recognize_text(paths=image_path, visualization=True) - ocr.save_inference_model('save') - print(res) diff --git a/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/utils.py b/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/utils.py deleted file mode 100644 index cc9e9effc1b5904426377617b899d9aba9900d3e..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/utils.py +++ /dev/null @@ -1,190 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math - -from PIL import Image, ImageDraw, ImageFont -import base64 -import cv2 -import numpy as np - - -def draw_ocr(image, - boxes, - txts, - scores, - font_file, - draw_txt=True, - drop_score=0.5): - """ - Visualize the results of OCR detection and recognition - args: - image(Image|array): RGB image - boxes(list): boxes with shape(N, 4, 2) - txts(list): the texts - scores(list): txxs corresponding scores - draw_txt(bool): whether draw text or not - drop_score(float): only scores greater than drop_threshold will be visualized - return(array): - the visualized img - """ - if scores is None: - scores = [1] * len(boxes) - for (box, score) in zip(boxes, scores): - if score < drop_score or math.isnan(score): - continue - box = np.reshape(np.array(box), [-1, 1, 2]).astype(np.int64) - image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2) - - if draw_txt: - img = np.array(resize_img(image, input_size=600)) - txt_img = text_visual( - txts, - scores, - font_file, - img_h=img.shape[0], - img_w=600, - threshold=drop_score) - img = np.concatenate([np.array(img), np.array(txt_img)], axis=1) - return img - return image - - -def text_visual(texts, scores, font_file, img_h=400, img_w=600, threshold=0.): - """ - create new blank img and draw txt on it - args: - texts(list): the text will be draw - scores(list|None): corresponding score of each txt - img_h(int): the height of blank img - img_w(int): the width of blank img - return(array): - """ - if scores is not None: - assert len(texts) == len( - scores), "The 
number of txts and corresponding scores must match" - - def create_blank_img(): - blank_img = np.ones(shape=[img_h, img_w], dtype=np.int8) * 255 - blank_img[:, img_w - 1:] = 0 - blank_img = Image.fromarray(blank_img).convert("RGB") - draw_txt = ImageDraw.Draw(blank_img) - return blank_img, draw_txt - - blank_img, draw_txt = create_blank_img() - - font_size = 20 - txt_color = (0, 0, 0) - font = ImageFont.truetype(font_file, font_size, encoding="utf-8") - - gap = font_size + 5 - txt_img_list = [] - count, index = 1, 0 - for idx, txt in enumerate(texts): - index += 1 - if scores[idx] < threshold or math.isnan(scores[idx]): - index -= 1 - continue - first_line = True - while str_count(txt) >= img_w // font_size - 4: - tmp = txt - txt = tmp[:img_w // font_size - 4] - if first_line: - new_txt = str(index) + ': ' + txt - first_line = False - else: - new_txt = ' ' + txt - draw_txt.text((0, gap * count), new_txt, txt_color, font=font) - txt = tmp[img_w // font_size - 4:] - if count >= img_h // gap - 1: - txt_img_list.append(np.array(blank_img)) - blank_img, draw_txt = create_blank_img() - count = 0 - count += 1 - if first_line: - new_txt = str(index) + ': ' + txt + ' ' + '%.3f' % (scores[idx]) - else: - new_txt = " " + txt + " " + '%.3f' % (scores[idx]) - draw_txt.text((0, gap * count), new_txt, txt_color, font=font) - # whether add new blank img or not - if count >= img_h // gap - 1 and idx + 1 < len(texts): - txt_img_list.append(np.array(blank_img)) - blank_img, draw_txt = create_blank_img() - count = 0 - count += 1 - txt_img_list.append(np.array(blank_img)) - if len(txt_img_list) == 1: - blank_img = np.array(txt_img_list[0]) - else: - blank_img = np.concatenate(txt_img_list, axis=1) - return np.array(blank_img) - - -def str_count(s): - """ - Count the number of Chinese characters, - a single English character and a single number - equal to half the length of Chinese characters. 
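-    (each English letter, digit or space is counted as half the width of one
-    Chinese character)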
- args: - s(string): the input of string - return(int): - the number of Chinese characters - """ - import string - count_zh = count_pu = 0 - s_len = len(s) - en_dg_count = 0 - for c in s: - if c in string.ascii_letters or c.isdigit() or c.isspace(): - en_dg_count += 1 - elif c.isalpha(): - count_zh += 1 - else: - count_pu += 1 - return s_len - math.ceil(en_dg_count / 2) - - -def resize_img(img, input_size=600): - img = np.array(img) - im_shape = img.shape - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - im_scale = float(input_size) / float(im_size_max) - im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale) - return im - - -def get_image_ext(image): - if image.shape[2] == 4: - return ".png" - return ".jpg" - - -def sorted_boxes(dt_boxes): - """ - Sort text boxes in order from top to bottom, left to right - args: - dt_boxes(array):detected text boxes with shape [4, 2] - return: - sorted boxes(array) with shape [4, 2] - """ - num_boxes = dt_boxes.shape[0] - sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0])) - _boxes = list(sorted_boxes) - - for i in range(num_boxes - 1): - if abs(_boxes[i+1][0][1] - _boxes[i][0][1]) < 10 and \ - (_boxes[i + 1][0][0] < _boxes[i][0][0]): - tmp = _boxes[i] - _boxes[i] = _boxes[i + 1] - _boxes[i + 1] = tmp - return _boxes - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data diff --git a/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_server/character.py b/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_server/character.py deleted file mode 100644 index 8e5f10211ba441a7dd9b4948413b79c8721eab07..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_server/character.py +++ /dev/null @@ -1,168 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import string - - -class CharacterOps(object): - """ Convert between text-label and text-index """ - - def __init__(self, config): - self.character_type = config['character_type'] - self.loss_type = config['loss_type'] - if self.character_type == "en": - self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" - dict_character = list(self.character_str) - elif self.character_type == "ch": - character_dict_path = config['character_dict_path'] - self.character_str = "" - with open(character_dict_path, "rb") as fin: - lines = fin.readlines() - for line in lines: - line = line.decode('utf-8').strip("\n") - self.character_str += line - dict_character = list(self.character_str) - elif self.character_type == "en_sensitive": - # same with ASTER setting (use 94 char). 
- self.character_str = string.printable[:-6] - dict_character = list(self.character_str) - else: - self.character_str = None - assert self.character_str is not None, \ - "Nonsupport type of the character: {}".format(self.character_str) - self.beg_str = "sos" - self.end_str = "eos" - if self.loss_type == "attention": - dict_character = [self.beg_str, self.end_str] + dict_character - self.dict = {} - for i, char in enumerate(dict_character): - self.dict[char] = i - self.character = dict_character - - def encode(self, text): - """convert text-label into text-index. - input: - text: text labels of each image. [batch_size] - - output: - text: concatenated text index for CTCLoss. - [sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)] - length: length of each text. [batch_size] - """ - if self.character_type == "en": - text = text.lower() - - text_list = [] - for char in text: - if char not in self.dict: - continue - text_list.append(self.dict[char]) - text = np.array(text_list) - return text - - def decode(self, text_index, is_remove_duplicate=False): - """ convert text-index into text-label. """ - char_list = [] - char_num = self.get_char_num() - - if self.loss_type == "attention": - beg_idx = self.get_beg_end_flag_idx("beg") - end_idx = self.get_beg_end_flag_idx("end") - ignored_tokens = [beg_idx, end_idx] - else: - ignored_tokens = [char_num] - - for idx in range(len(text_index)): - if text_index[idx] in ignored_tokens: - continue - if is_remove_duplicate: - if idx > 0 and text_index[idx - 1] == text_index[idx]: - continue - char_list.append(self.character[text_index[idx]]) - text = ''.join(char_list) - return text - - def get_char_num(self): - return len(self.character) - - def get_beg_end_flag_idx(self, beg_or_end): - if self.loss_type == "attention": - if beg_or_end == "beg": - idx = np.array(self.dict[self.beg_str]) - elif beg_or_end == "end": - idx = np.array(self.dict[self.end_str]) - else: - assert False, "Unsupport type %s in get_beg_end_flag_idx"\ - % beg_or_end - return idx - else: - err = "error in get_beg_end_flag_idx when using the loss %s"\ - % (self.loss_type) - assert False, err - - -def cal_predicts_accuracy(char_ops, - preds, - preds_lod, - labels, - labels_lod, - is_remove_duplicate=False): - acc_num = 0 - img_num = 0 - for ino in range(len(labels_lod) - 1): - beg_no = preds_lod[ino] - end_no = preds_lod[ino + 1] - preds_text = preds[beg_no:end_no].reshape(-1) - preds_text = char_ops.decode(preds_text, is_remove_duplicate) - - beg_no = labels_lod[ino] - end_no = labels_lod[ino + 1] - labels_text = labels[beg_no:end_no].reshape(-1) - labels_text = char_ops.decode(labels_text, is_remove_duplicate) - img_num += 1 - - if preds_text == labels_text: - acc_num += 1 - acc = acc_num * 1.0 / img_num - return acc, acc_num, img_num - - -def convert_rec_attention_infer_res(preds): - img_num = preds.shape[0] - target_lod = [0] - convert_ids = [] - for ino in range(img_num): - end_pos = np.where(preds[ino, :] == 1)[0] - if len(end_pos) <= 1: - text_list = preds[ino, 1:] - else: - text_list = preds[ino, 1:end_pos[1]] - target_lod.append(target_lod[ino] + len(text_list)) - convert_ids = convert_ids + list(text_list) - convert_ids = np.array(convert_ids) - convert_ids = convert_ids.reshape((-1, 1)) - return convert_ids, target_lod - - -def convert_rec_label_to_lod(ori_labels): - img_num = len(ori_labels) - target_lod = [0] - convert_ids = [] - for ino in range(img_num): - target_lod.append(target_lod[ino] + len(ori_labels[ino])) - convert_ids = convert_ids + 
list(ori_labels[ino]) - convert_ids = np.array(convert_ids) - convert_ids = convert_ids.reshape((-1, 1)) - return convert_ids, target_lod diff --git a/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py b/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py deleted file mode 100644 index 2c82cdba4976209b1726a17df8687ed875e9b830..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py +++ /dev/null @@ -1,435 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import ast -import copy -import math -import os -import time - -from paddle.fluid.core import AnalysisConfig, create_paddle_predictor, PaddleTensor -from paddlehub.common.logger import logger -from paddlehub.module.module import moduleinfo, runnable, serving -from PIL import Image -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -from chinese_ocr_db_crnn_server.character import CharacterOps -from chinese_ocr_db_crnn_server.utils import base64_to_cv2, draw_ocr, get_image_ext, sorted_boxes - - -@moduleinfo( - name="chinese_ocr_db_crnn_server", - version="1.0.2", - summary= - "The module can recognize the chinese texts in an image. Firstly, it will detect the text box positions based on the differentiable_binarization_chn module. Then it recognizes the chinese texts. ", - author="paddle-dev", - author_email="paddle-dev@baidu.com", - type="cv/text_recognition") -class ChineseOCRDBCRNNServer(hub.Module): - def _initialize(self, text_detector_module=None): - """ - initialize with the necessary elements - """ - self.character_dict_path = os.path.join(self.directory, 'assets', - 'ppocr_keys_v1.txt') - char_ops_params = { - 'character_type': 'ch', - 'character_dict_path': self.character_dict_path, - 'loss_type': 'ctc' - } - self.char_ops = CharacterOps(char_ops_params) - self.rec_image_shape = [3, 32, 320] - self._text_detector_module = text_detector_module - self.font_file = os.path.join(self.directory, 'assets', 'simfang.ttf') - self.pretrained_model_path = os.path.join(self.directory, 'assets', - 'ch_rec_r34_vd_crnn') - self._set_config() - - def _set_config(self): - """ - predictor config setting - """ - model_file_path = os.path.join(self.pretrained_model_path, 'model') - params_file_path = os.path.join(self.pretrained_model_path, 'params') - - config = AnalysisConfig(model_file_path, params_file_path) - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - - if use_gpu: - config.enable_use_gpu(8000, 0) - else: - config.disable_gpu() - - config.disable_glog_info() - - # use zero copy - config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") - config.switch_use_feed_fetch_ops(False) - self.predictor = create_paddle_predictor(config) - input_names = self.predictor.get_input_names() - self.input_tensor = self.predictor.get_input_tensor(input_names[0]) - output_names = self.predictor.get_output_names() - self.output_tensors = [] - for output_name in output_names: - output_tensor = self.predictor.get_output_tensor(output_name) - self.output_tensors.append(output_tensor) - - @property - def text_detector_module(self): - """ - text detect module - """ - if not self._text_detector_module: - self._text_detector_module = hub.Module( - name='chinese_text_detection_db_server') - return self._text_detector_module - 
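The `text_detector_module` property above lazily constructs the DB detector on first access, so importing the recognizer stays cheap and a caller can inject a different detector through `_initialize(text_detector_module=...)`. A minimal, framework-free sketch of the same pattern (`HeavyDetector` is a hypothetical stand-in for `hub.Module(name='chinese_text_detection_db_server')`):

```python
class HeavyDetector:
    """Hypothetical stand-in for the real detection module."""

    def __init__(self):
        print("loading detector weights ...")  # expensive, runs at most once


class Recognizer:
    def __init__(self, detector=None):
        self._detector = detector  # optional injection, as in _initialize

    @property
    def detector(self):
        if self._detector is None:          # first access triggers the load
            self._detector = HeavyDetector()
        return self._detector


r = Recognizer()   # nothing loaded yet
_ = r.detector     # prints once
_ = r.detector     # cached thereafter
```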
- def read_images(self, paths=[]): - images = [] - for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file.".format(img_path) - img = cv2.imread(img_path) - if img is None: - logger.info("error in loading image:{}".format(img_path)) - continue - images.append(img) - return images - - def get_rotate_crop_image(self, img, points): - img_height, img_width = img.shape[0:2] - left = int(np.min(points[:, 0])) - right = int(np.max(points[:, 0])) - top = int(np.min(points[:, 1])) - bottom = int(np.max(points[:, 1])) - img_crop = img[top:bottom, left:right, :].copy() - points[:, 0] = points[:, 0] - left - points[:, 1] = points[:, 1] - top - img_crop_width = int(np.linalg.norm(points[0] - points[1])) - img_crop_height = int(np.linalg.norm(points[0] - points[3])) - pts_std = np.float32([[0, 0], [img_crop_width, 0],\ - [img_crop_width, img_crop_height], [0, img_crop_height]]) - M = cv2.getPerspectiveTransform(points, pts_std) - dst_img = cv2.warpPerspective( - img_crop, - M, (img_crop_width, img_crop_height), - borderMode=cv2.BORDER_REPLICATE) - dst_img_height, dst_img_width = dst_img.shape[0:2] - if dst_img_height * 1.0 / dst_img_width >= 1.5: - dst_img = np.rot90(dst_img) - return dst_img - - def resize_norm_img(self, img, max_wh_ratio): - imgC, imgH, imgW = self.rec_image_shape - imgW = int(32 * max_wh_ratio) - h = img.shape[0] - w = img.shape[1] - ratio = w / float(h) - if math.ceil(imgH * ratio) > imgW: - resized_w = imgW - else: - resized_w = int(math.ceil(imgH * ratio)) - resized_image = cv2.resize(img, (resized_w, imgH)) - resized_image = resized_image.astype('float32') - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) - padding_im[:, :, 0:resized_w] = resized_image - return padding_im - - def recognize_text(self, - images=[], - paths=[], - use_gpu=False, - output_dir='ocr_result', - visualization=False, - box_thresh=0.5, - text_thresh=0.5): - """ - Get the chinese texts in the predicted images. - Args: - images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths - paths (list[str]): The paths of images. If paths not images - use_gpu (bool): Whether to use gpu. - batch_size(int): the program deals once with one - output_dir (str): The directory to store output images. - visualization (bool): Whether to save image or not. - box_thresh(float): the threshold of the detected text box's confidence - text_thresh(float): the threshold of the recognize chinese texts' confidence - Returns: - res (list): The result of chinese texts and save path of images. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id." - ) - - self.use_gpu = use_gpu - - if images != [] and isinstance(images, list) and paths == []: - predicted_data = images - elif images == [] and isinstance(paths, list) and paths != []: - predicted_data = self.read_images(paths) - else: - raise TypeError("The input data is inconsistent with expectations.") - - assert predicted_data != [], "There is not any image to be predicted. Please check the input data." 
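`get_rotate_crop_image` above is the glue between detection and recognition: each detected quadrilateral is warped to an axis-aligned rectangle before being fed to the CRNN. A self-contained sketch of that warp, assuming the points are 4x2 float32 in clockwise order starting at the top-left (the order the detector produces):

```python
import cv2
import numpy as np


def crop_quad(img, points):
    """Warp a 4x2 quadrilateral to an upright rectangle (sketch)."""
    points = points.astype(np.float32)
    w = int(np.linalg.norm(points[0] - points[1]))  # top edge length
    h = int(np.linalg.norm(points[0] - points[3]))  # left edge length
    dst = np.float32([[0, 0], [w, 0], [w, h], [0, h]])
    M = cv2.getPerspectiveTransform(points, dst)
    crop = cv2.warpPerspective(img, M, (w, h), borderMode=cv2.BORDER_REPLICATE)
    if h * 1.0 / max(w, 1) >= 1.5:  # vertical text: rotate so it reads horizontally
        crop = np.rot90(crop)
    return crop
```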
- - detection_results = self.text_detector_module.detect_text( - images=predicted_data, use_gpu=self.use_gpu, box_thresh=box_thresh) - boxes = [ - np.array(item['data']).astype(np.float32) - for item in detection_results - ] - all_results = [] - for index, img_boxes in enumerate(boxes): - original_image = predicted_data[index].copy() - result = {'save_path': ''} - if img_boxes is None: - result['data'] = [] - else: - img_crop_list = [] - boxes = sorted_boxes(img_boxes) - for num_box in range(len(boxes)): - tmp_box = copy.deepcopy(boxes[num_box]) - img_crop = self.get_rotate_crop_image( - original_image, tmp_box) - img_crop_list.append(img_crop) - - rec_results = self._recognize_text(img_crop_list) - # if the recognized text confidence score is lower than text_thresh, then drop it - rec_res_final = [] - for index, res in enumerate(rec_results): - text, score = res - if score >= text_thresh: - rec_res_final.append({ - 'text': - text, - 'confidence': - float(score), - 'text_box_position': - boxes[index].astype(np.int).tolist() - }) - result['data'] = rec_res_final - - if visualization and result['data']: - result['save_path'] = self.save_result_image( - original_image, boxes, rec_results, output_dir, - text_thresh) - all_results.append(result) - - return all_results - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.recognize_text(images_decode, **kwargs) - return results - - def save_result_image(self, - original_image, - detection_boxes, - rec_results, - output_dir='ocr_result', - text_thresh=0.5): - image = Image.fromarray(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)) - txts = [item[0] for item in rec_results] - scores = [item[1] for item in rec_results] - draw_img = draw_ocr( - image, - detection_boxes, - txts, - scores, - font_file=self.font_file, - draw_txt=True, - drop_score=text_thresh) - - if not os.path.exists(output_dir): - os.makedirs(output_dir) - ext = get_image_ext(original_image) - saved_name = 'ndarray_{}{}'.format(time.time(), ext) - save_file_path = os.path.join(output_dir, saved_name) - cv2.imwrite(save_file_path, draw_img[:, :, ::-1]) - return save_file_path - - def _recognize_text(self, image_list): - img_num = len(image_list) - batch_num = 30 - rec_res = [] - predict_time = 0 - for beg_img_no in range(0, img_num, batch_num): - end_img_no = min(img_num, beg_img_no + batch_num) - norm_img_batch = [] - max_wh_ratio = 0 - for ino in range(beg_img_no, end_img_no): - h, w = image_list[ino].shape[0:2] - wh_ratio = w / h - max_wh_ratio = max(max_wh_ratio, wh_ratio) - for ino in range(beg_img_no, end_img_no): - norm_img = self.resize_norm_img(image_list[ino], max_wh_ratio) - norm_img = norm_img[np.newaxis, :] - norm_img_batch.append(norm_img) - norm_img_batch = np.concatenate(norm_img_batch) - norm_img_batch = norm_img_batch.copy() - self.input_tensor.copy_from_cpu(norm_img_batch) - self.predictor.zero_copy_run() - rec_idx_batch = self.output_tensors[0].copy_to_cpu() - rec_idx_lod = self.output_tensors[0].lod()[0] - predict_batch = self.output_tensors[1].copy_to_cpu() - predict_lod = self.output_tensors[1].lod()[0] - - for rno in range(len(rec_idx_lod) - 1): - beg = rec_idx_lod[rno] - end = rec_idx_lod[rno + 1] - rec_idx_tmp = rec_idx_batch[beg:end, 0] - preds_text = self.char_ops.decode(rec_idx_tmp) - beg = predict_lod[rno] - end = predict_lod[rno + 1] - probs = predict_batch[beg:end, :] - ind = np.argmax(probs, axis=1) - blank = probs.shape[1] - valid_ind = 
np.where(ind != (blank - 1))[0] - if len(valid_ind) == 0: - continue - score = np.mean(probs[valid_ind, ind[valid_ind]]) - rec_res.append([preds_text, score]) - - return rec_res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - detector_dir = os.path.join(dirname, 'text_detector') - recognizer_dir = os.path.join(dirname, 'text_recognizer') - self._save_detector_model(detector_dir, model_filename, params_filename, - combined) - self._save_recognizer_model(recognizer_dir, model_filename, - params_filename, combined) - logger.info("The inference model has been saved in the path {}".format( - os.path.realpath(dirname))) - - def _save_detector_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - self.text_detector_module.save_inference_model( - dirname, model_filename, params_filename, combined) - - def _save_recognizer_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - model_file_path = os.path.join(self.pretrained_model_path, 'model') - params_file_path = os.path.join(self.pretrained_model_path, 'params') - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.pretrained_model_path, - model_filename=model_file_path, - params_filename=params_file_path, - executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @runnable - def run_cmd(self, argvs): - """ - Run as a command - """ - self.parser = argparse.ArgumentParser( - description="Run the %s module." % self.name, - prog='hub run %s' % self.name, - usage='%(prog)s', - add_help=True) - - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. 
Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - - self.add_module_config_arg() - self.add_module_input_arg() - - args = self.parser.parse_args(argvs) - results = self.recognize_text( - paths=[args.input_path], - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization) - return results - - def add_module_config_arg(self): - """ - Add the command config options - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='ocr_result', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - - def add_module_input_arg(self): - """ - Add the command input options - """ - self.arg_input_group.add_argument( - '--input_path', type=str, default=None, help="diretory to image") - - -if __name__ == '__main__': - ocr = ChineseOCRDBCRNNServer() - print(ocr.name) - image_path = [ - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/11.jpg', - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg', - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/test_image.jpg' - ] - res = ocr.recognize_text(paths=image_path, visualization=True) - ocr.save_inference_model('save') - print(res) diff --git a/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_server/utils.py b/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_server/utils.py deleted file mode 100644 index cc9e9effc1b5904426377617b899d9aba9900d3e..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_server/utils.py +++ /dev/null @@ -1,190 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math - -from PIL import Image, ImageDraw, ImageFont -import base64 -import cv2 -import numpy as np - - -def draw_ocr(image, - boxes, - txts, - scores, - font_file, - draw_txt=True, - drop_score=0.5): - """ - Visualize the results of OCR detection and recognition - args: - image(Image|array): RGB image - boxes(list): boxes with shape(N, 4, 2) - txts(list): the texts - scores(list): txxs corresponding scores - draw_txt(bool): whether draw text or not - drop_score(float): only scores greater than drop_threshold will be visualized - return(array): - the visualized img - """ - if scores is None: - scores = [1] * len(boxes) - for (box, score) in zip(boxes, scores): - if score < drop_score or math.isnan(score): - continue - box = np.reshape(np.array(box), [-1, 1, 2]).astype(np.int64) - image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2) - - if draw_txt: - img = np.array(resize_img(image, input_size=600)) - txt_img = text_visual( - txts, - scores, - font_file, - img_h=img.shape[0], - img_w=600, - threshold=drop_score) - img = np.concatenate([np.array(img), np.array(txt_img)], axis=1) - return img - return image - - -def text_visual(texts, scores, font_file, img_h=400, img_w=600, threshold=0.): - """ - create new blank img and draw txt on it - args: - texts(list): the text will be draw - scores(list|None): corresponding score of each txt - img_h(int): the height of blank img - img_w(int): the width of blank img - return(array): - """ - if scores is not None: - assert len(texts) 
== len( - scores), "The number of txts and corresponding scores must match" - - def create_blank_img(): - blank_img = np.ones(shape=[img_h, img_w], dtype=np.int8) * 255 - blank_img[:, img_w - 1:] = 0 - blank_img = Image.fromarray(blank_img).convert("RGB") - draw_txt = ImageDraw.Draw(blank_img) - return blank_img, draw_txt - - blank_img, draw_txt = create_blank_img() - - font_size = 20 - txt_color = (0, 0, 0) - font = ImageFont.truetype(font_file, font_size, encoding="utf-8") - - gap = font_size + 5 - txt_img_list = [] - count, index = 1, 0 - for idx, txt in enumerate(texts): - index += 1 - if scores[idx] < threshold or math.isnan(scores[idx]): - index -= 1 - continue - first_line = True - while str_count(txt) >= img_w // font_size - 4: - tmp = txt - txt = tmp[:img_w // font_size - 4] - if first_line: - new_txt = str(index) + ': ' + txt - first_line = False - else: - new_txt = ' ' + txt - draw_txt.text((0, gap * count), new_txt, txt_color, font=font) - txt = tmp[img_w // font_size - 4:] - if count >= img_h // gap - 1: - txt_img_list.append(np.array(blank_img)) - blank_img, draw_txt = create_blank_img() - count = 0 - count += 1 - if first_line: - new_txt = str(index) + ': ' + txt + ' ' + '%.3f' % (scores[idx]) - else: - new_txt = " " + txt + " " + '%.3f' % (scores[idx]) - draw_txt.text((0, gap * count), new_txt, txt_color, font=font) - # whether add new blank img or not - if count >= img_h // gap - 1 and idx + 1 < len(texts): - txt_img_list.append(np.array(blank_img)) - blank_img, draw_txt = create_blank_img() - count = 0 - count += 1 - txt_img_list.append(np.array(blank_img)) - if len(txt_img_list) == 1: - blank_img = np.array(txt_img_list[0]) - else: - blank_img = np.concatenate(txt_img_list, axis=1) - return np.array(blank_img) - - -def str_count(s): - """ - Count the number of Chinese characters, - a single English character and a single number - equal to half the length of Chinese characters. 
- args: - s(string): the input of string - return(int): - the number of Chinese characters - """ - import string - count_zh = count_pu = 0 - s_len = len(s) - en_dg_count = 0 - for c in s: - if c in string.ascii_letters or c.isdigit() or c.isspace(): - en_dg_count += 1 - elif c.isalpha(): - count_zh += 1 - else: - count_pu += 1 - return s_len - math.ceil(en_dg_count / 2) - - -def resize_img(img, input_size=600): - img = np.array(img) - im_shape = img.shape - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - im_scale = float(input_size) / float(im_size_max) - im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale) - return im - - -def get_image_ext(image): - if image.shape[2] == 4: - return ".png" - return ".jpg" - - -def sorted_boxes(dt_boxes): - """ - Sort text boxes in order from top to bottom, left to right - args: - dt_boxes(array):detected text boxes with shape [4, 2] - return: - sorted boxes(array) with shape [4, 2] - """ - num_boxes = dt_boxes.shape[0] - sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0])) - _boxes = list(sorted_boxes) - - for i in range(num_boxes - 1): - if abs(_boxes[i+1][0][1] - _boxes[i][0][1]) < 10 and \ - (_boxes[i + 1][0][0] < _boxes[i][0][0]): - tmp = _boxes[i] - _boxes[i] = _boxes[i + 1] - _boxes[i + 1] = tmp - return _boxes - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data diff --git a/hub_module/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py b/hub_module/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py deleted file mode 100644 index 0ee5756f5bece95c08ecca2cd9bcdfe8f5ac53b0..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py +++ /dev/null @@ -1,333 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import ast -import math -import os -import time - -from paddle.fluid.core import AnalysisConfig, create_paddle_predictor, PaddleTensor -from paddlehub.common.logger import logger -from paddlehub.module.module import moduleinfo, runnable, serving -from PIL import Image -import base64 -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -@moduleinfo( - name="chinese_text_detection_db_mobile", - version="1.0.1", - summary= - "The module aims to detect chinese text position in the image, which is based on differentiable_binarization algorithm.", - author="paddle-dev", - author_email="paddle-dev@baidu.com", - type="cv/text_recognition") -class ChineseTextDetectionDB(hub.Module): - def _initialize(self): - """ - initialize with the necessary elements - """ - self.pretrained_model_path = os.path.join(self.directory, - 'inference_model') - self._set_config() - - def check_requirements(self): - try: - import shapely, pyclipper - except: - raise ImportError( - 'This module requires the shapely, pyclipper tools. The running environment does not meet the requirements. Please install the two packages.' 
- ) - - def _set_config(self): - """ - predictor config setting - """ - model_file_path = os.path.join(self.pretrained_model_path, 'model') - params_file_path = os.path.join(self.pretrained_model_path, 'params') - - config = AnalysisConfig(model_file_path, params_file_path) - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - - if use_gpu: - config.enable_use_gpu(8000, 0) - else: - config.disable_gpu() - - config.disable_glog_info() - - # use zero copy - config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") - config.switch_use_feed_fetch_ops(False) - self.predictor = create_paddle_predictor(config) - input_names = self.predictor.get_input_names() - self.input_tensor = self.predictor.get_input_tensor(input_names[0]) - output_names = self.predictor.get_output_names() - self.output_tensors = [] - for output_name in output_names: - output_tensor = self.predictor.get_output_tensor(output_name) - self.output_tensors.append(output_tensor) - - def read_images(self, paths=[]): - images = [] - for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file.".format(img_path) - img = cv2.imread(img_path) - if img is None: - logger.info("error in loading image:{}".format(img_path)) - continue - images.append(img) - return images - - def filter_tag_det_res(self, dt_boxes, image_shape): - img_height, img_width = image_shape[0:2] - dt_boxes_new = [] - for box in dt_boxes: - box = self.order_points_clockwise(box) - left = int(np.min(box[:, 0])) - right = int(np.max(box[:, 0])) - top = int(np.min(box[:, 1])) - bottom = int(np.max(box[:, 1])) - bbox_height = bottom - top - bbox_width = right - left - diffh = math.fabs(box[0, 1] - box[1, 1]) - diffw = math.fabs(box[0, 0] - box[3, 0]) - rect_width = int(np.linalg.norm(box[0] - box[1])) - rect_height = int(np.linalg.norm(box[0] - box[3])) - if rect_width <= 10 or rect_height <= 10: - continue - dt_boxes_new.append(box) - dt_boxes = np.array(dt_boxes_new) - return dt_boxes - - def order_points_clockwise(self, pts): - """ - reference from: https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py - # sort the points based on their x-coordinates - """ - xSorted = pts[np.argsort(pts[:, 0]), :] - - # grab the left-most and right-most points from the sorted - # x-roodinate points - leftMost = xSorted[:2, :] - rightMost = xSorted[2:, :] - - # now, sort the left-most coordinates according to their - # y-coordinates so we can grab the top-left and bottom-left - # points, respectively - leftMost = leftMost[np.argsort(leftMost[:, 1]), :] - (tl, bl) = leftMost - - rightMost = rightMost[np.argsort(rightMost[:, 1]), :] - (tr, br) = rightMost - - rect = np.array([tl, tr, br, bl], dtype="float32") - return rect - - def detect_text(self, - images=[], - paths=[], - use_gpu=False, - output_dir='detection_result', - visualization=False, - box_thresh=0.5): - """ - Get the text box in the predicted images. - Args: - images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths - paths (list[str]): The paths of images. If paths not images - use_gpu (bool): Whether to use gpu. Default false. - output_dir (str): The directory to store output images. - visualization (bool): Whether to save image or not. - box_thresh(float): the threshold of the detected text box's confidence - Returns: - res (list): The result of text detection box and save path of images. 
- """ - self.check_requirements() - - from chinese_text_detection_db_mobile.processor import DBPreProcess, DBPostProcess, draw_boxes, get_image_ext - - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id." - ) - - if images != [] and isinstance(images, list) and paths == []: - predicted_data = images - elif images == [] and isinstance(paths, list) and paths != []: - predicted_data = self.read_images(paths) - else: - raise TypeError("The input data is inconsistent with expectations.") - - assert predicted_data != [], "There is not any image to be predicted. Please check the input data." - - preprocessor = DBPreProcess() - postprocessor = DBPostProcess(box_thresh) - - all_imgs = [] - all_ratios = [] - all_results = [] - for original_image in predicted_data: - im, ratio_list = preprocessor(original_image) - res = {'save_path': ''} - if im is None: - res['data'] = [] - - else: - im = im.copy() - starttime = time.time() - self.input_tensor.copy_from_cpu(im) - self.predictor.zero_copy_run() - data_out = self.output_tensors[0].copy_to_cpu() - dt_boxes_list = postprocessor(data_out, [ratio_list]) - boxes = self.filter_tag_det_res(dt_boxes_list[0], - original_image.shape) - res['data'] = boxes.astype(np.int).tolist() - - all_imgs.append(im) - all_ratios.append(ratio_list) - if visualization: - img = Image.fromarray( - cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)) - draw_img = draw_boxes(img, boxes) - draw_img = np.array(draw_img) - if not os.path.exists(output_dir): - os.makedirs(output_dir) - ext = get_image_ext(original_image) - saved_name = 'ndarray_{}{}'.format(time.time(), ext) - cv2.imwrite( - os.path.join(output_dir, saved_name), - draw_img[:, :, ::-1]) - res['save_path'] = os.path.join(output_dir, saved_name) - - all_results.append(res) - - return all_results - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - model_file_path = os.path.join(self.pretrained_model_path, 'model') - params_file_path = os.path.join(self.pretrained_model_path, 'params') - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.pretrained_model_path, - model_filename=model_file_path, - params_filename=params_file_path, - executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.detect_text(images=images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command - """ - self.parser = argparse.ArgumentParser( - description="Run the %s module." % self.name, - prog='hub run %s' % self.name, - usage='%(prog)s', - add_help=True) - - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. 
Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - - self.add_module_config_arg() - self.add_module_input_arg() - - args = self.parser.parse_args(argvs) - results = self.detect_text( - paths=[args.input_path], - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization) - return results - - def add_module_config_arg(self): - """ - Add the command config options - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='detection_result', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - - def add_module_input_arg(self): - """ - Add the command input options - """ - self.arg_input_group.add_argument( - '--input_path', type=str, default=None, help="diretory to image") - - -if __name__ == '__main__': - db = ChineseTextDetectionDB() - image_path = [ - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/11.jpg', - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg', - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/test_image.jpg' - ] - res = db.detect_text(paths=image_path, visualization=True) - db.save_inference_model('save') - print(res) diff --git a/hub_module/modules/image/text_recognition/chinese_text_detection_db_mobile/processor.py b/hub_module/modules/image/text_recognition/chinese_text_detection_db_mobile/processor.py deleted file mode 100644 index aec5a11953bc094e21401acb81ca0074e22fd5de..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/text_recognition/chinese_text_detection_db_mobile/processor.py +++ /dev/null @@ -1,237 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import sys - -from PIL import Image, ImageDraw, ImageFont -from shapely.geometry import Polygon -import cv2 -import numpy as np -import pyclipper - - -class DBPreProcess(object): - def __init__(self, max_side_len=960): - self.max_side_len = max_side_len - - def resize_image_type(self, im): - """ - resize image to a size multiple of 32 which is required by the network - """ - h, w, _ = im.shape - - resize_w = w - resize_h = h - - # limit the max side - if max(resize_h, resize_w) > self.max_side_len: - if resize_h > resize_w: - ratio = float(self.max_side_len) / resize_h - else: - ratio = float(self.max_side_len) / resize_w - else: - ratio = 1. 
- resize_h = int(resize_h * ratio) - resize_w = int(resize_w * ratio) - if resize_h % 32 == 0: - resize_h = resize_h - elif resize_h // 32 <= 1: - resize_h = 32 - else: - resize_h = (resize_h // 32 - 1) * 32 - if resize_w % 32 == 0: - resize_w = resize_w - elif resize_w // 32 <= 1: - resize_w = 32 - else: - resize_w = (resize_w // 32 - 1) * 32 - try: - if int(resize_w) <= 0 or int(resize_h) <= 0: - return None, (None, None) - im = cv2.resize(im, (int(resize_w), int(resize_h))) - except: - print(im.shape, resize_w, resize_h) - sys.exit(0) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - return im, (ratio_h, ratio_w) - - def normalize(self, im): - img_mean = [0.485, 0.456, 0.406] - img_std = [0.229, 0.224, 0.225] - im = im.astype(np.float32, copy=False) - im = im / 255 - im -= img_mean - im /= img_std - channel_swap = (2, 0, 1) - im = im.transpose(channel_swap) - return im - - def __call__(self, im): - im, (ratio_h, ratio_w) = self.resize_image_type(im) - im = self.normalize(im) - im = im[np.newaxis, :] - return [im, (ratio_h, ratio_w)] - - -class DBPostProcess(object): - """ - The post process for Differentiable Binarization (DB). - """ - - def __init__(self, thresh=0.3, box_thresh=0.5, max_candidates=1000): - self.thresh = thresh - self.box_thresh = box_thresh - self.max_candidates = max_candidates - self.min_size = 3 - - def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height): - ''' - _bitmap: single map with shape (1, H, W), - whose values are binarized as {0, 1} - ''' - - bitmap = _bitmap - height, width = bitmap.shape - - outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, - cv2.CHAIN_APPROX_SIMPLE) - if len(outs) == 3: - img, contours, _ = outs[0], outs[1], outs[2] - elif len(outs) == 2: - contours, _ = outs[0], outs[1] - - num_contours = min(len(contours), self.max_candidates) - boxes = np.zeros((num_contours, 4, 2), dtype=np.int16) - scores = np.zeros((num_contours, ), dtype=np.float32) - - for index in range(num_contours): - contour = contours[index] - points, sside = self.get_mini_boxes(contour) - if sside < self.min_size: - continue - points = np.array(points) - score = self.box_score_fast(pred, points.reshape(-1, 2)) - if self.box_thresh > score: - continue - - box = self.unclip(points).reshape(-1, 1, 2) - box, sside = self.get_mini_boxes(box) - if sside < self.min_size + 2: - continue - box = np.array(box) - if not isinstance(dest_width, int): - dest_width = dest_width.item() - dest_height = dest_height.item() - - box[:, 0] = np.clip( - np.round(box[:, 0] / width * dest_width), 0, dest_width) - box[:, 1] = np.clip( - np.round(box[:, 1] / height * dest_height), 0, dest_height) - boxes[index, :, :] = box.astype(np.int16) - scores[index] = score - return boxes, scores - - def unclip(self, box, unclip_ratio=2.0): - poly = Polygon(box) - distance = poly.area * unclip_ratio / poly.length - offset = pyclipper.PyclipperOffset() - offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) - expanded = np.array(offset.Execute(distance)) - return expanded - - def get_mini_boxes(self, contour): - bounding_box = cv2.minAreaRect(contour) - points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0]) - - index_1, index_2, index_3, index_4 = 0, 1, 2, 3 - if points[1][1] > points[0][1]: - index_1 = 0 - index_4 = 1 - else: - index_1 = 1 - index_4 = 0 - if points[3][1] > points[2][1]: - index_2 = 2 - index_3 = 3 - else: - index_2 = 3 - index_3 = 2 - - box = [ - points[index_1], points[index_2], points[index_3], points[index_4] - 
] - return box, min(bounding_box[1]) - - def box_score_fast(self, bitmap, _box): - h, w = bitmap.shape[:2] - box = _box.copy() - xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1) - xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int), 0, w - 1) - ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int), 0, h - 1) - ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int), 0, h - 1) - - mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) - box[:, 0] = box[:, 0] - xmin - box[:, 1] = box[:, 1] - ymin - cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1) - return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] - - def __call__(self, predictions, ratio_list): - pred = predictions[:, 0, :, :] - segmentation = pred > self.thresh - - boxes_batch = [] - for batch_index in range(pred.shape[0]): - height, width = pred.shape[-2:] - tmp_boxes, tmp_scores = self.boxes_from_bitmap( - pred[batch_index], segmentation[batch_index], width, height) - - boxes = [] - for k in range(len(tmp_boxes)): - if tmp_scores[k] > self.box_thresh: - boxes.append(tmp_boxes[k]) - if len(boxes) > 0: - boxes = np.array(boxes) - - ratio_h, ratio_w = ratio_list[batch_index] - boxes[:, :, 0] = boxes[:, :, 0] / ratio_w - boxes[:, :, 1] = boxes[:, :, 1] / ratio_h - - boxes_batch.append(boxes) - return boxes_batch - - -def draw_boxes(image, boxes, scores=None, drop_score=0.5): - img = image.copy() - draw = ImageDraw.Draw(img) - if scores is None: - scores = [1] * len(boxes) - for (box, score) in zip(boxes, scores): - if score < drop_score: - continue - draw.line([(box[0][0], box[0][1]), (box[1][0], box[1][1])], fill='red') - draw.line([(box[1][0], box[1][1]), (box[2][0], box[2][1])], fill='red') - draw.line([(box[2][0], box[2][1]), (box[3][0], box[3][1])], fill='red') - draw.line([(box[3][0], box[3][1]), (box[0][0], box[0][1])], fill='red') - draw.line([(box[0][0] - 1, box[0][1] + 1), - (box[1][0] - 1, box[1][1] + 1)], - fill='red') - draw.line([(box[1][0] - 1, box[1][1] + 1), - (box[2][0] - 1, box[2][1] + 1)], - fill='red') - draw.line([(box[2][0] - 1, box[2][1] + 1), - (box[3][0] - 1, box[3][1] + 1)], - fill='red') - draw.line([(box[3][0] - 1, box[3][1] + 1), - (box[0][0] - 1, box[0][1] + 1)], - fill='red') - return img - - -def get_image_ext(image): - if image.shape[2] == 4: - return ".png" - return ".jpg" diff --git a/hub_module/modules/image/text_recognition/chinese_text_detection_db_server/module.py b/hub_module/modules/image/text_recognition/chinese_text_detection_db_server/module.py deleted file mode 100644 index 6c07144b790c4b4e136e40147a201cc2ee926b82..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/text_recognition/chinese_text_detection_db_server/module.py +++ /dev/null @@ -1,332 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import ast -import math -import os -import time - -from paddle.fluid.core import AnalysisConfig, create_paddle_predictor, PaddleTensor -from paddlehub.common.logger import logger -from paddlehub.module.module import moduleinfo, runnable, serving -from PIL import Image -import base64 -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -@moduleinfo( - name="chinese_text_detection_db_server", - 
version="1.0.0", - summary= - "The module aims to detect chinese text position in the image, which is based on differentiable_binarization algorithm.", - author="paddle-dev", - author_email="paddle-dev@baidu.com", - type="cv/text_recognition") -class ChineseTextDetectionDBServer(hub.Module): - def _initialize(self): - """ - initialize with the necessary elements - """ - self.pretrained_model_path = os.path.join(self.directory, - 'ch_det_r50_vd_db') - self._set_config() - - def check_requirements(self): - try: - import shapely, pyclipper - except: - raise ImportError( - 'This module requires the shapely, pyclipper tools. The running environment does not meet the requirements. Please install the two packages.' - ) - - def _set_config(self): - """ - predictor config setting - """ - model_file_path = os.path.join(self.pretrained_model_path, 'model') - params_file_path = os.path.join(self.pretrained_model_path, 'params') - - config = AnalysisConfig(model_file_path, params_file_path) - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - - if use_gpu: - config.enable_use_gpu(8000, 0) - else: - config.disable_gpu() - - config.disable_glog_info() - - # use zero copy - config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") - config.switch_use_feed_fetch_ops(False) - self.predictor = create_paddle_predictor(config) - input_names = self.predictor.get_input_names() - self.input_tensor = self.predictor.get_input_tensor(input_names[0]) - output_names = self.predictor.get_output_names() - self.output_tensors = [] - for output_name in output_names: - output_tensor = self.predictor.get_output_tensor(output_name) - self.output_tensors.append(output_tensor) - - def read_images(self, paths=[]): - images = [] - for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file.".format(img_path) - img = cv2.imread(img_path) - if img is None: - logger.info("error in loading image:{}".format(img_path)) - continue - images.append(img) - return images - - def filter_tag_det_res(self, dt_boxes, image_shape): - img_height, img_width = image_shape[0:2] - dt_boxes_new = [] - for box in dt_boxes: - box = self.order_points_clockwise(box) - left = int(np.min(box[:, 0])) - right = int(np.max(box[:, 0])) - top = int(np.min(box[:, 1])) - bottom = int(np.max(box[:, 1])) - bbox_height = bottom - top - bbox_width = right - left - diffh = math.fabs(box[0, 1] - box[1, 1]) - diffw = math.fabs(box[0, 0] - box[3, 0]) - rect_width = int(np.linalg.norm(box[0] - box[1])) - rect_height = int(np.linalg.norm(box[0] - box[3])) - if rect_width <= 10 or rect_height <= 10: - continue - dt_boxes_new.append(box) - dt_boxes = np.array(dt_boxes_new) - return dt_boxes - - def order_points_clockwise(self, pts): - """ - reference from: https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py - # sort the points based on their x-coordinates - """ - xSorted = pts[np.argsort(pts[:, 0]), :] - - # grab the left-most and right-most points from the sorted - # x-roodinate points - leftMost = xSorted[:2, :] - rightMost = xSorted[2:, :] - - # now, sort the left-most coordinates according to their - # y-coordinates so we can grab the top-left and bottom-left - # points, respectively - leftMost = leftMost[np.argsort(leftMost[:, 1]), :] - (tl, bl) = leftMost - - rightMost = rightMost[np.argsort(rightMost[:, 1]), :] - (tr, br) = rightMost - - rect = np.array([tl, tr, br, bl], dtype="float32") - return rect - - def detect_text(self, - images=[], - paths=[], - 
use_gpu=False, - output_dir='detection_result', - visualization=False, - box_thresh=0.5): - """ - Get the text box in the predicted images. - Args: - images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths - paths (list[str]): The paths of images. If paths not images - use_gpu (bool): Whether to use gpu. Default false. - output_dir (str): The directory to store output images. - visualization (bool): Whether to save image or not. - box_thresh(float): the threshold of the detected text box's confidence - Returns: - res (list): The result of text detection box and save path of images. - """ - self.check_requirements() - - from chinese_text_detection_db_server.processor import DBPreProcess, DBPostProcess, draw_boxes, get_image_ext - - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id." - ) - - if images != [] and isinstance(images, list) and paths == []: - predicted_data = images - elif images == [] and isinstance(paths, list) and paths != []: - predicted_data = self.read_images(paths) - else: - raise TypeError("The input data is inconsistent with expectations.") - - assert predicted_data != [], "There is not any image to be predicted. Please check the input data." - - preprocessor = DBPreProcess() - postprocessor = DBPostProcess(box_thresh) - - all_imgs = [] - all_ratios = [] - all_results = [] - for original_image in predicted_data: - im, ratio_list = preprocessor(original_image) - res = {'save_path': ''} - if im is None: - res['data'] = [] - - else: - im = im.copy() - starttime = time.time() - self.input_tensor.copy_from_cpu(im) - self.predictor.zero_copy_run() - data_out = self.output_tensors[0].copy_to_cpu() - dt_boxes_list = postprocessor(data_out, [ratio_list]) - boxes = self.filter_tag_det_res(dt_boxes_list[0], - original_image.shape) - res['data'] = boxes.astype(np.int).tolist() - - all_imgs.append(im) - all_ratios.append(ratio_list) - if visualization: - img = Image.fromarray( - cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)) - draw_img = draw_boxes(img, boxes) - draw_img = np.array(draw_img) - if not os.path.exists(output_dir): - os.makedirs(output_dir) - ext = get_image_ext(original_image) - saved_name = 'ndarray_{}{}'.format(time.time(), ext) - cv2.imwrite( - os.path.join(output_dir, saved_name), - draw_img[:, :, ::-1]) - res['save_path'] = os.path.join(output_dir, saved_name) - - all_results.append(res) - - return all_results - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - model_file_path = os.path.join(self.pretrained_model_path, 'model') - params_file_path = os.path.join(self.pretrained_model_path, 'params') - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.pretrained_model_path, - model_filename=model_file_path, - params_filename=params_file_path, - executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - 
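The `serving_method` below accepts base64-encoded images, which is what PaddleHub Serving posts to the module. A hypothetical client sketch; the URL pattern and port are the usual `hub serving` defaults and are assumptions here, not part of this module:

```python
import base64
import json

import requests  # assumed to be installed


def detect_via_serving(image_path,
                       url="http://127.0.0.1:8866/predict/chinese_text_detection_db_server"):
    with open(image_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode("utf8")
    resp = requests.post(url,
                         headers={"Content-Type": "application/json"},
                         data=json.dumps({"images": [b64]}))
    return resp.json()  # text boxes per image, as returned by detect_text
```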
@serving - def serving_method(self, images, **kwargs): - """ - Run as a service. - """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.detect_text(images=images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command - """ - self.parser = argparse.ArgumentParser( - description="Run the %s module." % self.name, - prog='hub run %s' % self.name, - usage='%(prog)s', - add_help=True) - - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - - self.add_module_config_arg() - self.add_module_input_arg() - - args = self.parser.parse_args(argvs) - results = self.detect_text( - paths=[args.input_path], - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization) - return results - - def add_module_config_arg(self): - """ - Add the command config options - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='detection_result', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - - def add_module_input_arg(self): - """ - Add the command input options - """ - self.arg_input_group.add_argument( - '--input_path', type=str, default=None, help="diretory to image") - - -if __name__ == '__main__': - db = ChineseTextDetectionDBServer() - image_path = [ - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/11.jpg', - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg' - ] - res = db.detect_text(paths=image_path, visualization=True) - db.save_inference_model('save') - print(res) diff --git a/hub_module/modules/image/text_recognition/chinese_text_detection_db_server/processor.py b/hub_module/modules/image/text_recognition/chinese_text_detection_db_server/processor.py deleted file mode 100644 index aec5a11953bc094e21401acb81ca0074e22fd5de..0000000000000000000000000000000000000000 --- a/hub_module/modules/image/text_recognition/chinese_text_detection_db_server/processor.py +++ /dev/null @@ -1,237 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import sys - -from PIL import Image, ImageDraw, ImageFont -from shapely.geometry import Polygon -import cv2 -import numpy as np -import pyclipper - - -class DBPreProcess(object): - def __init__(self, max_side_len=960): - self.max_side_len = max_side_len - - def resize_image_type(self, im): - """ - resize image to a size multiple of 32 which is required by the network - """ - h, w, _ = im.shape - - resize_w = w - resize_h = h - - # limit the max side - if max(resize_h, resize_w) > self.max_side_len: - if resize_h > resize_w: - ratio = float(self.max_side_len) / resize_h - else: - ratio = float(self.max_side_len) / resize_w - else: - ratio = 1. 
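This processor is identical to the mobile variant above (same blob hash). Its `normalize` step, defined further below, applies the standard ImageNet statistics before inference; as a standalone sketch:

```python
import numpy as np


def normalize_chw(im):
    """ImageNet normalization used by DBPreProcess.normalize (sketch)."""
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    im = im.astype(np.float32) / 255.0
    im = (im - mean) / std          # broadcasts over the channel axis (HWC)
    return im.transpose((2, 0, 1))  # HWC -> CHW, as the predictor expects
```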
- resize_h = int(resize_h * ratio) - resize_w = int(resize_w * ratio) - if resize_h % 32 == 0: - resize_h = resize_h - elif resize_h // 32 <= 1: - resize_h = 32 - else: - resize_h = (resize_h // 32 - 1) * 32 - if resize_w % 32 == 0: - resize_w = resize_w - elif resize_w // 32 <= 1: - resize_w = 32 - else: - resize_w = (resize_w // 32 - 1) * 32 - try: - if int(resize_w) <= 0 or int(resize_h) <= 0: - return None, (None, None) - im = cv2.resize(im, (int(resize_w), int(resize_h))) - except: - print(im.shape, resize_w, resize_h) - sys.exit(0) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - return im, (ratio_h, ratio_w) - - def normalize(self, im): - img_mean = [0.485, 0.456, 0.406] - img_std = [0.229, 0.224, 0.225] - im = im.astype(np.float32, copy=False) - im = im / 255 - im -= img_mean - im /= img_std - channel_swap = (2, 0, 1) - im = im.transpose(channel_swap) - return im - - def __call__(self, im): - im, (ratio_h, ratio_w) = self.resize_image_type(im) - im = self.normalize(im) - im = im[np.newaxis, :] - return [im, (ratio_h, ratio_w)] - - -class DBPostProcess(object): - """ - The post process for Differentiable Binarization (DB). - """ - - def __init__(self, thresh=0.3, box_thresh=0.5, max_candidates=1000): - self.thresh = thresh - self.box_thresh = box_thresh - self.max_candidates = max_candidates - self.min_size = 3 - - def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height): - ''' - _bitmap: single map with shape (1, H, W), - whose values are binarized as {0, 1} - ''' - - bitmap = _bitmap - height, width = bitmap.shape - - outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, - cv2.CHAIN_APPROX_SIMPLE) - if len(outs) == 3: - img, contours, _ = outs[0], outs[1], outs[2] - elif len(outs) == 2: - contours, _ = outs[0], outs[1] - - num_contours = min(len(contours), self.max_candidates) - boxes = np.zeros((num_contours, 4, 2), dtype=np.int16) - scores = np.zeros((num_contours, ), dtype=np.float32) - - for index in range(num_contours): - contour = contours[index] - points, sside = self.get_mini_boxes(contour) - if sside < self.min_size: - continue - points = np.array(points) - score = self.box_score_fast(pred, points.reshape(-1, 2)) - if self.box_thresh > score: - continue - - box = self.unclip(points).reshape(-1, 1, 2) - box, sside = self.get_mini_boxes(box) - if sside < self.min_size + 2: - continue - box = np.array(box) - if not isinstance(dest_width, int): - dest_width = dest_width.item() - dest_height = dest_height.item() - - box[:, 0] = np.clip( - np.round(box[:, 0] / width * dest_width), 0, dest_width) - box[:, 1] = np.clip( - np.round(box[:, 1] / height * dest_height), 0, dest_height) - boxes[index, :, :] = box.astype(np.int16) - scores[index] = score - return boxes, scores - - def unclip(self, box, unclip_ratio=2.0): - poly = Polygon(box) - distance = poly.area * unclip_ratio / poly.length - offset = pyclipper.PyclipperOffset() - offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) - expanded = np.array(offset.Execute(distance)) - return expanded - - def get_mini_boxes(self, contour): - bounding_box = cv2.minAreaRect(contour) - points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0]) - - index_1, index_2, index_3, index_4 = 0, 1, 2, 3 - if points[1][1] > points[0][1]: - index_1 = 0 - index_4 = 1 - else: - index_1 = 1 - index_4 = 0 - if points[3][1] > points[2][1]: - index_2 = 2 - index_3 = 3 - else: - index_2 = 3 - index_3 = 2 - - box = [ - points[index_1], points[index_2], points[index_3], points[index_4] - 
] - return box, min(bounding_box[1]) - - def box_score_fast(self, bitmap, _box): - h, w = bitmap.shape[:2] - box = _box.copy() - xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1) - xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int), 0, w - 1) - ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int), 0, h - 1) - ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int), 0, h - 1) - - mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) - box[:, 0] = box[:, 0] - xmin - box[:, 1] = box[:, 1] - ymin - cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1) - return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] - - def __call__(self, predictions, ratio_list): - pred = predictions[:, 0, :, :] - segmentation = pred > self.thresh - - boxes_batch = [] - for batch_index in range(pred.shape[0]): - height, width = pred.shape[-2:] - tmp_boxes, tmp_scores = self.boxes_from_bitmap( - pred[batch_index], segmentation[batch_index], width, height) - - boxes = [] - for k in range(len(tmp_boxes)): - if tmp_scores[k] > self.box_thresh: - boxes.append(tmp_boxes[k]) - if len(boxes) > 0: - boxes = np.array(boxes) - - ratio_h, ratio_w = ratio_list[batch_index] - boxes[:, :, 0] = boxes[:, :, 0] / ratio_w - boxes[:, :, 1] = boxes[:, :, 1] / ratio_h - - boxes_batch.append(boxes) - return boxes_batch - - -def draw_boxes(image, boxes, scores=None, drop_score=0.5): - img = image.copy() - draw = ImageDraw.Draw(img) - if scores is None: - scores = [1] * len(boxes) - for (box, score) in zip(boxes, scores): - if score < drop_score: - continue - draw.line([(box[0][0], box[0][1]), (box[1][0], box[1][1])], fill='red') - draw.line([(box[1][0], box[1][1]), (box[2][0], box[2][1])], fill='red') - draw.line([(box[2][0], box[2][1]), (box[3][0], box[3][1])], fill='red') - draw.line([(box[3][0], box[3][1]), (box[0][0], box[0][1])], fill='red') - draw.line([(box[0][0] - 1, box[0][1] + 1), - (box[1][0] - 1, box[1][1] + 1)], - fill='red') - draw.line([(box[1][0] - 1, box[1][1] + 1), - (box[2][0] - 1, box[2][1] + 1)], - fill='red') - draw.line([(box[2][0] - 1, box[2][1] + 1), - (box[3][0] - 1, box[3][1] + 1)], - fill='red') - draw.line([(box[3][0] - 1, box[3][1] + 1), - (box[0][0] - 1, box[0][1] + 1)], - fill='red') - return img - - -def get_image_ext(image): - if image.shape[2] == 4: - return ".png" - return ".jpg" diff --git a/hub_module/modules/text/embedding/tencent_ailab_chinese_embedding/module.py b/hub_module/modules/text/embedding/tencent_ailab_chinese_embedding/module.py deleted file mode 100644 index b12bc1faa1e5898964b66ebb0c6d9810dc2101c3..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/embedding/tencent_ailab_chinese_embedding/module.py +++ /dev/null @@ -1,170 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import io -import os - -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.common.paddle_helper import add_vars_prefix -from paddlehub.module.module import moduleinfo - - -def load_vocab(file_path): - """ - load the given vocabulary - """ - vocab = {} - with io.open(file_path, 'r', encoding='utf8') as f: - for line in f: - parts = line.split("\t") - vocab[parts[0]] = int(parts[1]) - - return vocab - - -@moduleinfo( - name="tencent_ailab_chinese_embedding", - version="1.0.0", - summary= - "Tencent AI Lab Embedding Corpus for Chinese Words and Phrases and the vocab size is 8,824,331. 
For more information, please refer to https://ai.tencent.com/ailab/nlp/zh/embedding.html", - author="", - author_email="", - type="nlp/semantic_model") -class TencentAILabChineseEmbedding(hub.Module): - def _initialize(self): - """ - initialize with the necessary elements - """ - self.pretrained_model_path = os.path.join(self.directory, "assets", - "model") - self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt") - self.vocab = load_vocab(self.vocab_path) - - def context(self, trainable=False, max_seq_len=128, num_slots=1): - """ - Get the input ,output and program of the pretrained tencent_ailab_chinese_embedding - - Args: - trainable(bool): whether fine-tune the pretrained parameters of simnet_bow or not - num_slots(int): It's number of slots inputted to the model, selectted as following options: - - - 1(default): There's only one data to be feeded in the model, e.g. the module is used for sentence classification task. - - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise). - - 3: There are three data to be feeded in the model, e.g. the module is used for text matching task (pair-wise). - - Returns: - inputs(dict): the input variables of tencent_ailab_chinese_embedding (words) - outputs(dict): the output variables of input words (word embeddings) - main_program(Program): the main_program of tencent_ailab_chinese_embedding with pretrained prameters - """ - assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots - main_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(main_program, startup_program): - with fluid.unique_name.guard(): - w_param_attrs = fluid.ParamAttr( - name="embedding_0.w_0", - initializer=fluid.initializer.TruncatedNormal(scale=0.02), - trainable=trainable) - - text_1 = fluid.data( - name='text', - shape=[-1, max_seq_len], - dtype='int64', - lod_level=0) - emb_1 = fluid.embedding( - input=text_1, - size=[len(self.vocab), 200], - is_sparse=True, - padding_idx=len(self.vocab) - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_1_name = emb_1.name - data_list = [text_1] - emb_name_list = [emb_1_name] - - if num_slots > 1: - text_2 = fluid.data( - name='text_2', - shape=[-1, max_seq_len], - dtype='int64', - lod_level=0) - emb_2 = fluid.embedding( - input=text_2, - size=[len(self.vocab), 200], - is_sparse=True, - padding_idx=len(self.vocab) - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_2_name = emb_2.name - data_list.append(text_2) - emb_name_list.append(emb_2_name) - - if num_slots > 2: - text_3 = fluid.data( - name='text_3', - shape=[-1, max_seq_len], - dtype='int64', - lod_level=0) - emb_3 = fluid.embedding( - input=text_3, - size=[len(self.vocab), 200], - is_sparse=True, - padding_idx=len(self.vocab) - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_3_name = emb_3.name - data_list.append(text_3) - emb_name_list.append(emb_3_name) - - variable_names = filter( - lambda v: v not in ['text', 'text_2', 'text_3'], - list(main_program.global_block().vars.keys())) - - prefix_name = "@HUB_{}@".format(self.name) - add_vars_prefix( - program=main_program, - prefix=prefix_name, - vars=variable_names) - for param in main_program.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - # load the pretrained model - def if_exist(var): - return os.path.exists( - os.path.join(self.pretrained_model_path, var.name)) - - 
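                # NOTE: load_vars only restores parameters whose file exists under
-                # pretrained_model_path; any variable the predicate rejects keeps
-                # the TruncatedNormal initialization defined above.
-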
fluid.io.load_vars( - exe, self.pretrained_model_path, predicate=if_exist) - - inputs = {} - outputs = {} - for index, data in enumerate(data_list): - if index == 0: - inputs['text'] = data - outputs['emb'] = main_program.global_block().vars[ - prefix_name + emb_name_list[0]] - else: - inputs['text_%s' % (index + 1)] = data - outputs['emb_%s' % - (index + 1)] = main_program.global_block().vars[ - prefix_name + emb_name_list[index]] - - return inputs, outputs, main_program - - def get_vocab_path(self): - return self.vocab_path - - -if __name__ == "__main__": - w2v = TencentAILabChineseEmbedding() - inputs, outputs, program = w2v.context(num_slots=3) - print(inputs) - print(outputs) - print(w2v.get_vocab_path()) diff --git a/hub_module/modules/text/embedding/tencent_ailab_chinese_embedding_small/module.py b/hub_module/modules/text/embedding/tencent_ailab_chinese_embedding_small/module.py deleted file mode 100644 index bab0289d286f310bca87b9fecbd960fdecf4730f..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/embedding/tencent_ailab_chinese_embedding_small/module.py +++ /dev/null @@ -1,170 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import io -import os - -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.common.paddle_helper import add_vars_prefix -from paddlehub.module.module import moduleinfo - - -def load_vocab(file_path): - """ - load the given vocabulary - """ - vocab = {} - with io.open(file_path, 'r', encoding='utf8') as f: - for line in f: - parts = line.split("\t") - vocab[parts[0]] = int(parts[1]) - - return vocab - - -@moduleinfo( - name="tencent_ailab_chinese_embedding_small", - version="1.0.0", - summary= - "Tencent AI Lab Embedding Corpus for Chinese Words and Phrases and the vocab size is 2,000,002. For more information, please refer to https://ai.tencent.com/ailab/nlp/zh/embedding.html", - author="", - author_email="", - type="nlp/semantic_model") -class TencentAILabChineseEmbeddingSmall(hub.Module): - def _initialize(self): - """ - initialize with the necessary elements - """ - self.pretrained_model_path = os.path.join(self.directory, "assets", - "model") - self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt") - self.vocab = load_vocab(self.vocab_path) - - def context(self, trainable=False, max_seq_len=128, num_slots=1): - """ - Get the input ,output and program of the pretrained word2vec_skipgram - - Args: - trainable(bool): Whether fine-tune the pretrained parameters of tencent_ailab_chinese_embedding_small or not. - num_slots(int): It's number of data inputted to the model, selectted as following options: - - - 1(default): There's only one data to be feeded in the model, e.g. the module is used for sentence classification task. - - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise). - - 3: There are three data to be feeded in the model, e.g. the module is used for text matching task (pair-wise). 
- - Returns: - inputs(dict): the input variables of tencent_ailab_chinese_embedding_small (words) - outputs(dict): the output variables of input words (word embeddings) - main_program(Program): the main_program of tencent_ailab_chinese_embedding_small with pretrained prameters - """ - assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots - main_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(main_program, startup_program): - with fluid.unique_name.guard(): - w_param_attrs = fluid.ParamAttr( - name="embedding_0.w_0", - initializer=fluid.initializer.TruncatedNormal(scale=0.02), - trainable=trainable) - - text_1 = fluid.data( - name='text', - shape=[-1, max_seq_len], - dtype='int64', - lod_level=0) - emb_1 = fluid.embedding( - input=text_1, - size=[len(self.vocab), 200], - is_sparse=True, - padding_idx=len(self.vocab) - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_1_name = emb_1.name - data_list = [text_1] - emb_name_list = [emb_1_name] - - if num_slots > 1: - text_2 = fluid.data( - name='text_2', - shape=[-1, max_seq_len], - dtype='int64', - lod_level=0) - emb_2 = fluid.embedding( - input=text_2, - size=[len(self.vocab), 200], - is_sparse=True, - padding_idx=len(self.vocab) - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_2_name = emb_2.name - data_list.append(text_2) - emb_name_list.append(emb_2_name) - - if num_slots > 2: - text_3 = fluid.data( - name='text_3', - shape=[-1, max_seq_len], - dtype='int64', - lod_level=0) - emb_3 = fluid.embedding( - input=text_3, - size=[len(self.vocab), 200], - is_sparse=True, - padding_idx=len(self.vocab) - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_3_name = emb_3.name - data_list.append(text_3) - emb_name_list.append(emb_3_name) - - variable_names = filter( - lambda v: v not in ['text', 'text_2', 'text_3'], - list(main_program.global_block().vars.keys())) - - prefix_name = "@HUB_{}@".format(self.name) - add_vars_prefix( - program=main_program, - prefix=prefix_name, - vars=variable_names) - for param in main_program.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - # load the pretrained model - def if_exist(var): - return os.path.exists( - os.path.join(self.pretrained_model_path, var.name)) - - fluid.io.load_vars( - exe, self.pretrained_model_path, predicate=if_exist) - - inputs = {} - outputs = {} - for index, data in enumerate(data_list): - if index == 0: - inputs['text'] = data - outputs['emb'] = main_program.global_block().vars[ - prefix_name + emb_name_list[0]] - else: - inputs['text_%s' % (index + 1)] = data - outputs['emb_%s' % - (index + 1)] = main_program.global_block().vars[ - prefix_name + emb_name_list[index]] - - return inputs, outputs, main_program - - def get_vocab_path(self): - return self.vocab_path - - -if __name__ == "__main__": - w2v = TencentAILabChineseEmbeddingSmall() - inputs, outputs, program = w2v.context(num_slots=3) - print(inputs) - print(outputs) - print(w2v.get_vocab_path()) diff --git a/hub_module/modules/text/embedding/word2vec_skipgram/module.py b/hub_module/modules/text/embedding/word2vec_skipgram/module.py deleted file mode 100644 index 40b5ce337e01b8046e4f85f80c5fd68639fe2477..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/embedding/word2vec_skipgram/module.py +++ /dev/null @@ -1,182 +0,0 @@ -# -*- coding:utf-8 -*- -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import io -import os - -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.common.paddle_helper import add_vars_prefix -from paddlehub.module.module import moduleinfo - - -def load_vocab(file_path): - """ - load the given vocabulary - """ - vocab = {} - with io.open(file_path, 'r', encoding='utf8') as f: - for line in f: - parts = line.strip().split("\t") - vocab[parts[0]] = int(parts[1]) - - return vocab - - -@moduleinfo( - name="word2vec_skipgram", - version="1.1.0", - summary="Chinese word embedding based on the SkipGram.", - author="baidu-nlp", - author_email="", - type="nlp/semantic_model") -class Word2vecSkipGram(hub.Module): - def _initialize(self): - """ - initialize with the necessary elements - """ - self.pretrained_model_path = os.path.join(self.directory, "assets", - "model") - self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt") - self.vocab = load_vocab(self.vocab_path) - - def context(self, trainable=False, max_seq_len=128, num_slots=1): - """ - Get the input ,output and program of the pretrained word2vec_skipgram - - Args: - trainable(bool): whether fine-tune the pretrained parameters of word2vec_skipgram or not. - max_seq_len (int): It will limit the total sequence returned so that it has a maximum length. - num_slots(int): It's number of data inputted to the model, selectted as following options: - - - 1(default): There's only one data to be feeded in the model, e.g. the module is used for sentence classification task. - - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise). - - 3: There are three data to be feeded in the model, e.g. the module is used for text matching task (pair-wise). 
- - Returns: - inputs(dict): the input variables of word2vec_skipgram (words) - outputs(dict): the output variables of input words (word embeddings) - main_program(Program): the main_program of word2vec_skipgram with pretrained prameters - """ - assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots - main_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(main_program, startup_program): - with fluid.unique_name.guard(): - - w_param_attrs = fluid.ParamAttr( - name="embedding_0.w_0", - initializer=fluid.initializer.TruncatedNormal(scale=0.02), - trainable=trainable) - - text_1 = fluid.data( - name='text', - shape=[-1, max_seq_len], - dtype='int64', - lod_level=0) - emb_1 = fluid.embedding( - input=text_1, - is_sparse=True, - size=[len(self.vocab), 128], - padding_idx=len(self.vocab) - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_1_name = emb_1.name - data_list = [text_1] - emb_name_list = [emb_1_name] - - if num_slots > 1: - text_2 = fluid.data( - name='text_2', - shape=[-1, max_seq_len], - dtype='int64', - lod_level=0) - emb_2 = fluid.embedding( - input=text_2, - is_sparse=True, - size=[len(self.vocab), 128], - padding_idx=len(self.vocab) - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_2_name = emb_2.name - data_list.append(text_2) - emb_name_list.append(emb_2_name) - - if num_slots > 2: - text_3 = fluid.data( - name='text_3', - shape=[-1, max_seq_len], - dtype='int64', - lod_level=0) - emb_3 = fluid.embedding( - input=text_3, - is_sparse=True, - size=[len(self.vocab), 128], - padding_idx=len(self.vocab) - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_3_name = emb_3.name - data_list.append(text_3) - emb_name_list.append(emb_3_name) - - variable_names = filter( - lambda v: v not in ['text', 'text_2', 'text_3'], - list(main_program.global_block().vars.keys())) - - prefix_name = "@HUB_{}@".format(self.name) - add_vars_prefix( - program=main_program, - prefix=prefix_name, - vars=variable_names) - for param in main_program.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - # load the pretrained model - def if_exist(var): - return os.path.exists( - os.path.join(self.pretrained_model_path, var.name)) - - fluid.io.load_vars( - exe, self.pretrained_model_path, predicate=if_exist) - - inputs = {} - outputs = {} - for index, data in enumerate(data_list): - if index == 0: - inputs['text'] = data - outputs['emb'] = main_program.global_block().vars[ - prefix_name + emb_name_list[0]] - else: - inputs['text_%s' % (index + 1)] = data - outputs['emb_%s' % - (index + 1)] = main_program.global_block().vars[ - prefix_name + emb_name_list[index]] - - return inputs, outputs, main_program - - def get_vocab_path(self): - return self.vocab_path - - -if __name__ == "__main__": - w2v = Word2vecSkipGram() - i, o, p = w2v.context(num_slots=3) - print(w2v.get_vocab_path()) diff --git a/hub_module/modules/text/lexical_analysis/jieba_paddle/module.py b/hub_module/modules/text/lexical_analysis/jieba_paddle/module.py deleted file mode 100644 index 4b5136d24f8a6c1592fa24249da89aa464dde240..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/lexical_analysis/jieba_paddle/module.py +++ /dev/null @@ -1,185 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import logging -import os - -import paddle.fluid as fluid -import 
paddlehub as hub
-from paddlehub.common.logger import logger
-from paddlehub.module.module import moduleinfo, serving
-
-
-@moduleinfo(
-    name="jieba_paddle",
-    version="1.0.0",
-    summary=
-    "jieba_paddle is a Chinese tokenizer using a BiGRU model based on the PaddlePaddle deep learning framework. For more information, please refer to https://github.com/fxsjy/jieba.",
-    author="baidu-paddle",
-    author_email="paddle-dev@gmail.com",
-    type="nlp/lexical_analysis")
-class JiebaPaddle(hub.Module):
-    def _initialize(self):
-        pass
-
-    @serving
-    def cut(self, sentence, use_paddle=True, cut_all=False, HMM=True):
-        """
-        The main function that segments an entire sentence that contains
-        Chinese characters into separated words.
-        Args:
-            sentence(str): The str(unicode) to be segmented.
-            use_paddle(bool): Whether to use the jieba paddle model. Default is True.
-            cut_all(bool): Model type. True for full pattern, False for accurate pattern.
-            HMM(bool): Whether to use the Hidden Markov Model.
-
-        Returns:
-            seg_list(list): The word segmentation result of the input sentence.
-        """
-        self.check_dependency()
-        import jieba
-        jieba.setLogLevel(logging.ERROR)
-        jieba._compat.setLogLevel(logging.ERROR)
-
-        if use_paddle:
-            jieba.enable_paddle()
-            res = " ".join(jieba.cut(sentence, use_paddle=True))
-            seg_list = res.strip(" ").split(" ")
-        else:
-            res = " ".join(jieba.cut(sentence, cut_all=cut_all, HMM=HMM))
-            seg_list = res.strip(" ").split(" ")
-
-        return seg_list
-
-    def check_dependency(self):
-        """
-        Check the jieba dependency.
-        """
-        try:
-            import jieba
-        except ImportError:
-            print(
-                'This module requires the jieba package. The running environment does not meet the requirements. Please install the jieba package.'
-            )
-            exit()
-
-    def cut_for_search(self, sentence, HMM=True):
-        """
-        Finer segmentation for search engines.
-        Args:
-            sentence(str): The str(unicode) to be segmented.
-            HMM(bool): Whether to use the Hidden Markov Model.
-
-        Returns:
-            seg_list(list): The word segmentation result of the input sentence.
-        """
-        self.check_dependency()
-        import jieba
-        jieba.setLogLevel(logging.ERROR)
-        res = " ".join(jieba.cut_for_search(sentence, HMM=HMM))
-        seg_list = res.strip(" ").split(" ")
-        return seg_list
-
-    def load_userdict(self, user_dict):
-        '''
-        Load a personalized dictionary to improve the recognition of the words it contains.
-        Args:
-            user_dict(str): A plain-text file path. It contains words and their frequencies. Can be a file-like object, or the path of the dictionary file,
-                whose encoding must be utf-8.
-            Structure of dict file:
-                word1 freq1 word_type1
-                word2 freq2 word_type2
-                ...
-            Word type may be omitted.
-        '''
-        self.check_dependency()
-        import jieba
-        jieba.setLogLevel(logging.ERROR)
-        # pass the given path through instead of a hard-coded "userdict.txt"
-        jieba.load_userdict(user_dict)
-
-    def extract_tags(self,
-                     sentence,
-                     topK=20,
-                     withWeight=False,
-                     allowPOS=(),
-                     withFlag=False):
-        """
-        Extract keywords from sentence using the TF-IDF algorithm.
-        Args:
-            topK(int): how many top keywords to return. `None` for all possible words.
-            withWeight(bool): if True, return a list of (word, weight);
-                if False, return a list of words.
-            allowPOS(tuple): the allowed POS list, e.g. ['ns', 'n', 'vn', 'v', 'nr'].
-                If the POS of a word is not in this list, it will be filtered.
-            withFlag(bool): only works when allowPOS is not empty.
-                If True, return a list of pair(word, weight) like posseg.cut;
-                if False, return a list of words.
-        Returns:
-            result(list): The keywords.
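-
-        Example (hypothetical usage, with `jb_pd` an instance of JiebaPaddle):
-            jb_pd.extract_tags("我来到北京清华大学", topK=5)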
- """ - self.check_dependency() - import jieba - import jieba.analyse - jieba.setLogLevel(logging.ERROR) - res = jieba.analyse.extract_tags( - sentence, - topK=topK, - withWeight=withWeight, - allowPOS=allowPOS, - withFlag=withFlag) - return res - - def textrank(self, - sentence, - topK=20, - withWeight=False, - allowPOS=('ns', 'n', 'vn', 'v'), - withFlag=False): - """ - Extract keywords from sentence using TextRank algorithm. - Args: - topK(int): return how many top keywords. `None` for all possible words. - withWeight(bool): if True, return a list of (word, weight); - if False, return a list of words. - allowPOS(tuple): the allowed POS list eg. ['ns', 'n', 'vn', 'v','nr']. - if the POS of w is not in this list,it will be filtered. - withFlag(bool): only work with allowPOS is not empty. - if True, return a list of pair(word, weight) like posseg.cut - if False, return a list of words - Returns: - result(list): The key words. - """ - self.check_dependency() - import jieba - jieba.setLogLevel(logging.ERROR) - res = jieba.analyse.textrank( - sentence, - topK=topK, - withWeight=withWeight, - allowPOS=allowPOS, - withFlag=withFlag) - return res - - -if __name__ == "__main__": - jb_pd = JiebaPaddle() - res = jb_pd.cut( - sentence="我来到北京清华大学", - use_paddle=True, - ) - print(res) - res = jb_pd.cut(sentence="我来到北京清华大学", use_paddle=False, cut_all=True) - print(res) - res = jb_pd.cut(sentence="我来到北京清华大学", use_paddle=False, cut_all=False) - print(res) - res = jb_pd.cut_for_search(sentence="我来到北京清华大学") - print(res) - res = jb_pd.extract_tags(sentence="我来到北京清华大学") - print(res) - res = jb_pd.extract_tags(sentence="我来到北京清华大学", withWeight=True) - print(res) - res = jb_pd.textrank(sentence="我来到北京清华大学", withWeight=True) - print(res) diff --git a/hub_module/modules/text/lexical_analysis/lac/ahocorasick.py b/hub_module/modules/text/lexical_analysis/lac/ahocorasick.py deleted file mode 100644 index 5df1a168bf3ace927bc014cf6a732057ad20599d..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/lexical_analysis/lac/ahocorasick.py +++ /dev/null @@ -1,150 +0,0 @@ -# -*- coding: UTF-8 -*- -""" -本模块实现AC自动机封装为Ahocorasick类,用于进行词典的多模匹配。 -""" - - -class Node(object): - """AC自动机的树结点. - - Attributes: - next: dict类型,指向子结点 - fail: Node类型,AC自动机的fail指针 - length: int类型,判断节点是否为单词 - """ - __slots__ = ['next', 'fail', 'length'] - - def __init__(self): - """初始化空节点.""" - self.next = {} - self.fail = None # fail指针默认为None - self.length = -1 - - -class Ahocorasick(object): - """实现AC自动机的类 - - Attributes: - __root: Node类型,AC自动机根节点 - """ - - def __init__(self): - """初始化Ahocorasick的根节点__root""" - self.__root = Node() - - def add_word(self, word): - """添加单词word到Trie树中""" - current = self.__root - for char in word: - current = current.next.setdefault(char, Node()) - current.length = len(word) - - def make(self): - """构建fail指针路径""" - - queue = list() - for key in self.__root.next: - self.__root.next[key].fail = self.__root - queue.append(self.__root.next[key]) - - # 广度优先算法遍历设置fail指针 - while len(queue) > 0: - # 基于当前节点的fail指针设置其子结点的fail指针 - current = queue.pop(0) - - for k in current.next: - current_fail = current.fail - - # 若当前节点有fail指针,尝试设置其子结点的fail指针 - while current_fail is not None: - if k in current_fail.next: - current.next[k].fail = current_fail.next[k] - break - current_fail = current_fail.fail - - # 若当前节点的fail指针不存在该子结点,令子结点fail指向根节点 - if current_fail is None: - current.next[k].fail = self.__root - - queue.append(current.next[k]) - - def search(self, content): - """后向最大匹配. - - 对content的文本进行多模匹配,返回后向最大匹配的结果. 
- - Args: - content: string类型, 用于多模匹配的字符串 - - Returns: - list类型, 最大匹配单词列表,每个元素为匹配的模式串在句中的起止位置,比如: - [(0, 2), [4, 7]] - - """ - result = [] - p = self.__root - for current_position in range(len(content)): - word = content[current_position] - - # - while word not in p.next: - if p == self.__root: - break - p = p.fail - else: - p = p.next[word] - if p.length > 0: - result.append((current_position - p.length + 1, - current_position)) - - return result - - def search_all(self, content): - """多模匹配的完全匹配. - - 对content的文本进行多模匹配,返回所有匹配结果 - - Args: - content: string类型, 用于多模匹配的字符串 - - Returns: - list类型, 所有匹配单词列表,每个元素为匹配的模式串在句中的起止位置,比如: - [(0, 2), [4, 7]] - - """ - result = [] - p = self.__root - for current_position in range(len(content)): - word = content[current_position] - - while word not in p.next: - if p == self.__root: - break - p = p.fail - else: - p = p.next[word] - - # 回溯查看是否存在以当前字符结尾的单词 - tmp = p - while tmp != self.__root: - if tmp.length > 0: - result.append((current_position - tmp.length + 1, - current_position)) - tmp = tmp.fail - - return result - - -if __name__ == '__main__': - - ah = Ahocorasick() - x = ["百度", "家", "高科技", "科技", "科技公司"] - for i in x: - ah.add_word(i) - ah.make() - string = '百度是家高科技公司' - for begin, end in ah.search_all(string): - print('all:', string[begin:end + 1]) - - for begin, end in ah.search(string): - print('search:', string[begin:end + 1]) diff --git a/hub_module/modules/text/lexical_analysis/lac/custom.py b/hub_module/modules/text/lexical_analysis/lac/custom.py deleted file mode 100644 index 0338be4cdfce9eb846b3bd5d66cebd7d87676617..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/lexical_analysis/lac/custom.py +++ /dev/null @@ -1,92 +0,0 @@ -# -*- coding: UTF-8 -*- -""" -该模块实现用户自定义词典的功能 -""" - -from io import open - -from .ahocorasick import Ahocorasick - - -class Customization(object): - """ - 基于AC自动机实现用户干预的功能 - """ - - def __init__(self): - self.dictitem = {} - self.ac = None - pass - - def load_customization(self, filename, sep=None): - """装载人工干预词典""" - self.ac = Ahocorasick() - with open(filename, 'r', encoding='utf8') as f: - for line in f: - if sep == None: - words = line.strip().split() - else: - words = line.strip().split(sep) - - if len(words) == 0: - continue - - phrase = "" - tags = [] - offset = [] - for word in words: - if word.rfind('/') < 1: - phrase += word - tags.append('') - else: - phrase += word[:word.rfind('/')] - tags.append(word[word.rfind('/') + 1:]) - offset.append(len(phrase)) - - if len(phrase) < 2 and tags[0] == '': - continue - - self.dictitem[phrase] = (tags, offset) - self.ac.add_word(phrase) - self.ac.make() - - def parse_customization(self, query, lac_tags): - """使用人工干预词典修正lac模型的输出""" - - def ac_postpress(ac_res): - ac_res.sort() - i = 1 - while i < len(ac_res): - if ac_res[i - 1][0] < ac_res[i][0] and ac_res[i][0] <= ac_res[ - i - 1][1]: - ac_res.pop(i) - continue - i += 1 - return ac_res - - if not self.ac: - print("Customized dict is not loaded.") - return - - ac_res = self.ac.search(query) - - ac_res = ac_postpress(ac_res) - - for begin, end in ac_res: - phrase = query[begin:end + 1] - index = begin - - tags, offsets = self.dictitem[phrase] - for tag, offset in zip(tags, offsets): - while index < begin + offset: - if len(tag) == 0: - lac_tags[index] = lac_tags[index][:-1] + 'I' - else: - lac_tags[index] = tag + "-I" - index += 1 - - lac_tags[begin] = lac_tags[begin][:-1] + 'B' - for offset in offsets: - index = begin + offset - if index < len(lac_tags): - lac_tags[index] = lac_tags[index][:-1] + 'B' 
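The two files above only define the matching and intervention logic. Below is a minimal sketch of how `Customization` is meant to be driven, assuming `ahocorasick.py` and `custom.py` are importable under the `lac` package as in the module that follows; the dictionary file `user.dict`, its single entry, and the LAC-style tags are invented for the demo.

```python
# Hypothetical driver for the Customization class above; only the class
# itself comes from the module, everything else is made up for the demo.
from lac.custom import Customization

# one user-dict line: force the phrase 春天 to a (made-up) tag SEASON
with open("user.dict", "w", encoding="utf8") as f:
    f.write("春天/SEASON\n")

query = "春天的花开"
# per-character tags in the B/I scheme produced by the LAC CRF decoder
lac_tags = ["n-B", "n-I", "u-B", "v-B", "v-I"]

custom = Customization()
custom.load_customization("user.dict")       # builds the AC automaton
custom.parse_customization(query, lac_tags)  # rewrites the tags in place
print(lac_tags)  # ['SEASON-B', 'SEASON-I', 'u-B', 'v-B', 'v-I']
```

`parse_customization` mutates `lac_tags` in place, which is why the `module.py` below can pass its decoded tag list straight in and keep using it afterwards.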
diff --git a/hub_module/modules/text/lexical_analysis/lac/module.py b/hub_module/modules/text/lexical_analysis/lac/module.py deleted file mode 100644 index 3f70a97f0097efda93e97d14d214f1e86e7b6aa6..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/lexical_analysis/lac/module.py +++ /dev/null @@ -1,532 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import ast -import io -import json -import math -import os -import six - -import numpy as np -import paddle.fluid as fluid -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -import paddlehub as hub -from paddlehub.common.logger import logger -from paddlehub.common.paddle_helper import add_vars_prefix -from paddlehub.common.utils import sys_stdin_encoding -from paddlehub.io.parser import txt_parser -from paddlehub.module.module import moduleinfo, runnable, serving - -from lac.network import lex_net -from lac.processor import load_kv_dict, word_to_ids, parse_result -from lac.custom import Customization - - -class DataFormatError(Exception): - def __init__(self, *args): - self.args = args - - -@moduleinfo( - name="lac", - version="2.2.0", - summary= - "Baidu's open-source lexical analysis tool for Chinese, including word segmentation, part-of-speech tagging & named entity recognition", - author="baidu-nlp", - author_email="paddle-dev@baidu.com", - type="nlp/lexical_analysis") -class LAC(hub.Module): - def _initialize(self, user_dict=None): - """ - initialize with the necessary elements - """ - self.pretrained_model_path = os.path.join(self.directory, "infer_model") - self.word2id_dict = load_kv_dict( - os.path.join(self.directory, "assets/word.dic"), - reverse=True, - value_func=int) - self.id2word_dict = load_kv_dict( - os.path.join(self.directory, "assets/word.dic")) - self.label2id_dict = load_kv_dict( - os.path.join(self.directory, "assets/tag.dic"), - reverse=True, - value_func=int) - self.id2label_dict = load_kv_dict( - os.path.join(self.directory, "assets/tag.dic")) - self.word_replace_dict = load_kv_dict( - os.path.join(self.directory, "assets/q2b.dic")) - self.oov_id = self.word2id_dict['OOV'] - self.word_dict_len = max(map(int, self.word2id_dict.values())) + 1 - self.label_dict_len = max(map(int, self.label2id_dict.values())) + 1 - self.tag_file = os.path.join(self.directory, "assets/tag_file.txt") - - if user_dict: - self.set_user_dict(dict_path=user_dict) - else: - self.custom = None - - self._set_config() - - def _set_config(self): - """ - predictor config setting - """ - cpu_config = AnalysisConfig(self.pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, trainable=False): - """ - Get the input ,output and program of the pretrained lac - - Args: - trainable(bool): whether fine-tune the pretrained parameters of lac or not - - Returns: - inputs(dict): the input variables of lac (words) - outputs(dict): the output variables of lac (the word segmentation results) - main_program(Program): the main_program of lac 
with pretrained parameters
-        """
-        main_program = fluid.Program()
-        startup_program = fluid.Program()
-        with fluid.program_guard(main_program, startup_program):
-            with fluid.unique_name.guard():
-                crf_decode, word, fc = lex_net(self.word_dict_len,
-                                               self.label_dict_len)
-                word_name = word.name
-                pred_name = crf_decode.name
-                fc_name = fc.name
-
-                prefix_name = "@HUB_{}@".format(self.name)
-                add_vars_prefix(program=main_program, prefix=prefix_name)
-                for param in main_program.global_block().iter_parameters():
-                    param.trainable = trainable
-
-                place = fluid.CPUPlace()
-                exe = fluid.Executor(place)
-
-                # load the lac pretrained model
-                def if_exist(var):
-                    return os.path.exists(
-                        os.path.join(self.pretrained_model_path, var.name))
-
-                fluid.io.load_vars(
-                    exe, self.pretrained_model_path, predicate=if_exist)
-
-                inputs = {
-                    "words":
-                    main_program.global_block().vars[prefix_name + word_name]
-                }
-                outputs = {
-                    "predicted":
-                    main_program.global_block().vars[prefix_name + pred_name],
-                    "sentence_feature":
-                    main_program.global_block().vars[prefix_name + fc_name]
-                }
-                return inputs, outputs, main_program
-
-    def set_user_dict(self, dict_path, sep=None):
-        """
-        Set the customized dictionary if you want to use a self-defined dictionary.
-
-        Args:
-            dict_path(str): The path to the customized dictionary.
-            sep: The separation token between phrases. Default is ' ' or '\t'.
-        """
-        if not os.path.exists(dict_path):
-            raise RuntimeError("File %s does not exist." % dict_path)
-        self.custom = Customization()
-        self.custom.load_customization(dict_path, sep)
-
-    def del_user_dict(self):
-        """
-        Delete the customized dictionary if you no longer want to use it.
-        """
-
-        if self.custom:
-            self.custom = None
-            print("Successfully deleted the customized dictionary!")
-
-    def to_unicode(self, texts):
-        """
-        Convert each element's type(str) of texts(list) to unicode in python2.7
-
-        Args:
-            texts(list): each element's type is str in python2.7
-
-        Returns:
-            texts(list): each element's type is unicode in python2.7
-        """
-        if six.PY2:
-            unicode_texts = []
-            for text in texts:
-                if isinstance(text, six.string_types):
-                    unicode_texts.append(
-                        text.decode(sys_stdin_encoding()).decode("utf8"))
-                else:
-                    unicode_texts.append(text)
-            texts = unicode_texts
-        return texts
-
-    def texts2tensor(self, texts):
-        """
-        Transform the texts(list) into a PaddleTensor
-        Args:
-            texts(list): texts
-        Returns:
-            tensor(PaddleTensor): tensor with the texts data
-        """
-        lod = [0]
-        data = []
-        for i, text in enumerate(texts):
-            text_inds = word_to_ids(
-                text,
-                self.word2id_dict,
-                self.word_replace_dict,
-                oov_id=self.oov_id)
-            data += text_inds
-            lod.append(len(text_inds) + lod[i])
-        tensor = PaddleTensor(np.array(data).astype('int64'))
-        tensor.name = "words"
-        tensor.lod = [lod]
-        tensor.shape = [lod[-1], 1]
-        return tensor
-
-    def _get_index(self, data_list, item=""):
-        """
-        find all indexes of item in data_list
-        """
-        res = []
-        for index, data in enumerate(data_list):
-            if data == item:
-                res.append(index)
-        return res
-
-    @serving
-    def cut(self, text, use_gpu=False, batch_size=1, return_tag=True):
-        """
-        The main function that segments an entire text that contains
-        Chinese characters into separated words.
-        Args:
-            text(:obj:`str` or :obj:`List[str]`): The Chinese texts to be segmented. This can be a string or a list of strings.
-            use_gpu(bool): whether to use gpu for prediction
-            batch_size(int): the number of texts processed in one batch
-            return_tag: whether to return the POS tags along with the words
- - Returns: - results(dict or list): The word segmentation result of the input text, whose key is 'word', if text is a list. - If text is a str, the word segmentation result (list) is obtained. - - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." - ) - - if isinstance(text, list) and len(text) != 0: - - predicted_data = self.to_unicode(text) - - # drop the empty string like "" in predicted_data - empty_str_indexes = self._get_index(predicted_data) - predicted_data = [data for data in predicted_data if data != ""] - - start_idx = 0 - iteration = int(math.ceil(len(predicted_data) / batch_size)) - results = [] - for i in range(iteration): - if i < (iteration - 1): - batch_data = predicted_data[start_idx:( - start_idx + batch_size)] - else: - batch_data = predicted_data[start_idx:] - - start_idx = start_idx + batch_size - tensor_words = self.texts2tensor(batch_data) - - if use_gpu: - batch_out = self.gpu_predictor.run([tensor_words]) - else: - batch_out = self.cpu_predictor.run([tensor_words]) - batch_result = parse_result( - batch_data, - batch_out[0], - self.id2label_dict, - interventer=self.custom) - results += batch_result - - for index in empty_str_indexes: - results.insert(index, {"word": [""], "tag": [""]}) - - if not return_tag: - for result in results: - result = result.pop("tag") - return results - - return results - elif isinstance(text, str) and text != "": - tensor_words = self.texts2tensor([text]) - - if use_gpu: - batch_out = self.gpu_predictor.run([tensor_words]) - else: - batch_out = self.cpu_predictor.run([tensor_words]) - batch_result = parse_result([text], - batch_out[0], - self.id2label_dict, - interventer=self.custom) - - return batch_result[0]['word'] - elif text == "": - return text - else: - raise TypeError("The input data is inconsistent with expectations.") - - def lexical_analysis(self, - texts=[], - data={}, - use_gpu=False, - batch_size=1, - return_tag=True): - """ - Get the word segmentation results with the texts as input - - Args: - texts(list): the input texts to be segmented, if texts not data - data(dict): key must be 'text', value is the texts to be segmented, if data not texts - use_gpu(bool): whether use gpu to predict or not - batch_size(int): the program deals once with one batch - return_tag: Whether to get tag or not. - - Returns: - results(list): the word segmentation results - """ - - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
- ) - - if texts != [] and isinstance(texts, list) and data == {}: - predicted_data = texts - elif texts == [] and isinstance(data, dict) and isinstance( - data.get('text', None), list) and data['text']: - predicted_data = data["text"] - else: - raise TypeError("The input data is inconsistent with expectations.") - - predicted_data = self.to_unicode(predicted_data) - - # drop the empty string like "" in predicted_data - empty_str_indexes = self._get_index(predicted_data) - predicted_data = [data for data in predicted_data if data != ""] - - start_idx = 0 - iteration = int(math.ceil(len(predicted_data) / batch_size)) - results = [] - for i in range(iteration): - if i < (iteration - 1): - batch_data = predicted_data[start_idx:(start_idx + batch_size)] - else: - batch_data = predicted_data[start_idx:] - - start_idx = start_idx + batch_size - tensor_words = self.texts2tensor(batch_data) - - if use_gpu: - batch_out = self.gpu_predictor.run([tensor_words]) - else: - batch_out = self.cpu_predictor.run([tensor_words]) - batch_result = parse_result( - batch_data, - batch_out[0], - self.id2label_dict, - interventer=self.custom) - results += batch_result - - for index in empty_str_indexes: - results.insert(index, {"word": [""], "tag": [""]}) - - if not return_tag: - for result in results: - result = result.pop("tag") - return results - - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command - """ - self.parser = argparse.ArgumentParser( - description="Run the lac module.", - prog='hub run lac', - usage='%(prog)s', - add_help=True) - - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - - self.add_module_config_arg() - self.add_module_input_arg() - - args = self.parser.parse_args(argvs) - - try: - input_data = self.check_input_data(args) - except DataFormatError and RuntimeError: - self.parser.print_help() - return None - - if args.user_dict: - self.set_user_dict(args.user_dict) - - results = self.lexical_analysis( - texts=input_data, - use_gpu=args.use_gpu, - batch_size=args.batch_size, - return_tag=args.return_tag) - - return results - - def get_tags(self): - """ - Get the tags which was used when pretraining lac - - Returns: - self.tag_name_dict(dict):lac tags - """ - self.tag_name_dict = {} - with io.open(self.tag_file, encoding="utf8") as f: - for line in f: - tag, tag_name = line.strip().split(" ") - self.tag_name_dict[tag] = tag_name - return self.tag_name_dict - - def add_module_config_arg(self): - """ - Add the command config options - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - - self.arg_config_group.add_argument( - '--batch_size', - type=int, - default=1, - help="batch size for prediction") - self.arg_config_group.add_argument( - '--user_dict', - type=str, - default=None, - help= - "customized dictionary for intervening the word segmentation result" - ) - self.arg_config_group.add_argument( - '--return_tag', - type=ast.literal_eval, - default=True, - help="whether return tags of results or not") - - def add_module_input_arg(self): - """ - Add the command input options - """ - self.arg_input_group.add_argument( - '--input_file', - type=str, - default=None, - help="file contain input data") - self.arg_input_group.add_argument( - '--input_text', 
type=str, default=None, help="text to predict") - - def check_input_data(self, args): - input_data = [] - if args.input_file: - if not os.path.exists(args.input_file): - print("File %s is not exist." % args.input_file) - raise RuntimeError - else: - input_data = txt_parser.parse(args.input_file, use_strip=True) - elif args.input_text: - if args.input_text.strip() != '': - if six.PY2: - input_data = [ - args.input_text.decode( - sys_stdin_encoding()).decode("utf8") - ] - else: - input_data = [args.input_text] - - if input_data == []: - print("ERROR: The input data is inconsistent with expectations.") - raise DataFormatError - - return input_data - - -if __name__ == '__main__': - lac = LAC(user_dict="user.dict") - # or use the fuction user_dict to set - # lac.set_user_dict("user.dict") - - test_text = [ - "今天是个好日子", "天气预报说今天要下雨", "", "下一班地铁马上就要到了", "", "调料份量不能多,也不能少,味道才能正好", - "", "", "春天的花开秋天的风以及冬天的落阳" - ] - - # execute predict and print the result - results = lac.cut( - text=test_text, use_gpu=True, batch_size=7, return_tag=True) - for result in results: - if six.PY2: - print( - json.dumps(result['word'], encoding="utf8", ensure_ascii=False)) - print( - json.dumps(result['tag'], encoding="utf8", ensure_ascii=False)) - else: - print(result['word']) - print(result['tag']) - - # delete the costomized dictionary - lac.del_user_dict() - - results = lac.cut( - text="春天的花开秋天的风以及冬天的落阳", use_gpu=False, batch_size=1, return_tag=False) - print(results) - - # get the tags that was exploited as pretraining lac - print(lac.get_tags()) diff --git a/hub_module/modules/text/lexical_analysis/lac/network.py b/hub_module/modules/text/lexical_analysis/lac/network.py deleted file mode 100755 index 6a85b32aae0105287817ec4b27d4a3202c4e1d44..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/lexical_analysis/lac/network.py +++ /dev/null @@ -1,100 +0,0 @@ -# -*- coding:utf-8 -*- -import paddle.fluid as fluid - - -def lex_net(word_dict_len, label_dict_len): - """ - define the lexical analysis network structure - """ - word_emb_dim = 128 - grnn_hidden_dim = 128 - emb_lr = 2 - crf_lr = 0.2 - bigru_num = 2 - init_bound = 0.1 - IS_SPARSE = True - - def _bigru_layer(input_feature): - """ - define the bidirectional gru layer - """ - pre_gru = fluid.layers.fc( - input=input_feature, - size=grnn_hidden_dim * 3, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( - low=-init_bound, high=init_bound), - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=1e-4))) - gru = fluid.layers.dynamic_gru( - input=pre_gru, - size=grnn_hidden_dim, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( - low=-init_bound, high=init_bound), - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=1e-4))) - - pre_gru_r = fluid.layers.fc( - input=input_feature, - size=grnn_hidden_dim * 3, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( - low=-init_bound, high=init_bound), - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=1e-4))) - gru_r = fluid.layers.dynamic_gru( - input=pre_gru_r, - size=grnn_hidden_dim, - is_reverse=True, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( - low=-init_bound, high=init_bound), - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=1e-4))) - - bi_merge = fluid.layers.concat(input=[gru, gru_r], axis=1) - return bi_merge - - def _net_conf(word): - """ - Configure the network - """ - word_embedding = fluid.layers.embedding( - 
input=word, - size=[word_dict_len, word_emb_dim], - dtype='float32', - is_sparse=IS_SPARSE, - param_attr=fluid.ParamAttr( - learning_rate=emb_lr, - name="word_emb", - initializer=fluid.initializer.Uniform( - low=-init_bound, high=init_bound))) - - input_feature = word_embedding - for i in range(bigru_num): - bigru_output = _bigru_layer(input_feature) - input_feature = bigru_output - - emission = fluid.layers.fc( - size=label_dict_len, - input=bigru_output, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( - low=-init_bound, high=init_bound), - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=1e-4))) - - size = emission.shape[1] - fluid.layers.create_parameter( - shape=[size + 2, size], dtype=emission.dtype, name='crfw') - crf_decode = fluid.layers.crf_decoding( - input=emission, param_attr=fluid.ParamAttr(name='crfw')) - - return crf_decode, emission - - word = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1) - - crf_decode, emission = _net_conf(word) - - return crf_decode, word, emission diff --git a/hub_module/modules/text/lexical_analysis/lac/processor.py b/hub_module/modules/text/lexical_analysis/lac/processor.py deleted file mode 100644 index d37d30d7ee42823e034d66166bbf09a1ccffbc3d..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/lexical_analysis/lac/processor.py +++ /dev/null @@ -1,331 +0,0 @@ -# -*- coding:utf-8 -*- -import io -import numpy as np -import os -import six - - -class Query(object): - def __init__(self, lac_query): - self.set_query(lac_query) - - def set_query(self, lac_query): - """ - self.lac_query_list = ["我/r", "和/c", "妈妈/n", "经常/d", "过去/v", "那儿/r", "散步/v"] - self.seg_query_list = ["我", "和", "妈妈", "经常", "过去", "那儿", "散步"] - self.seg_query_str = "我 和 妈妈 经常 过去 那儿 散步" - self.ori_query_str = "我和妈妈经常过去那儿散步" - """ - length = len(lac_query['word']) - if six.PY2: - self.lac_query_list = [ - lac_query["word"][index].encode("utf8") + "/" + - lac_query["tag"][index].encode("utf8") - for index in range(length) - ] - else: - self.lac_query_list = [ - lac_query["word"][index] + "/" + lac_query["tag"][index] - for index in range(length) - ] - - self.seg_query_list = [] - for phrase in self.lac_query_list: - index = phrase.rfind("/") - word = phrase[0:index] - self.seg_query_list.append(word) - self.seg_query_str = " ".join(self.seg_query_list) - self.ori_query_str = "".join(self.seg_query_list) - - -class Bound(object): - def __init__(self, - start_index=0, - end_index=0, - left_bound=0, - right_bound=0, - left_char_bound=0, - right_char_bound=0): - self.start_index = start_index # 命中的词的起始位置,char级别 - self.end_index = end_index # 命中的词的结束位置,char级别 - self.left_bound = left_bound # 原分词级别的起始位置 - self.right_bound = right_bound # 原分词级别的结束位置 - self.left_char_bound = left_char_bound # 原 char 级别的起始位置 - self.right_char_bound = right_char_bound # 原 char 级别的结束位置 - - -class Interventer(object): - def __init__(self, ngram_dict_path, user_dict_path): - self.ngram_dict_path = ngram_dict_path - self.user_dict_path = user_dict_path - self.init_pos_types() - self.load_dict() - - def init_pos_types(self): - all_pos_types = "n f s t nr ns nt nw nz v vd vn" \ - + " a ad an d m q r p c u xc w PER LOC ORG TIME" - self.all_pos_types = set( - [pos_type.lower() for pos_type in all_pos_types.split(" ")]) - - def load_dict(self): - """load unigram dict and user dict""" - import ahocorasick - self.total_count = 0.0 - self.ngram_dict = {} - print("Loading dict...") - for line in io.open(self.ngram_dict_path, mode="r", 
encoding="utf-8"): - if six.PY2: - word, pos, wordfreq = line.encode("utf-8").strip('\n').split( - '\t') - else: - word, pos, wordfreq = line.strip('\n').split('\t') - wordfreq = int(wordfreq) - if pos.lower() not in self.all_pos_types: - continue - assert wordfreq > 0, "Word frequency must be postive integer!" - self.total_count += wordfreq - self.ngram_dict[word + "/" + pos] = wordfreq - for key in self.ngram_dict: - wordfreq = self.ngram_dict[key] - self.ngram_dict[key] = np.log(wordfreq / self.total_count) - self.oov_score = np.log(1 / self.total_count) - - self.user_dict = ahocorasick.Automaton() - for line in io.open(self.user_dict_path, mode="r", encoding="utf-8"): - if six.PY2: - word, pos, wordfreq = line.encode("utf-8").strip('\n').split( - '\t') - else: - word, pos, wordfreq = line.strip('\n').split('\t') - wordfreq = int(wordfreq) - assert pos in self.all_pos_types, "Invalid POS type" - assert wordfreq > 0, "Word frequency must be postive integer!" - self.ngram_dict[word + "/" + pos] = np.log( - wordfreq / self.total_count) - self.user_dict.add_word(word, (word, pos, wordfreq)) - self.user_dict.make_automaton() - - def find_min_bound(self, match_info, query): - """ - find minimum Bound for match_word - """ - end_index, (match_word, pos, wordfreq) = match_info - start_index = end_index - len(match_word) + 1 - - bound = Bound(start_index=start_index, end_index=end_index) - - # find left bound - query_len = 0 - for word_index, word in enumerate(query.seg_query_list): - query_len += len(word) - if query_len > start_index: - bound.left_bound = word_index - bound.left_char_bound = query_len - len(word) - break - # find right bound - query_len = 0 - for word_index, word in enumerate(query.seg_query_list): - query_len += len(word) - if query_len > end_index: - bound.right_bound = word_index - bound.right_char_bound = query_len - 1 - break - return bound - - def calc_lm_score(self, phrase_list): - """calculate the language model score""" - lm_score = 0.0 - if len(phrase_list) == 0: - return 0.0 - for phrase in phrase_list: - lm_score += self.ngram_dict.get(phrase, self.oov_score) - return lm_score / len(phrase_list) - - def get_new_phrase_list(self, match_info, bound, query): - """ - 比较用户词典给出的词和原分词结果,根据打分决定是否替换 - """ - new_phrase_list = [] - phrase_left = query.ori_query_str[bound.left_char_bound:bound. - start_index] - phrase_right = query.ori_query_str[bound.end_index + - 1:bound.right_char_bound + 1] - if phrase_left != "": - phrase_left += "/" + query.lac_query_list[bound.left_bound].split( - '/')[1] - new_phrase_list.append(phrase_left) - new_phrase_list.append(match_info[1][0] + "/" + match_info[1][1]) - if phrase_right != "": - phrase_right += "/" + query.lac_query_list[bound.right_bound].split( - '/')[1] - new_phrase_list.append(phrase_right) - - new_query_list = query.lac_query_list[0: bound.left_bound] + new_phrase_list + \ - query.lac_query_list[bound.right_bound + 1: ] - new_lm_score = self.calc_lm_score(new_query_list) - return new_lm_score, new_phrase_list - - def run(self, query): - """ - step 1, 用AC自动机检测出匹配到的用户词 - step 2, 每个用户词查找最小分词边界,计算每种分词结果的打分,PK - step 3, 怎么处理冲突? - 3.a. 假设 AC自动机检测到的关键词都是顺序的,那么只需要考虑前后两个的替换词即可 - 3.b. 假如前后两个替换词没有位置冲突,那么直接把前一个加到替换列表里 - 3.c. 
假如前后两个替换词有冲突,比较分数,舍弃一个,更新上一个替换的位置 - step 4, 最终依次执行替换 - """ - last_bound = None - last_phrase_list = None - last_lm_score = None - all_result = [] - old_lm_score = self.calc_lm_score(query.lac_query_list) - - for match_info in self.user_dict.iter(query.ori_query_str): - #print "matched: \"%s\" in query: \"%s\"" % (match_info[1][0], query.seg_query_str) - bound = self.find_min_bound(match_info, query) - new_lm_score, new_phrase_list = self.get_new_phrase_list( - match_info, bound, query) - - # 如果打分比原 LAC 结果低,抛弃用户词典里的结果 - if new_lm_score <= old_lm_score: - #print >> sys.stderr, "skipped %s, old_lm_score: %.5f, " \ - # "new_lm_score: %.5f" % (" ".join(new_phrase_list), old_lm_score, new_lm_score) - continue - # 遇到的第一个匹配到的结果 - if last_bound is None: - last_bound = bound - last_phrase_list = new_phrase_list - last_lm_score = new_lm_score - continue - if bound.left_bound > last_bound.right_bound: - # 位置上没有冲突,则把上次的结果加到最终结果中去 - all_result.append((last_bound, last_phrase_list)) - last_bound = bound - last_phrase_list = new_phrase_list - last_lm_score = new_lm_score - else: - # 位置上有冲突 - if new_lm_score > last_lm_score: - # 若分数高于上次结果,则覆盖;否则丢弃 - last_bound = bound - last_phrase_list = new_phrase_list - last_lm_score = new_lm_score - - if last_bound is not None: - all_result.append((last_bound, last_phrase_list)) - - # 合并所有替换的结果 - final_phrase_list = [] - last_index = -1 - for bound, phrase_list in all_result: - final_phrase_list += query.lac_query_list[last_index + 1:bound. - left_bound] + phrase_list - last_index = bound.right_bound - final_phrase_list += query.lac_query_list[last_index + 1:] - - final_result = {'word': [], 'tag': []} - for phrase in final_phrase_list: - index = phrase.rfind("/") - word = phrase[0:index] - tag = phrase[index + 1:] - final_result['word'].append(word) - final_result['tag'].append(tag) - - return final_result - - -def load_kv_dict(dict_path, - reverse=False, - delimiter="\t", - key_func=None, - value_func=None): - """ - Load key-value dict from file - """ - result_dict = {} - for line in io.open(dict_path, "r", encoding='utf8'): - terms = line.strip("\n").split(delimiter) - if len(terms) != 2: - continue - if reverse: - value, key = terms - else: - key, value = terms - if key in result_dict: - raise KeyError("key duplicated with [%s]" % (key)) - if key_func: - key = key_func(key) - if value_func: - value = value_func(value) - result_dict[key] = value - return result_dict - - -def word_to_ids(words, word2id_dict, word_replace_dict, oov_id=None): - """convert word to word index""" - word_ids = [] - for word in words: - word = word_replace_dict.get(word, word) - word_id = word2id_dict.get(word, oov_id) - word_ids.append(word_id) - - return word_ids - - -def parse_result(lines, crf_decode, id2label_dict, interventer=None): - """Convert model's output tensor into string and tags """ - offset_list = crf_decode.lod[0] - crf_decode = crf_decode.as_ndarray() - batch_size = len(offset_list) - 1 - batch_out = [] - for sent_index in range(batch_size): - begin, end = offset_list[sent_index], offset_list[sent_index + 1] - sent = lines[sent_index] - tags = [ - id2label_dict[str(tag_id[0])] for tag_id in crf_decode[begin:end] - ] - - if interventer: - interventer.parse_customization(sent, tags) - - sent_out = [] - tags_out = [] - for ind, tag in enumerate(tags): - # for the first char - if len(sent_out) == 0 or tag.endswith("B") or tag.endswith("S"): - sent_out.append(sent[ind]) - tags_out.append(tag[:-2]) - continue - sent_out[-1] += sent[ind] - tags_out[-1] = tag[:-2] - - seg_result = 
{"word": sent_out, "tag": tags_out} - batch_out.append(seg_result) - - return batch_out - - -# sent_out = [] -# tags_out = [] -# parital_word = "" -# for ind, tag in enumerate(tags): -# # for the first word -# if parital_word == "": -# parital_word = sent[ind] -# tags_out.append(tag.split('-')[0]) -# continue -# # for the beginning of word -# if tag.endswith("-B") or (tag == "O" and tags[ind - 1] != "O"): -# sent_out.append(parital_word) -# tags_out.append(tag.split('-')[0]) -# parital_word = sent[ind] -# continue -# parital_word += sent[ind] -# # append the last word, except for len(tags)=0 -# if len(sent_out) < len(tags_out): -# sent_out.append(parital_word) -# seg_result = {"word": sent_out, "tag": tags_out} - -# batch_out.append(seg_result) -# return batch_out diff --git a/hub_module/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/model/bert.py b/hub_module/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/model/bert.py deleted file mode 100644 index 0dd18fc291687793660d69049ee5875525561f70..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/model/bert.py +++ /dev/null @@ -1,228 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""BERT model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import json - -import paddle.fluid as fluid - -from bert_cased_L_12_H_768_A_12.model.transformer_encoder import encoder, pre_process_layer - - -class BertConfig(object): - def __init__(self, config_path): - self._config_dict = self._parse(config_path) - - def _parse(self, config_path): - try: - with open(config_path) as json_file: - config_dict = json.load(json_file) - except Exception: - raise IOError( - "Error in parsing bert model config file '%s'" % config_path) - else: - return config_dict - - def __getitem__(self, key): - return self._config_dict[key] - - def print_config(self): - for arg, value in sorted(six.iteritems(self._config_dict)): - print('%s: %s' % (arg, value)) - print('------------------------------------------------') - - -class BertModel(object): - def __init__(self, - src_ids, - position_ids, - sentence_ids, - input_mask, - config, - weight_sharing=True, - use_fp16=False): - - self._emb_size = config['hidden_size'] - self._n_layer = config['num_hidden_layers'] - self._n_head = config['num_attention_heads'] - self._voc_size = config['vocab_size'] - self._max_position_seq_len = config['max_position_embeddings'] - self._sent_types = config['type_vocab_size'] - self._hidden_act = config['hidden_act'] - self._prepostprocess_dropout = config['hidden_dropout_prob'] - self._attention_dropout = config['attention_probs_dropout_prob'] - self._weight_sharing = weight_sharing - - self._word_emb_name = "word_embedding" - self._pos_emb_name = "pos_embedding" - self._sent_emb_name = "sent_embedding" - self._dtype = "float16" if use_fp16 else "float32" - - # Initialize all weigths by truncated normal initializer, and all biases - # will be initialized by constant zero by default. 
- self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config['initializer_range']) - - self._build_model(src_ids, position_ids, sentence_ids, input_mask) - - def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): - # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( - input=src_ids, - size=[self._voc_size, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._word_emb_name, initializer=self._param_initializer), - is_sparse=False) - position_emb_out = fluid.layers.embedding( - input=position_ids, - size=[self._max_position_seq_len, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._pos_emb_name, initializer=self._param_initializer)) - - sent_emb_out = fluid.layers.embedding( - sentence_ids, - size=[self._sent_types, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._sent_emb_name, initializer=self._param_initializer)) - - emb_out = emb_out + position_emb_out - emb_out = emb_out + sent_emb_out - - emb_out = pre_process_layer( - emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') - - if self._dtype == "float16": - input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) - - self_attn_mask = fluid.layers.matmul( - x=input_mask, y=input_mask, transpose_y=True) - self_attn_mask = fluid.layers.scale( - x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = fluid.layers.stack( - x=[self_attn_mask] * self._n_head, axis=1) - n_head_self_attn_mask.stop_gradient = True - - self._enc_out = encoder( - enc_input=emb_out, - attn_bias=n_head_self_attn_mask, - n_layer=self._n_layer, - n_head=self._n_head, - d_key=self._emb_size // self._n_head, - d_value=self._emb_size // self._n_head, - d_model=self._emb_size, - d_inner_hid=self._emb_size * 4, - prepostprocess_dropout=self._prepostprocess_dropout, - attention_dropout=self._attention_dropout, - relu_dropout=0, - hidden_act=self._hidden_act, - preprocess_cmd="", - postprocess_cmd="dan", - param_initializer=self._param_initializer, - name='encoder') - - def get_sequence_output(self): - return self._enc_out - - def get_pooled_output(self): - """Get the first feature of each sequence for classification""" - - next_sent_feat = fluid.layers.slice( - input=self._enc_out, axes=[1], starts=[0], ends=[1]) - next_sent_feat = fluid.layers.fc( - input=next_sent_feat, - size=self._emb_size, - act="tanh", - param_attr=fluid.ParamAttr( - name="pooled_fc.w_0", initializer=self._param_initializer), - bias_attr="pooled_fc.b_0") - return next_sent_feat - - def get_pretraining_output(self, mask_label, mask_pos, labels): - """Get the loss & accuracy for pretraining""" - - mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') - - # extract the first token feature in each sentence - next_sent_feat = self.get_pooled_output() - reshaped_emb_out = fluid.layers.reshape( - x=self._enc_out, shape=[-1, self._emb_size]) - # extract masked tokens' feature - mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) - - # transform: fc - mask_trans_feat = fluid.layers.fc( - input=mask_feat, - size=self._emb_size, - act=self._hidden_act, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_fc.w_0', - initializer=self._param_initializer), - bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) - # transform: layer norm - mask_trans_feat = pre_process_layer( - mask_trans_feat, 'n', name='mask_lm_trans') - - mask_lm_out_bias_attr = fluid.ParamAttr( - 
name="mask_lm_out_fc.b_0", - initializer=fluid.initializer.Constant(value=0.0)) - if self._weight_sharing: - fc_out = fluid.layers.matmul( - x=mask_trans_feat, - y=fluid.default_main_program().global_block().var( - self._word_emb_name), - transpose_y=True) - fc_out += fluid.layers.create_parameter( - shape=[self._voc_size], - dtype=self._dtype, - attr=mask_lm_out_bias_attr, - is_bias=True) - - else: - fc_out = fluid.layers.fc( - input=mask_trans_feat, - size=self._voc_size, - param_attr=fluid.ParamAttr( - name="mask_lm_out_fc.w_0", - initializer=self._param_initializer), - bias_attr=mask_lm_out_bias_attr) - - mask_lm_loss = fluid.layers.softmax_with_cross_entropy( - logits=fc_out, label=mask_label) - mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) - - next_sent_fc_out = fluid.layers.fc( - input=next_sent_feat, - size=2, - param_attr=fluid.ParamAttr( - name="next_sent_fc.w_0", initializer=self._param_initializer), - bias_attr="next_sent_fc.b_0") - - next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy( - logits=next_sent_fc_out, label=labels, return_softmax=True) - - next_sent_acc = fluid.layers.accuracy( - input=next_sent_softmax, label=labels) - - mean_next_sent_loss = fluid.layers.mean(next_sent_loss) - - loss = mean_next_sent_loss + mean_mask_lm_loss - return next_sent_acc, mean_mask_lm_loss, loss diff --git a/hub_module/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/model/transformer_encoder.py b/hub_module/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/model/transformer_encoder.py deleted file mode 100644 index 9a5913efc1fb6d7fc0ac5f6df37fb70424d259d9..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/model/transformer_encoder.py +++ /dev/null @@ -1,340 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Transformer encoder.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from functools import partial - -import paddle.fluid as fluid -import paddle.fluid.layers as layers - - -def multi_head_attention(queries, - keys, - values, - attn_bias, - d_key, - d_value, - d_model, - n_head=1, - dropout_rate=0., - cache=None, - param_initializer=None, - name='multi_head_att'): - """ - Multi-Head Attention. Note that attn_bias is added to the logit before - computing softmax activiation to mask certain selected positions so that - they will not considered in attention weights. - """ - keys = queries if keys is None else keys - values = keys if values is None else values - - if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): - raise ValueError( - "Inputs: quries, keys and values should all be 3-D tensors.") - - def __compute_qkv(queries, keys, values, n_head, d_key, d_value): - """ - Add linear projection to queries, keys, and values. 
- """ - q = layers.fc( - input=queries, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_query_fc.w_0', initializer=param_initializer), - bias_attr=name + '_query_fc.b_0') - k = layers.fc( - input=keys, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_key_fc.w_0', initializer=param_initializer), - bias_attr=name + '_key_fc.b_0') - v = layers.fc( - input=values, - size=d_value * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_value_fc.w_0', initializer=param_initializer), - bias_attr=name + '_value_fc.b_0') - return q, k, v - - def __split_heads(x, n_head): - """ - Reshape the last dimension of inpunt tensor x so that it becomes two - dimensions and then transpose. Specifically, input a tensor with shape - [bs, max_sequence_length, n_head * hidden_dim] then output a tensor - with shape [bs, n_head, max_sequence_length, hidden_dim]. - """ - hidden_size = x.shape[-1] - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - reshaped = layers.reshape( - x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) - - # permuate the dimensions into: - # [batch_size, n_head, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) - - def __combine_heads(x): - """ - Transpose and then reshape the last two dimensions of inpunt tensor x - so that it becomes one dimension, which is reverse to __split_heads. - """ - if len(x.shape) == 3: return x - if len(x.shape) != 4: - raise ValueError("Input(x) should be a 4-D Tensor.") - - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - return layers.reshape( - x=trans_x, - shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], - inplace=True) - - def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): - """ - Scaled Dot-Product Attention - """ - scaled_q = layers.scale(x=q, scale=d_key**-0.5) - product = layers.matmul(x=scaled_q, y=k, transpose_y=True) - if attn_bias: - product += attn_bias - weights = layers.softmax(product) - if dropout_rate: - weights = layers.dropout( - weights, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.matmul(weights, v) - return out - - q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) - - if cache is not None: # use cache and concat time steps - # Since the inplace reshape in __split_heads changes the shape of k and - # v, which is the cache input for next time step, reshape the cache - # input from the previous time step first. - k = cache["k"] = layers.concat( - [layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) - v = cache["v"] = layers.concat( - [layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) - - q = __split_heads(q, n_head) - k = __split_heads(k, n_head) - v = __split_heads(v, n_head) - - ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, - dropout_rate) - - out = __combine_heads(ctx_multiheads) - - # Project back to the model size. 
- proj_out = layers.fc( - input=out, - size=d_model, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_output_fc.w_0', initializer=param_initializer), - bias_attr=name + '_output_fc.b_0') - return proj_out - - -def positionwise_feed_forward(x, - d_inner_hid, - d_hid, - dropout_rate, - hidden_act, - param_initializer=None, - name='ffn'): - """ - Position-wise Feed-Forward Networks. - This module consists of two linear transformations with a ReLU activation - in between, which is applied to each position separately and identically. - """ - hidden = layers.fc( - input=x, - size=d_inner_hid, - num_flatten_dims=2, - act=hidden_act, - param_attr=fluid.ParamAttr( - name=name + '_fc_0.w_0', initializer=param_initializer), - bias_attr=name + '_fc_0.b_0') - if dropout_rate: - hidden = layers.dropout( - hidden, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.fc( - input=hidden, - size=d_hid, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_fc_1.w_0', initializer=param_initializer), - bias_attr=name + '_fc_1.b_0') - return out - - -def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., - name=''): - """ - Add residual connection, layer normalization and droput to the out tensor - optionally according to the value of process_cmd. - This will be used before or after multi-head attention and position-wise - feed-forward networks. - """ - for cmd in process_cmd: - if cmd == "a": # add residual connection - out = out + prev_out if prev_out else out - elif cmd == "n": # add layer normalization - out_dtype = out.dtype - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float32") - out = layers.layer_norm( - out, - begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.ParamAttr( - name=name + '_layer_norm_scale', - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - name=name + '_layer_norm_bias', - initializer=fluid.initializer.Constant(0.))) - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float16") - elif cmd == "d": # add dropout - if dropout_rate: - out = layers.dropout( - out, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - return out - - -pre_process_layer = partial(pre_post_process_layer, None) -post_process_layer = pre_post_process_layer - - -def encoder_layer(enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """The encoder layers that can be stacked to form a deep encoder. - This module consits of a multi-head (self) attention followed by - position-wise feed-forward networks and both the two components companied - with the post_process_layer to add residual connection, layer normalization - and droput. 
- """ - attn_output = multi_head_attention( - pre_process_layer( - enc_input, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_att'), - None, - None, - attn_bias, - d_key, - d_value, - d_model, - n_head, - attention_dropout, - param_initializer=param_initializer, - name=name + '_multi_head_att') - attn_output = post_process_layer( - enc_input, - attn_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_att') - ffd_output = positionwise_feed_forward( - pre_process_layer( - attn_output, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_ffn'), - d_inner_hid, - d_model, - relu_dropout, - hidden_act, - param_initializer=param_initializer, - name=name + '_ffn') - return post_process_layer( - attn_output, - ffd_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_ffn') - - -def encoder(enc_input, - attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """ - The encoder is composed of a stack of identical layers returned by calling - encoder_layer. - """ - for i in range(n_layer): - enc_output = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name + '_layer_' + str(i)) - enc_input = enc_output - enc_output = pre_process_layer( - enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") - - return enc_output diff --git a/hub_module/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/module.py b/hub_module/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/module.py deleted file mode 100644 index 975f689cee95b00c41f2213c6d3562dee639cba7..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/module.py +++ /dev/null @@ -1,76 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-from paddlehub import TransformerModule
-from paddlehub.module.module import moduleinfo
-
-from bert_cased_L_12_H_768_A_12.model.bert import BertConfig, BertModel
-
-
-@moduleinfo(
-    name="bert_cased_L-12_H-768_A-12",
-    version="1.1.0",
-    summary=
-    "bert_cased_L-12_H-768_A-12, 12-layer, 768-hidden, 12-heads, 110M parameters",
-    author="paddlepaddle",
-    author_email="paddle-dev@baidu.com",
-    type="nlp/semantic_model",
-)
-class Bert(TransformerModule):
-    def _initialize(self):
-        self.MAX_SEQ_LEN = 512
-        self.params_path = os.path.join(self.directory, "assets", "params")
-        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
-
-        bert_config_path = os.path.join(self.directory, "assets",
-                                        "bert_config.json")
-        self.bert_config = BertConfig(bert_config_path)
-
-    def net(self, input_ids, position_ids, segment_ids, input_mask):
-        """
-        Create the neural network.
-
-        Args:
-            input_ids (tensor): the word ids.
-            position_ids (tensor): the position ids.
-            segment_ids (tensor): the segment ids.
-            input_mask (tensor): the padding mask.
-
-        Returns:
-            pooled_output (tensor): sentence-level output for classification task.
-            sequence_output (tensor): token-level output for sequence task.
-        """
-        bert = BertModel(
-            src_ids=input_ids,
-            position_ids=position_ids,
-            sentence_ids=segment_ids,
-            input_mask=input_mask,
-            config=self.bert_config,
-            use_fp16=False)
-        pooled_output = bert.get_pooled_output()
-        sequence_output = bert.get_sequence_output()
-        return pooled_output, sequence_output
-
-
-if __name__ == '__main__':
-    test_module = Bert()
diff --git a/hub_module/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/model/bert.py b/hub_module/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/model/bert.py
deleted file mode 100644
index 33736549405b8d05b22a5c587f1e08be0adb4e29..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/model/bert.py
+++ /dev/null
@@ -1,228 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
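Editor's note: each deleted directory here is a self-contained PaddleHub module. Under PaddleHub 1.x the module above was typically consumed roughly as follows; `context()` and its return values are recalled from the 1.x `TransformerModule` API rather than shown in this diff, so treat this as a hedged sketch:

```python
import paddlehub as hub

# The module name comes from the @moduleinfo decorator above.
module = hub.Module(name="bert_cased_L-12_H-768_A-12")

# Assumed 1.x signature: context() builds the program and hands back
# the input/output variables.
inputs, outputs, program = module.context(trainable=True, max_seq_len=128)

pooled_output = outputs["pooled_output"]      # sentence-level feature
sequence_output = outputs["sequence_output"]  # token-level features
```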
-"""BERT model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import json - -import paddle.fluid as fluid - -from bert_cased_L_24_H_1024_A_16.model.transformer_encoder import encoder, pre_process_layer - - -class BertConfig(object): - def __init__(self, config_path): - self._config_dict = self._parse(config_path) - - def _parse(self, config_path): - try: - with open(config_path) as json_file: - config_dict = json.load(json_file) - except Exception: - raise IOError( - "Error in parsing bert model config file '%s'" % config_path) - else: - return config_dict - - def __getitem__(self, key): - return self._config_dict[key] - - def print_config(self): - for arg, value in sorted(six.iteritems(self._config_dict)): - print('%s: %s' % (arg, value)) - print('------------------------------------------------') - - -class BertModel(object): - def __init__(self, - src_ids, - position_ids, - sentence_ids, - input_mask, - config, - weight_sharing=True, - use_fp16=False): - - self._emb_size = config['hidden_size'] - self._n_layer = config['num_hidden_layers'] - self._n_head = config['num_attention_heads'] - self._voc_size = config['vocab_size'] - self._max_position_seq_len = config['max_position_embeddings'] - self._sent_types = config['type_vocab_size'] - self._hidden_act = config['hidden_act'] - self._prepostprocess_dropout = config['hidden_dropout_prob'] - self._attention_dropout = config['attention_probs_dropout_prob'] - self._weight_sharing = weight_sharing - - self._word_emb_name = "word_embedding" - self._pos_emb_name = "pos_embedding" - self._sent_emb_name = "sent_embedding" - self._dtype = "float16" if use_fp16 else "float32" - - # Initialize all weigths by truncated normal initializer, and all biases - # will be initialized by constant zero by default. 
- self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config['initializer_range']) - - self._build_model(src_ids, position_ids, sentence_ids, input_mask) - - def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): - # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( - input=src_ids, - size=[self._voc_size, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._word_emb_name, initializer=self._param_initializer), - is_sparse=False) - position_emb_out = fluid.layers.embedding( - input=position_ids, - size=[self._max_position_seq_len, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._pos_emb_name, initializer=self._param_initializer)) - - sent_emb_out = fluid.layers.embedding( - sentence_ids, - size=[self._sent_types, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._sent_emb_name, initializer=self._param_initializer)) - - emb_out = emb_out + position_emb_out - emb_out = emb_out + sent_emb_out - - emb_out = pre_process_layer( - emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') - - if self._dtype == "float16": - input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) - - self_attn_mask = fluid.layers.matmul( - x=input_mask, y=input_mask, transpose_y=True) - self_attn_mask = fluid.layers.scale( - x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = fluid.layers.stack( - x=[self_attn_mask] * self._n_head, axis=1) - n_head_self_attn_mask.stop_gradient = True - - self._enc_out = encoder( - enc_input=emb_out, - attn_bias=n_head_self_attn_mask, - n_layer=self._n_layer, - n_head=self._n_head, - d_key=self._emb_size // self._n_head, - d_value=self._emb_size // self._n_head, - d_model=self._emb_size, - d_inner_hid=self._emb_size * 4, - prepostprocess_dropout=self._prepostprocess_dropout, - attention_dropout=self._attention_dropout, - relu_dropout=0, - hidden_act=self._hidden_act, - preprocess_cmd="", - postprocess_cmd="dan", - param_initializer=self._param_initializer, - name='encoder') - - def get_sequence_output(self): - return self._enc_out - - def get_pooled_output(self): - """Get the first feature of each sequence for classification""" - - next_sent_feat = fluid.layers.slice( - input=self._enc_out, axes=[1], starts=[0], ends=[1]) - next_sent_feat = fluid.layers.fc( - input=next_sent_feat, - size=self._emb_size, - act="tanh", - param_attr=fluid.ParamAttr( - name="pooled_fc.w_0", initializer=self._param_initializer), - bias_attr="pooled_fc.b_0") - return next_sent_feat - - def get_pretraining_output(self, mask_label, mask_pos, labels): - """Get the loss & accuracy for pretraining""" - - mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') - - # extract the first token feature in each sentence - next_sent_feat = self.get_pooled_output() - reshaped_emb_out = fluid.layers.reshape( - x=self._enc_out, shape=[-1, self._emb_size]) - # extract masked tokens' feature - mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) - - # transform: fc - mask_trans_feat = fluid.layers.fc( - input=mask_feat, - size=self._emb_size, - act=self._hidden_act, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_fc.w_0', - initializer=self._param_initializer), - bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) - # transform: layer norm - mask_trans_feat = pre_process_layer( - mask_trans_feat, 'n', name='mask_lm_trans') - - mask_lm_out_bias_attr = fluid.ParamAttr( - 
name="mask_lm_out_fc.b_0", - initializer=fluid.initializer.Constant(value=0.0)) - if self._weight_sharing: - fc_out = fluid.layers.matmul( - x=mask_trans_feat, - y=fluid.default_main_program().global_block().var( - self._word_emb_name), - transpose_y=True) - fc_out += fluid.layers.create_parameter( - shape=[self._voc_size], - dtype=self._dtype, - attr=mask_lm_out_bias_attr, - is_bias=True) - - else: - fc_out = fluid.layers.fc( - input=mask_trans_feat, - size=self._voc_size, - param_attr=fluid.ParamAttr( - name="mask_lm_out_fc.w_0", - initializer=self._param_initializer), - bias_attr=mask_lm_out_bias_attr) - - mask_lm_loss = fluid.layers.softmax_with_cross_entropy( - logits=fc_out, label=mask_label) - mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) - - next_sent_fc_out = fluid.layers.fc( - input=next_sent_feat, - size=2, - param_attr=fluid.ParamAttr( - name="next_sent_fc.w_0", initializer=self._param_initializer), - bias_attr="next_sent_fc.b_0") - - next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy( - logits=next_sent_fc_out, label=labels, return_softmax=True) - - next_sent_acc = fluid.layers.accuracy( - input=next_sent_softmax, label=labels) - - mean_next_sent_loss = fluid.layers.mean(next_sent_loss) - - loss = mean_next_sent_loss + mean_mask_lm_loss - return next_sent_acc, mean_mask_lm_loss, loss diff --git a/hub_module/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/model/transformer_encoder.py b/hub_module/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/model/transformer_encoder.py deleted file mode 100644 index 9a5913efc1fb6d7fc0ac5f6df37fb70424d259d9..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/model/transformer_encoder.py +++ /dev/null @@ -1,340 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Transformer encoder.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from functools import partial - -import paddle.fluid as fluid -import paddle.fluid.layers as layers - - -def multi_head_attention(queries, - keys, - values, - attn_bias, - d_key, - d_value, - d_model, - n_head=1, - dropout_rate=0., - cache=None, - param_initializer=None, - name='multi_head_att'): - """ - Multi-Head Attention. Note that attn_bias is added to the logit before - computing softmax activiation to mask certain selected positions so that - they will not considered in attention weights. - """ - keys = queries if keys is None else keys - values = keys if values is None else values - - if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): - raise ValueError( - "Inputs: quries, keys and values should all be 3-D tensors.") - - def __compute_qkv(queries, keys, values, n_head, d_key, d_value): - """ - Add linear projection to queries, keys, and values. 
- """ - q = layers.fc( - input=queries, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_query_fc.w_0', initializer=param_initializer), - bias_attr=name + '_query_fc.b_0') - k = layers.fc( - input=keys, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_key_fc.w_0', initializer=param_initializer), - bias_attr=name + '_key_fc.b_0') - v = layers.fc( - input=values, - size=d_value * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_value_fc.w_0', initializer=param_initializer), - bias_attr=name + '_value_fc.b_0') - return q, k, v - - def __split_heads(x, n_head): - """ - Reshape the last dimension of inpunt tensor x so that it becomes two - dimensions and then transpose. Specifically, input a tensor with shape - [bs, max_sequence_length, n_head * hidden_dim] then output a tensor - with shape [bs, n_head, max_sequence_length, hidden_dim]. - """ - hidden_size = x.shape[-1] - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - reshaped = layers.reshape( - x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) - - # permuate the dimensions into: - # [batch_size, n_head, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) - - def __combine_heads(x): - """ - Transpose and then reshape the last two dimensions of inpunt tensor x - so that it becomes one dimension, which is reverse to __split_heads. - """ - if len(x.shape) == 3: return x - if len(x.shape) != 4: - raise ValueError("Input(x) should be a 4-D Tensor.") - - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - return layers.reshape( - x=trans_x, - shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], - inplace=True) - - def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): - """ - Scaled Dot-Product Attention - """ - scaled_q = layers.scale(x=q, scale=d_key**-0.5) - product = layers.matmul(x=scaled_q, y=k, transpose_y=True) - if attn_bias: - product += attn_bias - weights = layers.softmax(product) - if dropout_rate: - weights = layers.dropout( - weights, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.matmul(weights, v) - return out - - q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) - - if cache is not None: # use cache and concat time steps - # Since the inplace reshape in __split_heads changes the shape of k and - # v, which is the cache input for next time step, reshape the cache - # input from the previous time step first. - k = cache["k"] = layers.concat( - [layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) - v = cache["v"] = layers.concat( - [layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) - - q = __split_heads(q, n_head) - k = __split_heads(k, n_head) - v = __split_heads(v, n_head) - - ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, - dropout_rate) - - out = __combine_heads(ctx_multiheads) - - # Project back to the model size. 
- proj_out = layers.fc( - input=out, - size=d_model, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_output_fc.w_0', initializer=param_initializer), - bias_attr=name + '_output_fc.b_0') - return proj_out - - -def positionwise_feed_forward(x, - d_inner_hid, - d_hid, - dropout_rate, - hidden_act, - param_initializer=None, - name='ffn'): - """ - Position-wise Feed-Forward Networks. - This module consists of two linear transformations with a ReLU activation - in between, which is applied to each position separately and identically. - """ - hidden = layers.fc( - input=x, - size=d_inner_hid, - num_flatten_dims=2, - act=hidden_act, - param_attr=fluid.ParamAttr( - name=name + '_fc_0.w_0', initializer=param_initializer), - bias_attr=name + '_fc_0.b_0') - if dropout_rate: - hidden = layers.dropout( - hidden, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.fc( - input=hidden, - size=d_hid, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_fc_1.w_0', initializer=param_initializer), - bias_attr=name + '_fc_1.b_0') - return out - - -def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., - name=''): - """ - Add residual connection, layer normalization and droput to the out tensor - optionally according to the value of process_cmd. - This will be used before or after multi-head attention and position-wise - feed-forward networks. - """ - for cmd in process_cmd: - if cmd == "a": # add residual connection - out = out + prev_out if prev_out else out - elif cmd == "n": # add layer normalization - out_dtype = out.dtype - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float32") - out = layers.layer_norm( - out, - begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.ParamAttr( - name=name + '_layer_norm_scale', - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - name=name + '_layer_norm_bias', - initializer=fluid.initializer.Constant(0.))) - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float16") - elif cmd == "d": # add dropout - if dropout_rate: - out = layers.dropout( - out, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - return out - - -pre_process_layer = partial(pre_post_process_layer, None) -post_process_layer = pre_post_process_layer - - -def encoder_layer(enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """The encoder layers that can be stacked to form a deep encoder. - This module consits of a multi-head (self) attention followed by - position-wise feed-forward networks and both the two components companied - with the post_process_layer to add residual connection, layer normalization - and droput. 
- """ - attn_output = multi_head_attention( - pre_process_layer( - enc_input, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_att'), - None, - None, - attn_bias, - d_key, - d_value, - d_model, - n_head, - attention_dropout, - param_initializer=param_initializer, - name=name + '_multi_head_att') - attn_output = post_process_layer( - enc_input, - attn_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_att') - ffd_output = positionwise_feed_forward( - pre_process_layer( - attn_output, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_ffn'), - d_inner_hid, - d_model, - relu_dropout, - hidden_act, - param_initializer=param_initializer, - name=name + '_ffn') - return post_process_layer( - attn_output, - ffd_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_ffn') - - -def encoder(enc_input, - attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """ - The encoder is composed of a stack of identical layers returned by calling - encoder_layer. - """ - for i in range(n_layer): - enc_output = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name + '_layer_' + str(i)) - enc_input = enc_output - enc_output = pre_process_layer( - enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") - - return enc_output diff --git a/hub_module/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/module.py b/hub_module/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/module.py deleted file mode 100644 index 21d098ef74f0787d8dbac48daafa30b661e70474..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/module.py +++ /dev/null @@ -1,76 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-from paddlehub import TransformerModule
-from paddlehub.module.module import moduleinfo
-
-from bert_cased_L_24_H_1024_A_16.model.bert import BertConfig, BertModel
-
-
-@moduleinfo(
-    name="bert_cased_L-24_H-1024_A-16",
-    version="1.1.0",
-    summary=
-    "bert_cased_L-24_H-1024_A-16, 24-layer, 1024-hidden, 16-heads, 340M parameters",
-    author="paddlepaddle",
-    author_email="paddle-dev@baidu.com",
-    type="nlp/semantic_model",
-)
-class Bert(TransformerModule):
-    def _initialize(self):
-        self.MAX_SEQ_LEN = 512
-        self.params_path = os.path.join(self.directory, "assets", "params")
-        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
-
-        bert_config_path = os.path.join(self.directory, "assets",
-                                        "bert_config.json")
-        self.bert_config = BertConfig(bert_config_path)
-
-    def net(self, input_ids, position_ids, segment_ids, input_mask):
-        """
-        Create the neural network.
-
-        Args:
-            input_ids (tensor): the word ids.
-            position_ids (tensor): the position ids.
-            segment_ids (tensor): the segment ids.
-            input_mask (tensor): the padding mask.
-
-        Returns:
-            pooled_output (tensor): sentence-level output for classification task.
-            sequence_output (tensor): token-level output for sequence task.
-        """
-        bert = BertModel(
-            src_ids=input_ids,
-            position_ids=position_ids,
-            sentence_ids=segment_ids,
-            input_mask=input_mask,
-            config=self.bert_config,
-            use_fp16=False)
-        pooled_output = bert.get_pooled_output()
-        sequence_output = bert.get_sequence_output()
-        return pooled_output, sequence_output
-
-
-if __name__ == '__main__':
-    test_module = Bert()
diff --git a/hub_module/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/model/bert.py b/hub_module/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/model/bert.py
deleted file mode 100644
index 0fb7bc6e1eac76ac0b0753942459d1621c293502..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/model/bert.py
+++ /dev/null
@@ -1,228 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
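Editor's note on `get_pretraining_output` (the `bert.py` below is another verbatim copy of it): with `weight_sharing=True` the masked-LM logits reuse the word-embedding matrix as a transposed output projection plus a separate bias, rather than a fresh `vocab_size`-wide FC. In numpy terms, with illustrative dimensions:

```python
import numpy as np

vocab_size, emb_size, n_masked = 30522, 768, 5  # illustrative sizes

word_embedding = np.random.randn(vocab_size, emb_size) * 0.02  # shared table
mask_trans_feat = np.random.randn(n_masked, emb_size)  # transformed masked-token features
mlm_bias = np.zeros(vocab_size)                        # mask_lm_out_fc.b_0

# weight_sharing=True: reuse the embedding table as the output projection.
fc_out = mask_trans_feat @ word_embedding.T + mlm_bias  # [n_masked, vocab_size]
# softmax_with_cross_entropy(fc_out, mask_label) then yields the MLM loss;
# with weight_sharing=False a fresh [emb_size, vocab_size] FC is used instead.
```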
-"""BERT model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import json - -import paddle.fluid as fluid - -from bert_chinese_L_12_H_768_A_12.model.transformer_encoder import encoder, pre_process_layer - - -class BertConfig(object): - def __init__(self, config_path): - self._config_dict = self._parse(config_path) - - def _parse(self, config_path): - try: - with open(config_path) as json_file: - config_dict = json.load(json_file) - except Exception: - raise IOError( - "Error in parsing bert model config file '%s'" % config_path) - else: - return config_dict - - def __getitem__(self, key): - return self._config_dict[key] - - def print_config(self): - for arg, value in sorted(six.iteritems(self._config_dict)): - print('%s: %s' % (arg, value)) - print('------------------------------------------------') - - -class BertModel(object): - def __init__(self, - src_ids, - position_ids, - sentence_ids, - input_mask, - config, - weight_sharing=True, - use_fp16=False): - - self._emb_size = config['hidden_size'] - self._n_layer = config['num_hidden_layers'] - self._n_head = config['num_attention_heads'] - self._voc_size = config['vocab_size'] - self._max_position_seq_len = config['max_position_embeddings'] - self._sent_types = config['type_vocab_size'] - self._hidden_act = config['hidden_act'] - self._prepostprocess_dropout = config['hidden_dropout_prob'] - self._attention_dropout = config['attention_probs_dropout_prob'] - self._weight_sharing = weight_sharing - - self._word_emb_name = "word_embedding" - self._pos_emb_name = "pos_embedding" - self._sent_emb_name = "sent_embedding" - self._dtype = "float16" if use_fp16 else "float32" - - # Initialize all weigths by truncated normal initializer, and all biases - # will be initialized by constant zero by default. 
- self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config['initializer_range']) - - self._build_model(src_ids, position_ids, sentence_ids, input_mask) - - def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): - # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( - input=src_ids, - size=[self._voc_size, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._word_emb_name, initializer=self._param_initializer), - is_sparse=False) - position_emb_out = fluid.layers.embedding( - input=position_ids, - size=[self._max_position_seq_len, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._pos_emb_name, initializer=self._param_initializer)) - - sent_emb_out = fluid.layers.embedding( - sentence_ids, - size=[self._sent_types, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._sent_emb_name, initializer=self._param_initializer)) - - emb_out = emb_out + position_emb_out - emb_out = emb_out + sent_emb_out - - emb_out = pre_process_layer( - emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') - - if self._dtype == "float16": - input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) - - self_attn_mask = fluid.layers.matmul( - x=input_mask, y=input_mask, transpose_y=True) - self_attn_mask = fluid.layers.scale( - x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = fluid.layers.stack( - x=[self_attn_mask] * self._n_head, axis=1) - n_head_self_attn_mask.stop_gradient = True - - self._enc_out = encoder( - enc_input=emb_out, - attn_bias=n_head_self_attn_mask, - n_layer=self._n_layer, - n_head=self._n_head, - d_key=self._emb_size // self._n_head, - d_value=self._emb_size // self._n_head, - d_model=self._emb_size, - d_inner_hid=self._emb_size * 4, - prepostprocess_dropout=self._prepostprocess_dropout, - attention_dropout=self._attention_dropout, - relu_dropout=0, - hidden_act=self._hidden_act, - preprocess_cmd="", - postprocess_cmd="dan", - param_initializer=self._param_initializer, - name='encoder') - - def get_sequence_output(self): - return self._enc_out - - def get_pooled_output(self): - """Get the first feature of each sequence for classification""" - - next_sent_feat = fluid.layers.slice( - input=self._enc_out, axes=[1], starts=[0], ends=[1]) - next_sent_feat = fluid.layers.fc( - input=next_sent_feat, - size=self._emb_size, - act="tanh", - param_attr=fluid.ParamAttr( - name="pooled_fc.w_0", initializer=self._param_initializer), - bias_attr="pooled_fc.b_0") - return next_sent_feat - - def get_pretraining_output(self, mask_label, mask_pos, labels): - """Get the loss & accuracy for pretraining""" - - mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') - - # extract the first token feature in each sentence - next_sent_feat = self.get_pooled_output() - reshaped_emb_out = fluid.layers.reshape( - x=self._enc_out, shape=[-1, self._emb_size]) - # extract masked tokens' feature - mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) - - # transform: fc - mask_trans_feat = fluid.layers.fc( - input=mask_feat, - size=self._emb_size, - act=self._hidden_act, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_fc.w_0', - initializer=self._param_initializer), - bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) - # transform: layer norm - mask_trans_feat = pre_process_layer( - mask_trans_feat, 'n', name='mask_lm_trans') - - mask_lm_out_bias_attr = fluid.ParamAttr( - 
name="mask_lm_out_fc.b_0", - initializer=fluid.initializer.Constant(value=0.0)) - if self._weight_sharing: - fc_out = fluid.layers.matmul( - x=mask_trans_feat, - y=fluid.default_main_program().global_block().var( - self._word_emb_name), - transpose_y=True) - fc_out += fluid.layers.create_parameter( - shape=[self._voc_size], - dtype=self._dtype, - attr=mask_lm_out_bias_attr, - is_bias=True) - - else: - fc_out = fluid.layers.fc( - input=mask_trans_feat, - size=self._voc_size, - param_attr=fluid.ParamAttr( - name="mask_lm_out_fc.w_0", - initializer=self._param_initializer), - bias_attr=mask_lm_out_bias_attr) - - mask_lm_loss = fluid.layers.softmax_with_cross_entropy( - logits=fc_out, label=mask_label) - mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) - - next_sent_fc_out = fluid.layers.fc( - input=next_sent_feat, - size=2, - param_attr=fluid.ParamAttr( - name="next_sent_fc.w_0", initializer=self._param_initializer), - bias_attr="next_sent_fc.b_0") - - next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy( - logits=next_sent_fc_out, label=labels, return_softmax=True) - - next_sent_acc = fluid.layers.accuracy( - input=next_sent_softmax, label=labels) - - mean_next_sent_loss = fluid.layers.mean(next_sent_loss) - - loss = mean_next_sent_loss + mean_mask_lm_loss - return next_sent_acc, mean_mask_lm_loss, loss diff --git a/hub_module/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/model/transformer_encoder.py b/hub_module/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/model/transformer_encoder.py deleted file mode 100644 index 9a5913efc1fb6d7fc0ac5f6df37fb70424d259d9..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/model/transformer_encoder.py +++ /dev/null @@ -1,340 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Transformer encoder.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from functools import partial - -import paddle.fluid as fluid -import paddle.fluid.layers as layers - - -def multi_head_attention(queries, - keys, - values, - attn_bias, - d_key, - d_value, - d_model, - n_head=1, - dropout_rate=0., - cache=None, - param_initializer=None, - name='multi_head_att'): - """ - Multi-Head Attention. Note that attn_bias is added to the logit before - computing softmax activiation to mask certain selected positions so that - they will not considered in attention weights. - """ - keys = queries if keys is None else keys - values = keys if values is None else values - - if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): - raise ValueError( - "Inputs: quries, keys and values should all be 3-D tensors.") - - def __compute_qkv(queries, keys, values, n_head, d_key, d_value): - """ - Add linear projection to queries, keys, and values. 
- """ - q = layers.fc( - input=queries, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_query_fc.w_0', initializer=param_initializer), - bias_attr=name + '_query_fc.b_0') - k = layers.fc( - input=keys, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_key_fc.w_0', initializer=param_initializer), - bias_attr=name + '_key_fc.b_0') - v = layers.fc( - input=values, - size=d_value * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_value_fc.w_0', initializer=param_initializer), - bias_attr=name + '_value_fc.b_0') - return q, k, v - - def __split_heads(x, n_head): - """ - Reshape the last dimension of inpunt tensor x so that it becomes two - dimensions and then transpose. Specifically, input a tensor with shape - [bs, max_sequence_length, n_head * hidden_dim] then output a tensor - with shape [bs, n_head, max_sequence_length, hidden_dim]. - """ - hidden_size = x.shape[-1] - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - reshaped = layers.reshape( - x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) - - # permuate the dimensions into: - # [batch_size, n_head, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) - - def __combine_heads(x): - """ - Transpose and then reshape the last two dimensions of inpunt tensor x - so that it becomes one dimension, which is reverse to __split_heads. - """ - if len(x.shape) == 3: return x - if len(x.shape) != 4: - raise ValueError("Input(x) should be a 4-D Tensor.") - - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - return layers.reshape( - x=trans_x, - shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], - inplace=True) - - def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): - """ - Scaled Dot-Product Attention - """ - scaled_q = layers.scale(x=q, scale=d_key**-0.5) - product = layers.matmul(x=scaled_q, y=k, transpose_y=True) - if attn_bias: - product += attn_bias - weights = layers.softmax(product) - if dropout_rate: - weights = layers.dropout( - weights, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.matmul(weights, v) - return out - - q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) - - if cache is not None: # use cache and concat time steps - # Since the inplace reshape in __split_heads changes the shape of k and - # v, which is the cache input for next time step, reshape the cache - # input from the previous time step first. - k = cache["k"] = layers.concat( - [layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) - v = cache["v"] = layers.concat( - [layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) - - q = __split_heads(q, n_head) - k = __split_heads(k, n_head) - v = __split_heads(v, n_head) - - ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, - dropout_rate) - - out = __combine_heads(ctx_multiheads) - - # Project back to the model size. 
- proj_out = layers.fc( - input=out, - size=d_model, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_output_fc.w_0', initializer=param_initializer), - bias_attr=name + '_output_fc.b_0') - return proj_out - - -def positionwise_feed_forward(x, - d_inner_hid, - d_hid, - dropout_rate, - hidden_act, - param_initializer=None, - name='ffn'): - """ - Position-wise Feed-Forward Networks. - This module consists of two linear transformations with a ReLU activation - in between, which is applied to each position separately and identically. - """ - hidden = layers.fc( - input=x, - size=d_inner_hid, - num_flatten_dims=2, - act=hidden_act, - param_attr=fluid.ParamAttr( - name=name + '_fc_0.w_0', initializer=param_initializer), - bias_attr=name + '_fc_0.b_0') - if dropout_rate: - hidden = layers.dropout( - hidden, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.fc( - input=hidden, - size=d_hid, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_fc_1.w_0', initializer=param_initializer), - bias_attr=name + '_fc_1.b_0') - return out - - -def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., - name=''): - """ - Add residual connection, layer normalization and droput to the out tensor - optionally according to the value of process_cmd. - This will be used before or after multi-head attention and position-wise - feed-forward networks. - """ - for cmd in process_cmd: - if cmd == "a": # add residual connection - out = out + prev_out if prev_out else out - elif cmd == "n": # add layer normalization - out_dtype = out.dtype - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float32") - out = layers.layer_norm( - out, - begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.ParamAttr( - name=name + '_layer_norm_scale', - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - name=name + '_layer_norm_bias', - initializer=fluid.initializer.Constant(0.))) - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float16") - elif cmd == "d": # add dropout - if dropout_rate: - out = layers.dropout( - out, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - return out - - -pre_process_layer = partial(pre_post_process_layer, None) -post_process_layer = pre_post_process_layer - - -def encoder_layer(enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """The encoder layers that can be stacked to form a deep encoder. - This module consits of a multi-head (self) attention followed by - position-wise feed-forward networks and both the two components companied - with the post_process_layer to add residual connection, layer normalization - and droput. 
- """ - attn_output = multi_head_attention( - pre_process_layer( - enc_input, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_att'), - None, - None, - attn_bias, - d_key, - d_value, - d_model, - n_head, - attention_dropout, - param_initializer=param_initializer, - name=name + '_multi_head_att') - attn_output = post_process_layer( - enc_input, - attn_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_att') - ffd_output = positionwise_feed_forward( - pre_process_layer( - attn_output, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_ffn'), - d_inner_hid, - d_model, - relu_dropout, - hidden_act, - param_initializer=param_initializer, - name=name + '_ffn') - return post_process_layer( - attn_output, - ffd_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_ffn') - - -def encoder(enc_input, - attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """ - The encoder is composed of a stack of identical layers returned by calling - encoder_layer. - """ - for i in range(n_layer): - enc_output = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name + '_layer_' + str(i)) - enc_input = enc_output - enc_output = pre_process_layer( - enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") - - return enc_output diff --git a/hub_module/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/module.py b/hub_module/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/module.py deleted file mode 100644 index 9e7804cda857079c47969cfdbb483ab393356ae1..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/module.py +++ /dev/null @@ -1,76 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-from paddlehub import TransformerModule
-from paddlehub.module.module import moduleinfo
-
-from bert_chinese_L_12_H_768_A_12.model.bert import BertConfig, BertModel
-
-
-@moduleinfo(
-    name="bert_chinese_L-12_H-768_A-12",
-    version="1.1.0",
-    summary=
-    "bert_chinese_L-12_H-768_A-12, 12-layer, 768-hidden, 12-heads, 110M parameters",
-    author="paddlepaddle",
-    author_email="paddle-dev@baidu.com",
-    type="nlp/semantic_model",
-)
-class BertChinese(TransformerModule):
-    def _initialize(self):
-        self.MAX_SEQ_LEN = 512
-        self.params_path = os.path.join(self.directory, "assets", "params")
-        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
-
-        bert_config_path = os.path.join(self.directory, "assets",
-                                        "bert_config.json")
-        self.bert_config = BertConfig(bert_config_path)
-
-    def net(self, input_ids, position_ids, segment_ids, input_mask):
-        """
-        Create the neural network.
-
-        Args:
-            input_ids (tensor): the word ids.
-            position_ids (tensor): the position ids.
-            segment_ids (tensor): the segment ids.
-            input_mask (tensor): the padding mask.
-
-        Returns:
-            pooled_output (tensor): sentence-level output for classification task.
-            sequence_output (tensor): token-level output for sequence task.
-        """
-        bert = BertModel(
-            src_ids=input_ids,
-            position_ids=position_ids,
-            sentence_ids=segment_ids,
-            input_mask=input_mask,
-            config=self.bert_config,
-            use_fp16=False)
-        pooled_output = bert.get_pooled_output()
-        sequence_output = bert.get_sequence_output()
-        return pooled_output, sequence_output
-
-
-if __name__ == '__main__':
-    test_module = BertChinese()
diff --git a/hub_module/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/model/bert.py b/hub_module/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/model/bert.py
deleted file mode 100644
index b754b9b233edd42ac55527a2e015b8ac55185881..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/model/bert.py
+++ /dev/null
@@ -1,228 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
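For context, module classes like the one above are normally consumed through the PaddleHub API rather than instantiated directly. The sketch below assumes PaddleHub 1.x with the module installed locally; names and signatures may differ across versions:

```python
# Hedged usage sketch, assuming PaddleHub 1.x.
import paddlehub as hub

module = hub.Module(name="bert_chinese_L-12_H-768_A-12")
inputs, outputs, program = module.context(trainable=True, max_seq_len=128)
# outputs["pooled_output"]   -> sentence-level feature (see net() above)
# outputs["sequence_output"] -> token-level features
```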
-"""BERT model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import json - -import paddle.fluid as fluid - -from bert_multi_cased_L_12_H_768_A_12.model.transformer_encoder import encoder, pre_process_layer - - -class BertConfig(object): - def __init__(self, config_path): - self._config_dict = self._parse(config_path) - - def _parse(self, config_path): - try: - with open(config_path) as json_file: - config_dict = json.load(json_file) - except Exception: - raise IOError( - "Error in parsing bert model config file '%s'" % config_path) - else: - return config_dict - - def __getitem__(self, key): - return self._config_dict[key] - - def print_config(self): - for arg, value in sorted(six.iteritems(self._config_dict)): - print('%s: %s' % (arg, value)) - print('------------------------------------------------') - - -class BertModel(object): - def __init__(self, - src_ids, - position_ids, - sentence_ids, - input_mask, - config, - weight_sharing=True, - use_fp16=False): - - self._emb_size = config['hidden_size'] - self._n_layer = config['num_hidden_layers'] - self._n_head = config['num_attention_heads'] - self._voc_size = config['vocab_size'] - self._max_position_seq_len = config['max_position_embeddings'] - self._sent_types = config['type_vocab_size'] - self._hidden_act = config['hidden_act'] - self._prepostprocess_dropout = config['hidden_dropout_prob'] - self._attention_dropout = config['attention_probs_dropout_prob'] - self._weight_sharing = weight_sharing - - self._word_emb_name = "word_embedding" - self._pos_emb_name = "pos_embedding" - self._sent_emb_name = "sent_embedding" - self._dtype = "float16" if use_fp16 else "float32" - - # Initialize all weigths by truncated normal initializer, and all biases - # will be initialized by constant zero by default. 
- self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config['initializer_range']) - - self._build_model(src_ids, position_ids, sentence_ids, input_mask) - - def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): - # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( - input=src_ids, - size=[self._voc_size, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._word_emb_name, initializer=self._param_initializer), - is_sparse=False) - position_emb_out = fluid.layers.embedding( - input=position_ids, - size=[self._max_position_seq_len, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._pos_emb_name, initializer=self._param_initializer)) - - sent_emb_out = fluid.layers.embedding( - sentence_ids, - size=[self._sent_types, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._sent_emb_name, initializer=self._param_initializer)) - - emb_out = emb_out + position_emb_out - emb_out = emb_out + sent_emb_out - - emb_out = pre_process_layer( - emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') - - if self._dtype == "float16": - input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) - - self_attn_mask = fluid.layers.matmul( - x=input_mask, y=input_mask, transpose_y=True) - self_attn_mask = fluid.layers.scale( - x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = fluid.layers.stack( - x=[self_attn_mask] * self._n_head, axis=1) - n_head_self_attn_mask.stop_gradient = True - - self._enc_out = encoder( - enc_input=emb_out, - attn_bias=n_head_self_attn_mask, - n_layer=self._n_layer, - n_head=self._n_head, - d_key=self._emb_size // self._n_head, - d_value=self._emb_size // self._n_head, - d_model=self._emb_size, - d_inner_hid=self._emb_size * 4, - prepostprocess_dropout=self._prepostprocess_dropout, - attention_dropout=self._attention_dropout, - relu_dropout=0, - hidden_act=self._hidden_act, - preprocess_cmd="", - postprocess_cmd="dan", - param_initializer=self._param_initializer, - name='encoder') - - def get_sequence_output(self): - return self._enc_out - - def get_pooled_output(self): - """Get the first feature of each sequence for classification""" - - next_sent_feat = fluid.layers.slice( - input=self._enc_out, axes=[1], starts=[0], ends=[1]) - next_sent_feat = fluid.layers.fc( - input=next_sent_feat, - size=self._emb_size, - act="tanh", - param_attr=fluid.ParamAttr( - name="pooled_fc.w_0", initializer=self._param_initializer), - bias_attr="pooled_fc.b_0") - return next_sent_feat - - def get_pretraining_output(self, mask_label, mask_pos, labels): - """Get the loss & accuracy for pretraining""" - - mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') - - # extract the first token feature in each sentence - next_sent_feat = self.get_pooled_output() - reshaped_emb_out = fluid.layers.reshape( - x=self._enc_out, shape=[-1, self._emb_size]) - # extract masked tokens' feature - mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) - - # transform: fc - mask_trans_feat = fluid.layers.fc( - input=mask_feat, - size=self._emb_size, - act=self._hidden_act, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_fc.w_0', - initializer=self._param_initializer), - bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) - # transform: layer norm - mask_trans_feat = pre_process_layer( - mask_trans_feat, 'n', name='mask_lm_trans') - - mask_lm_out_bias_attr = fluid.ParamAttr( - 
name="mask_lm_out_fc.b_0", - initializer=fluid.initializer.Constant(value=0.0)) - if self._weight_sharing: - fc_out = fluid.layers.matmul( - x=mask_trans_feat, - y=fluid.default_main_program().global_block().var( - self._word_emb_name), - transpose_y=True) - fc_out += fluid.layers.create_parameter( - shape=[self._voc_size], - dtype=self._dtype, - attr=mask_lm_out_bias_attr, - is_bias=True) - - else: - fc_out = fluid.layers.fc( - input=mask_trans_feat, - size=self._voc_size, - param_attr=fluid.ParamAttr( - name="mask_lm_out_fc.w_0", - initializer=self._param_initializer), - bias_attr=mask_lm_out_bias_attr) - - mask_lm_loss = fluid.layers.softmax_with_cross_entropy( - logits=fc_out, label=mask_label) - mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) - - next_sent_fc_out = fluid.layers.fc( - input=next_sent_feat, - size=2, - param_attr=fluid.ParamAttr( - name="next_sent_fc.w_0", initializer=self._param_initializer), - bias_attr="next_sent_fc.b_0") - - next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy( - logits=next_sent_fc_out, label=labels, return_softmax=True) - - next_sent_acc = fluid.layers.accuracy( - input=next_sent_softmax, label=labels) - - mean_next_sent_loss = fluid.layers.mean(next_sent_loss) - - loss = mean_next_sent_loss + mean_mask_lm_loss - return next_sent_acc, mean_mask_lm_loss, loss diff --git a/hub_module/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/model/transformer_encoder.py b/hub_module/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/model/transformer_encoder.py deleted file mode 100644 index 9a5913efc1fb6d7fc0ac5f6df37fb70424d259d9..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/model/transformer_encoder.py +++ /dev/null @@ -1,340 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Transformer encoder.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from functools import partial - -import paddle.fluid as fluid -import paddle.fluid.layers as layers - - -def multi_head_attention(queries, - keys, - values, - attn_bias, - d_key, - d_value, - d_model, - n_head=1, - dropout_rate=0., - cache=None, - param_initializer=None, - name='multi_head_att'): - """ - Multi-Head Attention. Note that attn_bias is added to the logit before - computing softmax activiation to mask certain selected positions so that - they will not considered in attention weights. - """ - keys = queries if keys is None else keys - values = keys if values is None else values - - if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): - raise ValueError( - "Inputs: quries, keys and values should all be 3-D tensors.") - - def __compute_qkv(queries, keys, values, n_head, d_key, d_value): - """ - Add linear projection to queries, keys, and values. 
-        """
-        q = layers.fc(
-            input=queries,
-            size=d_key * n_head,
-            num_flatten_dims=2,
-            param_attr=fluid.ParamAttr(
-                name=name + '_query_fc.w_0', initializer=param_initializer),
-            bias_attr=name + '_query_fc.b_0')
-        k = layers.fc(
-            input=keys,
-            size=d_key * n_head,
-            num_flatten_dims=2,
-            param_attr=fluid.ParamAttr(
-                name=name + '_key_fc.w_0', initializer=param_initializer),
-            bias_attr=name + '_key_fc.b_0')
-        v = layers.fc(
-            input=values,
-            size=d_value * n_head,
-            num_flatten_dims=2,
-            param_attr=fluid.ParamAttr(
-                name=name + '_value_fc.w_0', initializer=param_initializer),
-            bias_attr=name + '_value_fc.b_0')
-        return q, k, v
-
-    def __split_heads(x, n_head):
-        """
-        Reshape the last dimension of the input tensor x so that it becomes
-        two dimensions, then transpose. Specifically, transform a tensor with
-        shape [bs, max_sequence_length, n_head * hidden_dim] into a tensor
-        with shape [bs, n_head, max_sequence_length, hidden_dim].
-        """
-        hidden_size = x.shape[-1]
-        # The value 0 in shape attr means copying the corresponding dimension
-        # size of the input as the output dimension size.
-        reshaped = layers.reshape(
-            x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True)
-
-        # permute the dimensions into:
-        # [batch_size, n_head, max_sequence_len, hidden_size_per_head]
-        return layers.transpose(x=reshaped, perm=[0, 2, 1, 3])
-
-    def __combine_heads(x):
-        """
-        Transpose and then reshape the last two dimensions of the input
-        tensor x so that they become one dimension; the reverse of
-        __split_heads.
-        """
-        if len(x.shape) == 3: return x
-        if len(x.shape) != 4:
-            raise ValueError("Input(x) should be a 4-D Tensor.")
-
-        trans_x = layers.transpose(x, perm=[0, 2, 1, 3])
-        # The value 0 in shape attr means copying the corresponding dimension
-        # size of the input as the output dimension size.
-        return layers.reshape(
-            x=trans_x,
-            shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]],
-            inplace=True)
-
-    def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate):
-        """
-        Scaled Dot-Product Attention
-        """
-        scaled_q = layers.scale(x=q, scale=d_key**-0.5)
-        product = layers.matmul(x=scaled_q, y=k, transpose_y=True)
-        if attn_bias:
-            product += attn_bias
-        weights = layers.softmax(product)
-        if dropout_rate:
-            weights = layers.dropout(
-                weights,
-                dropout_prob=dropout_rate,
-                dropout_implementation="upscale_in_train",
-                is_test=False)
-        out = layers.matmul(weights, v)
-        return out
-
-    q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value)
-
-    if cache is not None:  # use cache and concat time steps
-        # Since the inplace reshape in __split_heads changes the shape of k and
-        # v, which is the cache input for the next time step, reshape the cache
-        # input from the previous time step first.
-        k = cache["k"] = layers.concat(
-            [layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1)
-        v = cache["v"] = layers.concat(
-            [layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1)
-
-    q = __split_heads(q, n_head)
-    k = __split_heads(k, n_head)
-    v = __split_heads(v, n_head)
-
-    ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key,
-                                                  dropout_rate)
-
-    out = __combine_heads(ctx_multiheads)
-
-    # Project back to the model size.
-    proj_out = layers.fc(
-        input=out,
-        size=d_model,
-        num_flatten_dims=2,
-        param_attr=fluid.ParamAttr(
-            name=name + '_output_fc.w_0', initializer=param_initializer),
-        bias_attr=name + '_output_fc.b_0')
-    return proj_out
-
-
-def positionwise_feed_forward(x,
-                              d_inner_hid,
-                              d_hid,
-                              dropout_rate,
-                              hidden_act,
-                              param_initializer=None,
-                              name='ffn'):
-    """
-    Position-wise Feed-Forward Networks.
-    This module consists of two linear transformations with a ReLU activation
-    in between, which is applied to each position separately and identically.
-    """
-    hidden = layers.fc(
-        input=x,
-        size=d_inner_hid,
-        num_flatten_dims=2,
-        act=hidden_act,
-        param_attr=fluid.ParamAttr(
-            name=name + '_fc_0.w_0', initializer=param_initializer),
-        bias_attr=name + '_fc_0.b_0')
-    if dropout_rate:
-        hidden = layers.dropout(
-            hidden,
-            dropout_prob=dropout_rate,
-            dropout_implementation="upscale_in_train",
-            is_test=False)
-    out = layers.fc(
-        input=hidden,
-        size=d_hid,
-        num_flatten_dims=2,
-        param_attr=fluid.ParamAttr(
-            name=name + '_fc_1.w_0', initializer=param_initializer),
-        bias_attr=name + '_fc_1.b_0')
-    return out
-
-
-def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.,
-                           name=''):
-    """
-    Add residual connection, layer normalization and dropout to the out tensor
-    optionally according to the value of process_cmd.
-    This will be used before or after multi-head attention and position-wise
-    feed-forward networks.
-    """
-    for cmd in process_cmd:
-        if cmd == "a":  # add residual connection
-            out = out + prev_out if prev_out else out
-        elif cmd == "n":  # add layer normalization
-            out_dtype = out.dtype
-            if out_dtype == fluid.core.VarDesc.VarType.FP16:
-                out = layers.cast(x=out, dtype="float32")
-            out = layers.layer_norm(
-                out,
-                begin_norm_axis=len(out.shape) - 1,
-                param_attr=fluid.ParamAttr(
-                    name=name + '_layer_norm_scale',
-                    initializer=fluid.initializer.Constant(1.)),
-                bias_attr=fluid.ParamAttr(
-                    name=name + '_layer_norm_bias',
-                    initializer=fluid.initializer.Constant(0.)))
-            if out_dtype == fluid.core.VarDesc.VarType.FP16:
-                out = layers.cast(x=out, dtype="float16")
-        elif cmd == "d":  # add dropout
-            if dropout_rate:
-                out = layers.dropout(
-                    out,
-                    dropout_prob=dropout_rate,
-                    dropout_implementation="upscale_in_train",
-                    is_test=False)
-    return out
-
-
-pre_process_layer = partial(pre_post_process_layer, None)
-post_process_layer = pre_post_process_layer
-
-
-def encoder_layer(enc_input,
-                  attn_bias,
-                  n_head,
-                  d_key,
-                  d_value,
-                  d_model,
-                  d_inner_hid,
-                  prepostprocess_dropout,
-                  attention_dropout,
-                  relu_dropout,
-                  hidden_act,
-                  preprocess_cmd="n",
-                  postprocess_cmd="da",
-                  param_initializer=None,
-                  name=''):
-    """The encoder layers that can be stacked to form a deep encoder.
-    This module consists of a multi-head (self) attention followed by a
-    position-wise feed-forward network, each of the two components wrapped
-    in post_process_layer to add residual connection, layer normalization
-    and dropout.
- """ - attn_output = multi_head_attention( - pre_process_layer( - enc_input, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_att'), - None, - None, - attn_bias, - d_key, - d_value, - d_model, - n_head, - attention_dropout, - param_initializer=param_initializer, - name=name + '_multi_head_att') - attn_output = post_process_layer( - enc_input, - attn_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_att') - ffd_output = positionwise_feed_forward( - pre_process_layer( - attn_output, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_ffn'), - d_inner_hid, - d_model, - relu_dropout, - hidden_act, - param_initializer=param_initializer, - name=name + '_ffn') - return post_process_layer( - attn_output, - ffd_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_ffn') - - -def encoder(enc_input, - attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """ - The encoder is composed of a stack of identical layers returned by calling - encoder_layer. - """ - for i in range(n_layer): - enc_output = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name + '_layer_' + str(i)) - enc_input = enc_output - enc_output = pre_process_layer( - enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") - - return enc_output diff --git a/hub_module/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/module.py b/hub_module/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/module.py deleted file mode 100644 index 0c7b4bea9528cc6042fae1bfdc434528d3188f54..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/module.py +++ /dev/null @@ -1,76 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
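The core of the encoder that ends above is `scaled_dot_product_attention`: softmax(QK^T / sqrt(d_key) + bias) V. An illustrative NumPy equivalent, with dropout omitted:

```python
# NumPy sketch of scaled dot-product attention over [bs, n_head, len, d_key].
import numpy as np

def sdpa(q, k, v, attn_bias=None):
    d_key = q.shape[-1]
    product = (q / np.sqrt(d_key)) @ k.transpose(0, 1, 3, 2)  # [bs, head, q_len, k_len]
    if attn_bias is not None:
        product = product + attn_bias
    product -= product.max(axis=-1, keepdims=True)            # numerical stability
    weights = np.exp(product)
    weights /= weights.sum(axis=-1, keepdims=True)            # softmax
    return weights @ v

q = k = v = np.random.randn(2, 12, 5, 64)
out = sdpa(q, k, v)   # shape (2, 12, 5, 64)
```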
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-from paddlehub import TransformerModule
-from paddlehub.module.module import moduleinfo
-
-from bert_multi_cased_L_12_H_768_A_12.model.bert import BertConfig, BertModel
-
-
-@moduleinfo(
-    name="bert_multi_cased_L-12_H-768_A-12",
-    version="1.1.0",
-    summary=
-    "bert_multi_cased_L-12_H-768_A-12, 12-layer, 768-hidden, 12-heads, 110M parameters",
-    author="paddlepaddle",
-    author_email="paddle-dev@baidu.com",
-    type="nlp/semantic_model",
-)
-class Bert(TransformerModule):
-    def _initialize(self):
-        self.MAX_SEQ_LEN = 512
-        self.params_path = os.path.join(self.directory, "assets", "params")
-        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
-
-        bert_config_path = os.path.join(self.directory, "assets",
-                                        "bert_config.json")
-        self.bert_config = BertConfig(bert_config_path)
-
-    def net(self, input_ids, position_ids, segment_ids, input_mask):
-        """
-        Create the neural network.
-
-        Args:
-            input_ids (tensor): the word ids.
-            position_ids (tensor): the position ids.
-            segment_ids (tensor): the segment ids.
-            input_mask (tensor): the padding mask.
-
-        Returns:
-            pooled_output (tensor): sentence-level output for classification task.
-            sequence_output (tensor): token-level output for sequence task.
-        """
-        bert = BertModel(
-            src_ids=input_ids,
-            position_ids=position_ids,
-            sentence_ids=segment_ids,
-            input_mask=input_mask,
-            config=self.bert_config,
-            use_fp16=False)
-        pooled_output = bert.get_pooled_output()
-        sequence_output = bert.get_sequence_output()
-        return pooled_output, sequence_output
-
-
-if __name__ == '__main__':
-    test_module = Bert()
diff --git a/hub_module/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/model/bert.py b/hub_module/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/model/bert.py
deleted file mode 100644
index 04007f393b51b44d53a0f2573d4084cd19d0cbd4..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/model/bert.py
+++ /dev/null
@@ -1,228 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
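In `_build_model` further below, the 0/1 padding mask is turned into the additive attention bias the encoder consumes: `scale(x, scale=10000.0, bias=-1.0, bias_after_scale=False)` computes `(x - 1) * 10000`, so valid-valid token pairs get 0 and any pair touching padding gets -10000 before the softmax. A NumPy sketch with illustrative shapes:

```python
# From padding mask to additive attention bias, as in _build_model.
import numpy as np

input_mask = np.array([[[1.], [1.], [1.], [0.]]])             # [bs, seq_len, 1]
self_attn_mask = input_mask @ input_mask.transpose(0, 2, 1)   # [bs, seq, seq]
self_attn_bias = (self_attn_mask - 1.0) * 10000.0             # 0 or -10000
n_head = 12
n_head_bias = np.stack([self_attn_bias] * n_head, axis=1)     # [bs, n_head, seq, seq]
```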
-"""BERT model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import json - -import paddle.fluid as fluid - -from bert_multi_uncased_L_12_H_768_A_12.model.transformer_encoder import encoder, pre_process_layer - - -class BertConfig(object): - def __init__(self, config_path): - self._config_dict = self._parse(config_path) - - def _parse(self, config_path): - try: - with open(config_path) as json_file: - config_dict = json.load(json_file) - except Exception: - raise IOError( - "Error in parsing bert model config file '%s'" % config_path) - else: - return config_dict - - def __getitem__(self, key): - return self._config_dict[key] - - def print_config(self): - for arg, value in sorted(six.iteritems(self._config_dict)): - print('%s: %s' % (arg, value)) - print('------------------------------------------------') - - -class BertModel(object): - def __init__(self, - src_ids, - position_ids, - sentence_ids, - input_mask, - config, - weight_sharing=True, - use_fp16=False): - - self._emb_size = config['hidden_size'] - self._n_layer = config['num_hidden_layers'] - self._n_head = config['num_attention_heads'] - self._voc_size = config['vocab_size'] - self._max_position_seq_len = config['max_position_embeddings'] - self._sent_types = config['type_vocab_size'] - self._hidden_act = config['hidden_act'] - self._prepostprocess_dropout = config['hidden_dropout_prob'] - self._attention_dropout = config['attention_probs_dropout_prob'] - self._weight_sharing = weight_sharing - - self._word_emb_name = "word_embedding" - self._pos_emb_name = "pos_embedding" - self._sent_emb_name = "sent_embedding" - self._dtype = "float16" if use_fp16 else "float32" - - # Initialize all weigths by truncated normal initializer, and all biases - # will be initialized by constant zero by default. 
- self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config['initializer_range']) - - self._build_model(src_ids, position_ids, sentence_ids, input_mask) - - def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): - # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( - input=src_ids, - size=[self._voc_size, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._word_emb_name, initializer=self._param_initializer), - is_sparse=False) - position_emb_out = fluid.layers.embedding( - input=position_ids, - size=[self._max_position_seq_len, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._pos_emb_name, initializer=self._param_initializer)) - - sent_emb_out = fluid.layers.embedding( - sentence_ids, - size=[self._sent_types, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._sent_emb_name, initializer=self._param_initializer)) - - emb_out = emb_out + position_emb_out - emb_out = emb_out + sent_emb_out - - emb_out = pre_process_layer( - emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') - - if self._dtype == "float16": - input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) - - self_attn_mask = fluid.layers.matmul( - x=input_mask, y=input_mask, transpose_y=True) - self_attn_mask = fluid.layers.scale( - x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = fluid.layers.stack( - x=[self_attn_mask] * self._n_head, axis=1) - n_head_self_attn_mask.stop_gradient = True - - self._enc_out = encoder( - enc_input=emb_out, - attn_bias=n_head_self_attn_mask, - n_layer=self._n_layer, - n_head=self._n_head, - d_key=self._emb_size // self._n_head, - d_value=self._emb_size // self._n_head, - d_model=self._emb_size, - d_inner_hid=self._emb_size * 4, - prepostprocess_dropout=self._prepostprocess_dropout, - attention_dropout=self._attention_dropout, - relu_dropout=0, - hidden_act=self._hidden_act, - preprocess_cmd="", - postprocess_cmd="dan", - param_initializer=self._param_initializer, - name='encoder') - - def get_sequence_output(self): - return self._enc_out - - def get_pooled_output(self): - """Get the first feature of each sequence for classification""" - - next_sent_feat = fluid.layers.slice( - input=self._enc_out, axes=[1], starts=[0], ends=[1]) - next_sent_feat = fluid.layers.fc( - input=next_sent_feat, - size=self._emb_size, - act="tanh", - param_attr=fluid.ParamAttr( - name="pooled_fc.w_0", initializer=self._param_initializer), - bias_attr="pooled_fc.b_0") - return next_sent_feat - - def get_pretraining_output(self, mask_label, mask_pos, labels): - """Get the loss & accuracy for pretraining""" - - mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') - - # extract the first token feature in each sentence - next_sent_feat = self.get_pooled_output() - reshaped_emb_out = fluid.layers.reshape( - x=self._enc_out, shape=[-1, self._emb_size]) - # extract masked tokens' feature - mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) - - # transform: fc - mask_trans_feat = fluid.layers.fc( - input=mask_feat, - size=self._emb_size, - act=self._hidden_act, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_fc.w_0', - initializer=self._param_initializer), - bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) - # transform: layer norm - mask_trans_feat = pre_process_layer( - mask_trans_feat, 'n', name='mask_lm_trans') - - mask_lm_out_bias_attr = fluid.ParamAttr( - 
name="mask_lm_out_fc.b_0", - initializer=fluid.initializer.Constant(value=0.0)) - if self._weight_sharing: - fc_out = fluid.layers.matmul( - x=mask_trans_feat, - y=fluid.default_main_program().global_block().var( - self._word_emb_name), - transpose_y=True) - fc_out += fluid.layers.create_parameter( - shape=[self._voc_size], - dtype=self._dtype, - attr=mask_lm_out_bias_attr, - is_bias=True) - - else: - fc_out = fluid.layers.fc( - input=mask_trans_feat, - size=self._voc_size, - param_attr=fluid.ParamAttr( - name="mask_lm_out_fc.w_0", - initializer=self._param_initializer), - bias_attr=mask_lm_out_bias_attr) - - mask_lm_loss = fluid.layers.softmax_with_cross_entropy( - logits=fc_out, label=mask_label) - mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) - - next_sent_fc_out = fluid.layers.fc( - input=next_sent_feat, - size=2, - param_attr=fluid.ParamAttr( - name="next_sent_fc.w_0", initializer=self._param_initializer), - bias_attr="next_sent_fc.b_0") - - next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy( - logits=next_sent_fc_out, label=labels, return_softmax=True) - - next_sent_acc = fluid.layers.accuracy( - input=next_sent_softmax, label=labels) - - mean_next_sent_loss = fluid.layers.mean(next_sent_loss) - - loss = mean_next_sent_loss + mean_mask_lm_loss - return next_sent_acc, mean_mask_lm_loss, loss diff --git a/hub_module/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/model/transformer_encoder.py b/hub_module/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/model/transformer_encoder.py deleted file mode 100644 index 9a5913efc1fb6d7fc0ac5f6df37fb70424d259d9..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/model/transformer_encoder.py +++ /dev/null @@ -1,340 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Transformer encoder.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from functools import partial - -import paddle.fluid as fluid -import paddle.fluid.layers as layers - - -def multi_head_attention(queries, - keys, - values, - attn_bias, - d_key, - d_value, - d_model, - n_head=1, - dropout_rate=0., - cache=None, - param_initializer=None, - name='multi_head_att'): - """ - Multi-Head Attention. Note that attn_bias is added to the logit before - computing softmax activiation to mask certain selected positions so that - they will not considered in attention weights. - """ - keys = queries if keys is None else keys - values = keys if values is None else values - - if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): - raise ValueError( - "Inputs: quries, keys and values should all be 3-D tensors.") - - def __compute_qkv(queries, keys, values, n_head, d_key, d_value): - """ - Add linear projection to queries, keys, and values. 
-        """
-        q = layers.fc(
-            input=queries,
-            size=d_key * n_head,
-            num_flatten_dims=2,
-            param_attr=fluid.ParamAttr(
-                name=name + '_query_fc.w_0', initializer=param_initializer),
-            bias_attr=name + '_query_fc.b_0')
-        k = layers.fc(
-            input=keys,
-            size=d_key * n_head,
-            num_flatten_dims=2,
-            param_attr=fluid.ParamAttr(
-                name=name + '_key_fc.w_0', initializer=param_initializer),
-            bias_attr=name + '_key_fc.b_0')
-        v = layers.fc(
-            input=values,
-            size=d_value * n_head,
-            num_flatten_dims=2,
-            param_attr=fluid.ParamAttr(
-                name=name + '_value_fc.w_0', initializer=param_initializer),
-            bias_attr=name + '_value_fc.b_0')
-        return q, k, v
-
-    def __split_heads(x, n_head):
-        """
-        Reshape the last dimension of the input tensor x so that it becomes
-        two dimensions, then transpose. Specifically, transform a tensor with
-        shape [bs, max_sequence_length, n_head * hidden_dim] into a tensor
-        with shape [bs, n_head, max_sequence_length, hidden_dim].
-        """
-        hidden_size = x.shape[-1]
-        # The value 0 in shape attr means copying the corresponding dimension
-        # size of the input as the output dimension size.
-        reshaped = layers.reshape(
-            x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True)
-
-        # permute the dimensions into:
-        # [batch_size, n_head, max_sequence_len, hidden_size_per_head]
-        return layers.transpose(x=reshaped, perm=[0, 2, 1, 3])
-
-    def __combine_heads(x):
-        """
-        Transpose and then reshape the last two dimensions of the input
-        tensor x so that they become one dimension; the reverse of
-        __split_heads.
-        """
-        if len(x.shape) == 3: return x
-        if len(x.shape) != 4:
-            raise ValueError("Input(x) should be a 4-D Tensor.")
-
-        trans_x = layers.transpose(x, perm=[0, 2, 1, 3])
-        # The value 0 in shape attr means copying the corresponding dimension
-        # size of the input as the output dimension size.
-        return layers.reshape(
-            x=trans_x,
-            shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]],
-            inplace=True)
-
-    def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate):
-        """
-        Scaled Dot-Product Attention
-        """
-        scaled_q = layers.scale(x=q, scale=d_key**-0.5)
-        product = layers.matmul(x=scaled_q, y=k, transpose_y=True)
-        if attn_bias:
-            product += attn_bias
-        weights = layers.softmax(product)
-        if dropout_rate:
-            weights = layers.dropout(
-                weights,
-                dropout_prob=dropout_rate,
-                dropout_implementation="upscale_in_train",
-                is_test=False)
-        out = layers.matmul(weights, v)
-        return out
-
-    q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value)
-
-    if cache is not None:  # use cache and concat time steps
-        # Since the inplace reshape in __split_heads changes the shape of k and
-        # v, which is the cache input for the next time step, reshape the cache
-        # input from the previous time step first.
-        k = cache["k"] = layers.concat(
-            [layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1)
-        v = cache["v"] = layers.concat(
-            [layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1)
-
-    q = __split_heads(q, n_head)
-    k = __split_heads(k, n_head)
-    v = __split_heads(v, n_head)
-
-    ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key,
-                                                  dropout_rate)
-
-    out = __combine_heads(ctx_multiheads)
-
-    # Project back to the model size.
-    proj_out = layers.fc(
-        input=out,
-        size=d_model,
-        num_flatten_dims=2,
-        param_attr=fluid.ParamAttr(
-            name=name + '_output_fc.w_0', initializer=param_initializer),
-        bias_attr=name + '_output_fc.b_0')
-    return proj_out
-
-
-def positionwise_feed_forward(x,
-                              d_inner_hid,
-                              d_hid,
-                              dropout_rate,
-                              hidden_act,
-                              param_initializer=None,
-                              name='ffn'):
-    """
-    Position-wise Feed-Forward Networks.
-    This module consists of two linear transformations with a ReLU activation
-    in between, which is applied to each position separately and identically.
-    """
-    hidden = layers.fc(
-        input=x,
-        size=d_inner_hid,
-        num_flatten_dims=2,
-        act=hidden_act,
-        param_attr=fluid.ParamAttr(
-            name=name + '_fc_0.w_0', initializer=param_initializer),
-        bias_attr=name + '_fc_0.b_0')
-    if dropout_rate:
-        hidden = layers.dropout(
-            hidden,
-            dropout_prob=dropout_rate,
-            dropout_implementation="upscale_in_train",
-            is_test=False)
-    out = layers.fc(
-        input=hidden,
-        size=d_hid,
-        num_flatten_dims=2,
-        param_attr=fluid.ParamAttr(
-            name=name + '_fc_1.w_0', initializer=param_initializer),
-        bias_attr=name + '_fc_1.b_0')
-    return out
-
-
-def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.,
-                           name=''):
-    """
-    Add residual connection, layer normalization and dropout to the out tensor
-    optionally according to the value of process_cmd.
-    This will be used before or after multi-head attention and position-wise
-    feed-forward networks.
-    """
-    for cmd in process_cmd:
-        if cmd == "a":  # add residual connection
-            out = out + prev_out if prev_out else out
-        elif cmd == "n":  # add layer normalization
-            out_dtype = out.dtype
-            if out_dtype == fluid.core.VarDesc.VarType.FP16:
-                out = layers.cast(x=out, dtype="float32")
-            out = layers.layer_norm(
-                out,
-                begin_norm_axis=len(out.shape) - 1,
-                param_attr=fluid.ParamAttr(
-                    name=name + '_layer_norm_scale',
-                    initializer=fluid.initializer.Constant(1.)),
-                bias_attr=fluid.ParamAttr(
-                    name=name + '_layer_norm_bias',
-                    initializer=fluid.initializer.Constant(0.)))
-            if out_dtype == fluid.core.VarDesc.VarType.FP16:
-                out = layers.cast(x=out, dtype="float16")
-        elif cmd == "d":  # add dropout
-            if dropout_rate:
-                out = layers.dropout(
-                    out,
-                    dropout_prob=dropout_rate,
-                    dropout_implementation="upscale_in_train",
-                    is_test=False)
-    return out
-
-
-pre_process_layer = partial(pre_post_process_layer, None)
-post_process_layer = pre_post_process_layer
-
-
-def encoder_layer(enc_input,
-                  attn_bias,
-                  n_head,
-                  d_key,
-                  d_value,
-                  d_model,
-                  d_inner_hid,
-                  prepostprocess_dropout,
-                  attention_dropout,
-                  relu_dropout,
-                  hidden_act,
-                  preprocess_cmd="n",
-                  postprocess_cmd="da",
-                  param_initializer=None,
-                  name=''):
-    """The encoder layers that can be stacked to form a deep encoder.
-    This module consists of a multi-head (self) attention followed by a
-    position-wise feed-forward network, each of the two components wrapped
-    in post_process_layer to add residual connection, layer normalization
-    and dropout.
- """ - attn_output = multi_head_attention( - pre_process_layer( - enc_input, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_att'), - None, - None, - attn_bias, - d_key, - d_value, - d_model, - n_head, - attention_dropout, - param_initializer=param_initializer, - name=name + '_multi_head_att') - attn_output = post_process_layer( - enc_input, - attn_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_att') - ffd_output = positionwise_feed_forward( - pre_process_layer( - attn_output, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_ffn'), - d_inner_hid, - d_model, - relu_dropout, - hidden_act, - param_initializer=param_initializer, - name=name + '_ffn') - return post_process_layer( - attn_output, - ffd_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_ffn') - - -def encoder(enc_input, - attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """ - The encoder is composed of a stack of identical layers returned by calling - encoder_layer. - """ - for i in range(n_layer): - enc_output = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name + '_layer_' + str(i)) - enc_input = enc_output - enc_output = pre_process_layer( - enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") - - return enc_output diff --git a/hub_module/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/module.py b/hub_module/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/module.py deleted file mode 100644 index 60e2d6e21f31cc6e52ba865f82a41cd9efaf682d..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/module.py +++ /dev/null @@ -1,76 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
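`__split_heads` and `__combine_heads` in the encoder above are a pure reshape/transpose round trip between `[bs, seq, n_head * d]` and `[bs, n_head, seq, d]`. A NumPy sketch:

```python
# Head splitting/combining as a reshape/transpose round trip.
import numpy as np

def split_heads(x, n_head):
    bs, seq, hidden = x.shape
    return x.reshape(bs, seq, n_head, hidden // n_head).transpose(0, 2, 1, 3)

def combine_heads(x):
    bs, n_head, seq, d = x.shape
    return x.transpose(0, 2, 1, 3).reshape(bs, seq, n_head * d)

x = np.random.randn(2, 5, 768)
assert np.allclose(combine_heads(split_heads(x, 12)), x)   # lossless round trip
```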
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-from paddlehub import TransformerModule
-from paddlehub.module.module import moduleinfo
-
-from bert_multi_uncased_L_12_H_768_A_12.model.bert import BertConfig, BertModel
-
-
-@moduleinfo(
-    name="bert_multi_uncased_L-12_H-768_A-12",
-    version="1.0.0",
-    summary=
-    "bert_multi_uncased_L-12_H-768_A-12, 12-layer, 768-hidden, 12-heads, 110M parameters",
-    author="paddlepaddle",
-    author_email="paddle-dev@baidu.com",
-    type="nlp/semantic_model",
-)
-class Bert(TransformerModule):
-    def _initialize(self):
-        self.MAX_SEQ_LEN = 512
-        self.params_path = os.path.join(self.directory, "assets", "params")
-        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
-
-        bert_config_path = os.path.join(self.directory, "assets",
-                                        "bert_config.json")
-        self.bert_config = BertConfig(bert_config_path)
-
-    def net(self, input_ids, position_ids, segment_ids, input_mask):
-        """
-        Create the neural network.
-
-        Args:
-            input_ids (tensor): the word ids.
-            position_ids (tensor): the position ids.
-            segment_ids (tensor): the segment ids.
-            input_mask (tensor): the padding mask.
-
-        Returns:
-            pooled_output (tensor): sentence-level output for classification task.
-            sequence_output (tensor): token-level output for sequence task.
-        """
-        bert = BertModel(
-            src_ids=input_ids,
-            position_ids=position_ids,
-            sentence_ids=segment_ids,
-            input_mask=input_mask,
-            config=self.bert_config,
-            use_fp16=False)
-        pooled_output = bert.get_pooled_output()
-        sequence_output = bert.get_sequence_output()
-        return pooled_output, sequence_output
-
-
-if __name__ == '__main__':
-    test_module = Bert()
diff --git a/hub_module/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/model/bert.py b/hub_module/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/model/bert.py
deleted file mode 100644
index 0c383e77d476036454902826224b8d89ab05bd63..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/model/bert.py
+++ /dev/null
@@ -1,228 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
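`get_pooled_output` in the file that starts here slices the first-token ([CLS]) feature out of the encoder output and passes it through a tanh fully connected layer. A NumPy sketch with random stand-in weights:

```python
# What get_pooled_output computes, in NumPy (weights are stand-ins).
import numpy as np

enc_out = np.random.randn(2, 128, 768)            # [bs, seq_len, hidden]
w = np.random.randn(768, 768) * 0.02
b = np.zeros(768)
first_token = enc_out[:, 0, :]                    # slice(axes=[1], starts=[0], ends=[1])
pooled = np.tanh(first_token @ w + b)             # [bs, 768]
```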
-"""BERT model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import json - -import paddle.fluid as fluid - -from bert_uncased_L_12_H_768_A_12.model.transformer_encoder import encoder, pre_process_layer - - -class BertConfig(object): - def __init__(self, config_path): - self._config_dict = self._parse(config_path) - - def _parse(self, config_path): - try: - with open(config_path) as json_file: - config_dict = json.load(json_file) - except Exception: - raise IOError( - "Error in parsing bert model config file '%s'" % config_path) - else: - return config_dict - - def __getitem__(self, key): - return self._config_dict[key] - - def print_config(self): - for arg, value in sorted(six.iteritems(self._config_dict)): - print('%s: %s' % (arg, value)) - print('------------------------------------------------') - - -class BertModel(object): - def __init__(self, - src_ids, - position_ids, - sentence_ids, - input_mask, - config, - weight_sharing=True, - use_fp16=False): - - self._emb_size = config['hidden_size'] - self._n_layer = config['num_hidden_layers'] - self._n_head = config['num_attention_heads'] - self._voc_size = config['vocab_size'] - self._max_position_seq_len = config['max_position_embeddings'] - self._sent_types = config['type_vocab_size'] - self._hidden_act = config['hidden_act'] - self._prepostprocess_dropout = config['hidden_dropout_prob'] - self._attention_dropout = config['attention_probs_dropout_prob'] - self._weight_sharing = weight_sharing - - self._word_emb_name = "word_embedding" - self._pos_emb_name = "pos_embedding" - self._sent_emb_name = "sent_embedding" - self._dtype = "float16" if use_fp16 else "float32" - - # Initialize all weigths by truncated normal initializer, and all biases - # will be initialized by constant zero by default. 
- self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config['initializer_range']) - - self._build_model(src_ids, position_ids, sentence_ids, input_mask) - - def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): - # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( - input=src_ids, - size=[self._voc_size, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._word_emb_name, initializer=self._param_initializer), - is_sparse=False) - position_emb_out = fluid.layers.embedding( - input=position_ids, - size=[self._max_position_seq_len, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._pos_emb_name, initializer=self._param_initializer)) - - sent_emb_out = fluid.layers.embedding( - sentence_ids, - size=[self._sent_types, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._sent_emb_name, initializer=self._param_initializer)) - - emb_out = emb_out + position_emb_out - emb_out = emb_out + sent_emb_out - - emb_out = pre_process_layer( - emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') - - if self._dtype == "float16": - input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) - - self_attn_mask = fluid.layers.matmul( - x=input_mask, y=input_mask, transpose_y=True) - self_attn_mask = fluid.layers.scale( - x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = fluid.layers.stack( - x=[self_attn_mask] * self._n_head, axis=1) - n_head_self_attn_mask.stop_gradient = True - - self._enc_out = encoder( - enc_input=emb_out, - attn_bias=n_head_self_attn_mask, - n_layer=self._n_layer, - n_head=self._n_head, - d_key=self._emb_size // self._n_head, - d_value=self._emb_size // self._n_head, - d_model=self._emb_size, - d_inner_hid=self._emb_size * 4, - prepostprocess_dropout=self._prepostprocess_dropout, - attention_dropout=self._attention_dropout, - relu_dropout=0, - hidden_act=self._hidden_act, - preprocess_cmd="", - postprocess_cmd="dan", - param_initializer=self._param_initializer, - name='encoder') - - def get_sequence_output(self): - return self._enc_out - - def get_pooled_output(self): - """Get the first feature of each sequence for classification""" - - next_sent_feat = fluid.layers.slice( - input=self._enc_out, axes=[1], starts=[0], ends=[1]) - next_sent_feat = fluid.layers.fc( - input=next_sent_feat, - size=self._emb_size, - act="tanh", - param_attr=fluid.ParamAttr( - name="pooled_fc.w_0", initializer=self._param_initializer), - bias_attr="pooled_fc.b_0") - return next_sent_feat - - def get_pretraining_output(self, mask_label, mask_pos, labels): - """Get the loss & accuracy for pretraining""" - - mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') - - # extract the first token feature in each sentence - next_sent_feat = self.get_pooled_output() - reshaped_emb_out = fluid.layers.reshape( - x=self._enc_out, shape=[-1, self._emb_size]) - # extract masked tokens' feature - mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) - - # transform: fc - mask_trans_feat = fluid.layers.fc( - input=mask_feat, - size=self._emb_size, - act=self._hidden_act, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_fc.w_0', - initializer=self._param_initializer), - bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) - # transform: layer norm - mask_trans_feat = pre_process_layer( - mask_trans_feat, 'n', name='mask_lm_trans') - - mask_lm_out_bias_attr = fluid.ParamAttr( - 
name="mask_lm_out_fc.b_0", - initializer=fluid.initializer.Constant(value=0.0)) - if self._weight_sharing: - fc_out = fluid.layers.matmul( - x=mask_trans_feat, - y=fluid.default_main_program().global_block().var( - self._word_emb_name), - transpose_y=True) - fc_out += fluid.layers.create_parameter( - shape=[self._voc_size], - dtype=self._dtype, - attr=mask_lm_out_bias_attr, - is_bias=True) - - else: - fc_out = fluid.layers.fc( - input=mask_trans_feat, - size=self._voc_size, - param_attr=fluid.ParamAttr( - name="mask_lm_out_fc.w_0", - initializer=self._param_initializer), - bias_attr=mask_lm_out_bias_attr) - - mask_lm_loss = fluid.layers.softmax_with_cross_entropy( - logits=fc_out, label=mask_label) - mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) - - next_sent_fc_out = fluid.layers.fc( - input=next_sent_feat, - size=2, - param_attr=fluid.ParamAttr( - name="next_sent_fc.w_0", initializer=self._param_initializer), - bias_attr="next_sent_fc.b_0") - - next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy( - logits=next_sent_fc_out, label=labels, return_softmax=True) - - next_sent_acc = fluid.layers.accuracy( - input=next_sent_softmax, label=labels) - - mean_next_sent_loss = fluid.layers.mean(next_sent_loss) - - loss = mean_next_sent_loss + mean_mask_lm_loss - return next_sent_acc, mean_mask_lm_loss, loss diff --git a/hub_module/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/model/transformer_encoder.py b/hub_module/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/model/transformer_encoder.py deleted file mode 100644 index 9a5913efc1fb6d7fc0ac5f6df37fb70424d259d9..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/model/transformer_encoder.py +++ /dev/null @@ -1,340 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Transformer encoder.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from functools import partial - -import paddle.fluid as fluid -import paddle.fluid.layers as layers - - -def multi_head_attention(queries, - keys, - values, - attn_bias, - d_key, - d_value, - d_model, - n_head=1, - dropout_rate=0., - cache=None, - param_initializer=None, - name='multi_head_att'): - """ - Multi-Head Attention. Note that attn_bias is added to the logit before - computing softmax activiation to mask certain selected positions so that - they will not considered in attention weights. - """ - keys = queries if keys is None else keys - values = keys if values is None else values - - if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): - raise ValueError( - "Inputs: quries, keys and values should all be 3-D tensors.") - - def __compute_qkv(queries, keys, values, n_head, d_key, d_value): - """ - Add linear projection to queries, keys, and values. 
-        """
-        q = layers.fc(
-            input=queries,
-            size=d_key * n_head,
-            num_flatten_dims=2,
-            param_attr=fluid.ParamAttr(
-                name=name + '_query_fc.w_0', initializer=param_initializer),
-            bias_attr=name + '_query_fc.b_0')
-        k = layers.fc(
-            input=keys,
-            size=d_key * n_head,
-            num_flatten_dims=2,
-            param_attr=fluid.ParamAttr(
-                name=name + '_key_fc.w_0', initializer=param_initializer),
-            bias_attr=name + '_key_fc.b_0')
-        v = layers.fc(
-            input=values,
-            size=d_value * n_head,
-            num_flatten_dims=2,
-            param_attr=fluid.ParamAttr(
-                name=name + '_value_fc.w_0', initializer=param_initializer),
-            bias_attr=name + '_value_fc.b_0')
-        return q, k, v
-
-    def __split_heads(x, n_head):
-        """
-        Reshape the last dimension of the input tensor x so that it becomes
-        two dimensions, then transpose. Specifically, transform a tensor with
-        shape [bs, max_sequence_length, n_head * hidden_dim] into a tensor
-        with shape [bs, n_head, max_sequence_length, hidden_dim].
-        """
-        hidden_size = x.shape[-1]
-        # The value 0 in shape attr means copying the corresponding dimension
-        # size of the input as the output dimension size.
-        reshaped = layers.reshape(
-            x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True)
-
-        # permute the dimensions into:
-        # [batch_size, n_head, max_sequence_len, hidden_size_per_head]
-        return layers.transpose(x=reshaped, perm=[0, 2, 1, 3])
-
-    def __combine_heads(x):
-        """
-        Transpose and then reshape the last two dimensions of the input
-        tensor x so that they become one dimension; the reverse of
-        __split_heads.
-        """
-        if len(x.shape) == 3: return x
-        if len(x.shape) != 4:
-            raise ValueError("Input(x) should be a 4-D Tensor.")
-
-        trans_x = layers.transpose(x, perm=[0, 2, 1, 3])
-        # The value 0 in shape attr means copying the corresponding dimension
-        # size of the input as the output dimension size.
-        return layers.reshape(
-            x=trans_x,
-            shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]],
-            inplace=True)
-
-    def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate):
-        """
-        Scaled Dot-Product Attention
-        """
-        scaled_q = layers.scale(x=q, scale=d_key**-0.5)
-        product = layers.matmul(x=scaled_q, y=k, transpose_y=True)
-        if attn_bias:
-            product += attn_bias
-        weights = layers.softmax(product)
-        if dropout_rate:
-            weights = layers.dropout(
-                weights,
-                dropout_prob=dropout_rate,
-                dropout_implementation="upscale_in_train",
-                is_test=False)
-        out = layers.matmul(weights, v)
-        return out
-
-    q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value)
-
-    if cache is not None:  # use cache and concat time steps
-        # Since the inplace reshape in __split_heads changes the shape of k and
-        # v, which is the cache input for the next time step, reshape the cache
-        # input from the previous time step first.
-        k = cache["k"] = layers.concat(
-            [layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1)
-        v = cache["v"] = layers.concat(
-            [layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1)
-
-    q = __split_heads(q, n_head)
-    k = __split_heads(k, n_head)
-    v = __split_heads(v, n_head)
-
-    ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key,
-                                                  dropout_rate)
-
-    out = __combine_heads(ctx_multiheads)
-
-    # Project back to the model size.
-    proj_out = layers.fc(
-        input=out,
-        size=d_model,
-        num_flatten_dims=2,
-        param_attr=fluid.ParamAttr(
-            name=name + '_output_fc.w_0', initializer=param_initializer),
-        bias_attr=name + '_output_fc.b_0')
-    return proj_out
-
-
-def positionwise_feed_forward(x,
-                              d_inner_hid,
-                              d_hid,
-                              dropout_rate,
-                              hidden_act,
-                              param_initializer=None,
-                              name='ffn'):
-    """
-    Position-wise Feed-Forward Networks.
-    This module consists of two linear transformations with a ReLU activation
-    in between, which is applied to each position separately and identically.
-    """
-    hidden = layers.fc(
-        input=x,
-        size=d_inner_hid,
-        num_flatten_dims=2,
-        act=hidden_act,
-        param_attr=fluid.ParamAttr(
-            name=name + '_fc_0.w_0', initializer=param_initializer),
-        bias_attr=name + '_fc_0.b_0')
-    if dropout_rate:
-        hidden = layers.dropout(
-            hidden,
-            dropout_prob=dropout_rate,
-            dropout_implementation="upscale_in_train",
-            is_test=False)
-    out = layers.fc(
-        input=hidden,
-        size=d_hid,
-        num_flatten_dims=2,
-        param_attr=fluid.ParamAttr(
-            name=name + '_fc_1.w_0', initializer=param_initializer),
-        bias_attr=name + '_fc_1.b_0')
-    return out
-
-
-def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.,
-                           name=''):
-    """
-    Add residual connection, layer normalization and dropout to the out tensor
-    optionally according to the value of process_cmd.
-    This will be used before or after multi-head attention and position-wise
-    feed-forward networks.
-    """
-    for cmd in process_cmd:
-        if cmd == "a":  # add residual connection
-            out = out + prev_out if prev_out else out
-        elif cmd == "n":  # add layer normalization
-            out_dtype = out.dtype
-            if out_dtype == fluid.core.VarDesc.VarType.FP16:
-                out = layers.cast(x=out, dtype="float32")
-            out = layers.layer_norm(
-                out,
-                begin_norm_axis=len(out.shape) - 1,
-                param_attr=fluid.ParamAttr(
-                    name=name + '_layer_norm_scale',
-                    initializer=fluid.initializer.Constant(1.)),
-                bias_attr=fluid.ParamAttr(
-                    name=name + '_layer_norm_bias',
-                    initializer=fluid.initializer.Constant(0.)))
-            if out_dtype == fluid.core.VarDesc.VarType.FP16:
-                out = layers.cast(x=out, dtype="float16")
-        elif cmd == "d":  # add dropout
-            if dropout_rate:
-                out = layers.dropout(
-                    out,
-                    dropout_prob=dropout_rate,
-                    dropout_implementation="upscale_in_train",
-                    is_test=False)
-    return out
-
-
-pre_process_layer = partial(pre_post_process_layer, None)
-post_process_layer = pre_post_process_layer
-
-
-def encoder_layer(enc_input,
-                  attn_bias,
-                  n_head,
-                  d_key,
-                  d_value,
-                  d_model,
-                  d_inner_hid,
-                  prepostprocess_dropout,
-                  attention_dropout,
-                  relu_dropout,
-                  hidden_act,
-                  preprocess_cmd="n",
-                  postprocess_cmd="da",
-                  param_initializer=None,
-                  name=''):
-    """The encoder layers that can be stacked to form a deep encoder.
-    This module consists of a multi-head (self) attention followed by a
-    position-wise feed-forward network, each of the two components wrapped
-    in post_process_layer to add residual connection, layer normalization
-    and dropout.
- """ - attn_output = multi_head_attention( - pre_process_layer( - enc_input, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_att'), - None, - None, - attn_bias, - d_key, - d_value, - d_model, - n_head, - attention_dropout, - param_initializer=param_initializer, - name=name + '_multi_head_att') - attn_output = post_process_layer( - enc_input, - attn_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_att') - ffd_output = positionwise_feed_forward( - pre_process_layer( - attn_output, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_ffn'), - d_inner_hid, - d_model, - relu_dropout, - hidden_act, - param_initializer=param_initializer, - name=name + '_ffn') - return post_process_layer( - attn_output, - ffd_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_ffn') - - -def encoder(enc_input, - attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """ - The encoder is composed of a stack of identical layers returned by calling - encoder_layer. - """ - for i in range(n_layer): - enc_output = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name + '_layer_' + str(i)) - enc_input = enc_output - enc_output = pre_process_layer( - enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") - - return enc_output diff --git a/hub_module/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/module.py b/hub_module/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/module.py deleted file mode 100644 index b74fc720c31d22207b34ed88e16c255406fda8f5..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/module.py +++ /dev/null @@ -1,76 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-from paddlehub import TransformerModule
-from paddlehub.module.module import moduleinfo
-
-from bert_uncased_L_12_H_768_A_12.model.bert import BertConfig, BertModel
-
-
-@moduleinfo(
- name="bert_uncased_L-12_H-768_A-12",
- version="1.1.0",
- summary=
- "bert_uncased_L-12_H-768_A-12, 12-layer, 768-hidden, 12-heads, 110M parameters",
- author="paddlepaddle",
- author_email="paddle-dev@baidu.com",
- type="nlp/semantic_model",
-)
-class Bert(TransformerModule):
- def _initialize(self):
- self.MAX_SEQ_LEN = 512
- self.params_path = os.path.join(self.directory, "assets", "params")
- self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
-
- bert_config_path = os.path.join(self.directory, "assets",
- "bert_config.json")
- self.bert_config = BertConfig(bert_config_path)
-
- def net(self, input_ids, position_ids, segment_ids, input_mask):
- """
- Create the neural network.
-
- Args:
- input_ids (tensor): the word ids.
- position_ids (tensor): the position ids.
- segment_ids (tensor): the segment ids.
- input_mask (tensor): the padding mask.
-
- Returns:
- pooled_output (tensor): sentence-level output for classification tasks.
- sequence_output (tensor): token-level output for sequence tasks.
- """
- bert = BertModel(
- src_ids=input_ids,
- position_ids=position_ids,
- sentence_ids=segment_ids,
- input_mask=input_mask,
- config=self.bert_config,
- use_fp16=False)
- pooled_output = bert.get_pooled_output()
- sequence_output = bert.get_sequence_output()
- return pooled_output, sequence_output
-
-
-if __name__ == '__main__':
- test_module = Bert()
diff --git a/hub_module/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/model/bert.py b/hub_module/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/model/bert.py
deleted file mode 100644
index b67c40c3e839b010ea0b405cdc73d608088af51a..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/model/bert.py
+++ /dev/null
@@ -1,228 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
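
The `Bert` module deleted above plugs into PaddleHub's module registry. A minimal usage sketch, assuming PaddleHub 1.x with the module installed locally (the `pooled_output`/`sequence_output` keys mirror the `net()` return values):

```python
import paddlehub as hub

# Name matches the @moduleinfo block of the deleted module.
module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
inputs, outputs, program = module.context(trainable=True, max_seq_len=128)

pooled_output = outputs["pooled_output"]      # sentence-level feature
sequence_output = outputs["sequence_output"]  # token-level features
```
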
-"""BERT model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import json - -import paddle.fluid as fluid - -from bert_uncased_L_24_H_1024_A_16.model.transformer_encoder import encoder, pre_process_layer - - -class BertConfig(object): - def __init__(self, config_path): - self._config_dict = self._parse(config_path) - - def _parse(self, config_path): - try: - with open(config_path) as json_file: - config_dict = json.load(json_file) - except Exception: - raise IOError( - "Error in parsing bert model config file '%s'" % config_path) - else: - return config_dict - - def __getitem__(self, key): - return self._config_dict[key] - - def print_config(self): - for arg, value in sorted(six.iteritems(self._config_dict)): - print('%s: %s' % (arg, value)) - print('------------------------------------------------') - - -class BertModel(object): - def __init__(self, - src_ids, - position_ids, - sentence_ids, - input_mask, - config, - weight_sharing=True, - use_fp16=False): - - self._emb_size = config['hidden_size'] - self._n_layer = config['num_hidden_layers'] - self._n_head = config['num_attention_heads'] - self._voc_size = config['vocab_size'] - self._max_position_seq_len = config['max_position_embeddings'] - self._sent_types = config['type_vocab_size'] - self._hidden_act = config['hidden_act'] - self._prepostprocess_dropout = config['hidden_dropout_prob'] - self._attention_dropout = config['attention_probs_dropout_prob'] - self._weight_sharing = weight_sharing - - self._word_emb_name = "word_embedding" - self._pos_emb_name = "pos_embedding" - self._sent_emb_name = "sent_embedding" - self._dtype = "float16" if use_fp16 else "float32" - - # Initialize all weigths by truncated normal initializer, and all biases - # will be initialized by constant zero by default. 
- self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config['initializer_range']) - - self._build_model(src_ids, position_ids, sentence_ids, input_mask) - - def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): - # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( - input=src_ids, - size=[self._voc_size, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._word_emb_name, initializer=self._param_initializer), - is_sparse=False) - position_emb_out = fluid.layers.embedding( - input=position_ids, - size=[self._max_position_seq_len, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._pos_emb_name, initializer=self._param_initializer)) - - sent_emb_out = fluid.layers.embedding( - sentence_ids, - size=[self._sent_types, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._sent_emb_name, initializer=self._param_initializer)) - - emb_out = emb_out + position_emb_out - emb_out = emb_out + sent_emb_out - - emb_out = pre_process_layer( - emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') - - if self._dtype == "float16": - input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) - - self_attn_mask = fluid.layers.matmul( - x=input_mask, y=input_mask, transpose_y=True) - self_attn_mask = fluid.layers.scale( - x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = fluid.layers.stack( - x=[self_attn_mask] * self._n_head, axis=1) - n_head_self_attn_mask.stop_gradient = True - - self._enc_out = encoder( - enc_input=emb_out, - attn_bias=n_head_self_attn_mask, - n_layer=self._n_layer, - n_head=self._n_head, - d_key=self._emb_size // self._n_head, - d_value=self._emb_size // self._n_head, - d_model=self._emb_size, - d_inner_hid=self._emb_size * 4, - prepostprocess_dropout=self._prepostprocess_dropout, - attention_dropout=self._attention_dropout, - relu_dropout=0, - hidden_act=self._hidden_act, - preprocess_cmd="", - postprocess_cmd="dan", - param_initializer=self._param_initializer, - name='encoder') - - def get_sequence_output(self): - return self._enc_out - - def get_pooled_output(self): - """Get the first feature of each sequence for classification""" - - next_sent_feat = fluid.layers.slice( - input=self._enc_out, axes=[1], starts=[0], ends=[1]) - next_sent_feat = fluid.layers.fc( - input=next_sent_feat, - size=self._emb_size, - act="tanh", - param_attr=fluid.ParamAttr( - name="pooled_fc.w_0", initializer=self._param_initializer), - bias_attr="pooled_fc.b_0") - return next_sent_feat - - def get_pretraining_output(self, mask_label, mask_pos, labels): - """Get the loss & accuracy for pretraining""" - - mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') - - # extract the first token feature in each sentence - next_sent_feat = self.get_pooled_output() - reshaped_emb_out = fluid.layers.reshape( - x=self._enc_out, shape=[-1, self._emb_size]) - # extract masked tokens' feature - mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) - - # transform: fc - mask_trans_feat = fluid.layers.fc( - input=mask_feat, - size=self._emb_size, - act=self._hidden_act, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_fc.w_0', - initializer=self._param_initializer), - bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) - # transform: layer norm - mask_trans_feat = pre_process_layer( - mask_trans_feat, 'n', name='mask_lm_trans') - - mask_lm_out_bias_attr = fluid.ParamAttr( - 
name="mask_lm_out_fc.b_0", - initializer=fluid.initializer.Constant(value=0.0)) - if self._weight_sharing: - fc_out = fluid.layers.matmul( - x=mask_trans_feat, - y=fluid.default_main_program().global_block().var( - self._word_emb_name), - transpose_y=True) - fc_out += fluid.layers.create_parameter( - shape=[self._voc_size], - dtype=self._dtype, - attr=mask_lm_out_bias_attr, - is_bias=True) - - else: - fc_out = fluid.layers.fc( - input=mask_trans_feat, - size=self._voc_size, - param_attr=fluid.ParamAttr( - name="mask_lm_out_fc.w_0", - initializer=self._param_initializer), - bias_attr=mask_lm_out_bias_attr) - - mask_lm_loss = fluid.layers.softmax_with_cross_entropy( - logits=fc_out, label=mask_label) - mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) - - next_sent_fc_out = fluid.layers.fc( - input=next_sent_feat, - size=2, - param_attr=fluid.ParamAttr( - name="next_sent_fc.w_0", initializer=self._param_initializer), - bias_attr="next_sent_fc.b_0") - - next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy( - logits=next_sent_fc_out, label=labels, return_softmax=True) - - next_sent_acc = fluid.layers.accuracy( - input=next_sent_softmax, label=labels) - - mean_next_sent_loss = fluid.layers.mean(next_sent_loss) - - loss = mean_next_sent_loss + mean_mask_lm_loss - return next_sent_acc, mean_mask_lm_loss, loss diff --git a/hub_module/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/model/transformer_encoder.py b/hub_module/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/model/transformer_encoder.py deleted file mode 100644 index 9a5913efc1fb6d7fc0ac5f6df37fb70424d259d9..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/model/transformer_encoder.py +++ /dev/null @@ -1,340 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Transformer encoder.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from functools import partial - -import paddle.fluid as fluid -import paddle.fluid.layers as layers - - -def multi_head_attention(queries, - keys, - values, - attn_bias, - d_key, - d_value, - d_model, - n_head=1, - dropout_rate=0., - cache=None, - param_initializer=None, - name='multi_head_att'): - """ - Multi-Head Attention. Note that attn_bias is added to the logit before - computing softmax activiation to mask certain selected positions so that - they will not considered in attention weights. - """ - keys = queries if keys is None else keys - values = keys if values is None else values - - if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): - raise ValueError( - "Inputs: quries, keys and values should all be 3-D tensors.") - - def __compute_qkv(queries, keys, values, n_head, d_key, d_value): - """ - Add linear projection to queries, keys, and values. 
- """ - q = layers.fc( - input=queries, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_query_fc.w_0', initializer=param_initializer), - bias_attr=name + '_query_fc.b_0') - k = layers.fc( - input=keys, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_key_fc.w_0', initializer=param_initializer), - bias_attr=name + '_key_fc.b_0') - v = layers.fc( - input=values, - size=d_value * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_value_fc.w_0', initializer=param_initializer), - bias_attr=name + '_value_fc.b_0') - return q, k, v - - def __split_heads(x, n_head): - """ - Reshape the last dimension of inpunt tensor x so that it becomes two - dimensions and then transpose. Specifically, input a tensor with shape - [bs, max_sequence_length, n_head * hidden_dim] then output a tensor - with shape [bs, n_head, max_sequence_length, hidden_dim]. - """ - hidden_size = x.shape[-1] - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - reshaped = layers.reshape( - x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) - - # permuate the dimensions into: - # [batch_size, n_head, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) - - def __combine_heads(x): - """ - Transpose and then reshape the last two dimensions of inpunt tensor x - so that it becomes one dimension, which is reverse to __split_heads. - """ - if len(x.shape) == 3: return x - if len(x.shape) != 4: - raise ValueError("Input(x) should be a 4-D Tensor.") - - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - return layers.reshape( - x=trans_x, - shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], - inplace=True) - - def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): - """ - Scaled Dot-Product Attention - """ - scaled_q = layers.scale(x=q, scale=d_key**-0.5) - product = layers.matmul(x=scaled_q, y=k, transpose_y=True) - if attn_bias: - product += attn_bias - weights = layers.softmax(product) - if dropout_rate: - weights = layers.dropout( - weights, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.matmul(weights, v) - return out - - q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) - - if cache is not None: # use cache and concat time steps - # Since the inplace reshape in __split_heads changes the shape of k and - # v, which is the cache input for next time step, reshape the cache - # input from the previous time step first. - k = cache["k"] = layers.concat( - [layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) - v = cache["v"] = layers.concat( - [layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) - - q = __split_heads(q, n_head) - k = __split_heads(k, n_head) - v = __split_heads(v, n_head) - - ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, - dropout_rate) - - out = __combine_heads(ctx_multiheads) - - # Project back to the model size. 
- proj_out = layers.fc( - input=out, - size=d_model, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_output_fc.w_0', initializer=param_initializer), - bias_attr=name + '_output_fc.b_0') - return proj_out - - -def positionwise_feed_forward(x, - d_inner_hid, - d_hid, - dropout_rate, - hidden_act, - param_initializer=None, - name='ffn'): - """ - Position-wise Feed-Forward Networks. - This module consists of two linear transformations with a ReLU activation - in between, which is applied to each position separately and identically. - """ - hidden = layers.fc( - input=x, - size=d_inner_hid, - num_flatten_dims=2, - act=hidden_act, - param_attr=fluid.ParamAttr( - name=name + '_fc_0.w_0', initializer=param_initializer), - bias_attr=name + '_fc_0.b_0') - if dropout_rate: - hidden = layers.dropout( - hidden, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.fc( - input=hidden, - size=d_hid, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_fc_1.w_0', initializer=param_initializer), - bias_attr=name + '_fc_1.b_0') - return out - - -def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., - name=''): - """ - Add residual connection, layer normalization and droput to the out tensor - optionally according to the value of process_cmd. - This will be used before or after multi-head attention and position-wise - feed-forward networks. - """ - for cmd in process_cmd: - if cmd == "a": # add residual connection - out = out + prev_out if prev_out else out - elif cmd == "n": # add layer normalization - out_dtype = out.dtype - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float32") - out = layers.layer_norm( - out, - begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.ParamAttr( - name=name + '_layer_norm_scale', - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - name=name + '_layer_norm_bias', - initializer=fluid.initializer.Constant(0.))) - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float16") - elif cmd == "d": # add dropout - if dropout_rate: - out = layers.dropout( - out, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - return out - - -pre_process_layer = partial(pre_post_process_layer, None) -post_process_layer = pre_post_process_layer - - -def encoder_layer(enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """The encoder layers that can be stacked to form a deep encoder. - This module consits of a multi-head (self) attention followed by - position-wise feed-forward networks and both the two components companied - with the post_process_layer to add residual connection, layer normalization - and droput. 
- """ - attn_output = multi_head_attention( - pre_process_layer( - enc_input, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_att'), - None, - None, - attn_bias, - d_key, - d_value, - d_model, - n_head, - attention_dropout, - param_initializer=param_initializer, - name=name + '_multi_head_att') - attn_output = post_process_layer( - enc_input, - attn_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_att') - ffd_output = positionwise_feed_forward( - pre_process_layer( - attn_output, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_ffn'), - d_inner_hid, - d_model, - relu_dropout, - hidden_act, - param_initializer=param_initializer, - name=name + '_ffn') - return post_process_layer( - attn_output, - ffd_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_ffn') - - -def encoder(enc_input, - attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """ - The encoder is composed of a stack of identical layers returned by calling - encoder_layer. - """ - for i in range(n_layer): - enc_output = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name + '_layer_' + str(i)) - enc_input = enc_output - enc_output = pre_process_layer( - enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") - - return enc_output diff --git a/hub_module/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/module.py b/hub_module/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/module.py deleted file mode 100644 index badad96ceab9328006408a6a3772fdc4ad208f68..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/module.py +++ /dev/null @@ -1,76 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-from paddlehub import TransformerModule
-from paddlehub.module.module import moduleinfo
-
-from bert_uncased_L_24_H_1024_A_16.model.bert import BertConfig, BertModel
-
-
-@moduleinfo(
- name="bert_uncased_L-24_H-1024_A-16",
- version="1.1.0",
- summary=
- "bert_uncased_L-24_H-1024_A-16, 24-layer, 1024-hidden, 16-heads, 340M parameters",
- author="paddlepaddle",
- author_email="paddle-dev@baidu.com",
- type="nlp/semantic_model",
-)
-class Bert(TransformerModule):
- def _initialize(self):
- self.MAX_SEQ_LEN = 512
- self.params_path = os.path.join(self.directory, "assets", "params")
- self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
-
- bert_config_path = os.path.join(self.directory, "assets",
- "bert_config.json")
- self.bert_config = BertConfig(bert_config_path)
-
- def net(self, input_ids, position_ids, segment_ids, input_mask):
- """
- Create the neural network.
-
- Args:
- input_ids (tensor): the word ids.
- position_ids (tensor): the position ids.
- segment_ids (tensor): the segment ids.
- input_mask (tensor): the padding mask.
-
- Returns:
- pooled_output (tensor): sentence-level output for classification tasks.
- sequence_output (tensor): token-level output for sequence tasks.
- """
- bert = BertModel(
- src_ids=input_ids,
- position_ids=position_ids,
- sentence_ids=segment_ids,
- input_mask=input_mask,
- config=self.bert_config,
- use_fp16=False)
- pooled_output = bert.get_pooled_output()
- sequence_output = bert.get_sequence_output()
- return pooled_output, sequence_output
-
-
-if __name__ == '__main__':
- test_module = Bert()
diff --git a/hub_module/modules/text/semantic_model/chinese_bert_wwm/model/bert.py b/hub_module/modules/text/semantic_model/chinese_bert_wwm/model/bert.py
deleted file mode 100644
index 6a43359dcc83caec2c2677999d016b9a7946ba48..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/chinese_bert_wwm/model/bert.py
+++ /dev/null
@@ -1,228 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
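
The `BertConfig` class in the file below is a thin wrapper over `assets/bert_config.json`. For orientation, the keys that `BertModel` actually reads look like this (values follow the published Chinese BERT-base release and are illustrative only):

```python
import json

bert_config = {
    "hidden_size": 768,                    # -> _emb_size
    "num_hidden_layers": 12,               # -> _n_layer
    "num_attention_heads": 12,             # -> _n_head
    "vocab_size": 21128,                   # -> _voc_size
    "max_position_embeddings": 512,        # -> _max_position_seq_len
    "type_vocab_size": 2,                  # -> _sent_types
    "hidden_act": "gelu",                  # -> _hidden_act
    "hidden_dropout_prob": 0.1,            # -> _prepostprocess_dropout
    "attention_probs_dropout_prob": 0.1,   # -> _attention_dropout
    "initializer_range": 0.02,             # -> TruncatedNormal scale
}
print(json.dumps(bert_config, indent=2))
```
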
-"""BERT model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import json - -import paddle.fluid as fluid - -from chinese_bert_wwm.model.transformer_encoder import encoder, pre_process_layer - - -class BertConfig(object): - def __init__(self, config_path): - self._config_dict = self._parse(config_path) - - def _parse(self, config_path): - try: - with open(config_path) as json_file: - config_dict = json.load(json_file) - except Exception: - raise IOError( - "Error in parsing bert model config file '%s'" % config_path) - else: - return config_dict - - def __getitem__(self, key): - return self._config_dict[key] - - def print_config(self): - for arg, value in sorted(six.iteritems(self._config_dict)): - print('%s: %s' % (arg, value)) - print('------------------------------------------------') - - -class BertModel(object): - def __init__(self, - src_ids, - position_ids, - sentence_ids, - input_mask, - config, - weight_sharing=True, - use_fp16=False): - - self._emb_size = config['hidden_size'] - self._n_layer = config['num_hidden_layers'] - self._n_head = config['num_attention_heads'] - self._voc_size = config['vocab_size'] - self._max_position_seq_len = config['max_position_embeddings'] - self._sent_types = config['type_vocab_size'] - self._hidden_act = config['hidden_act'] - self._prepostprocess_dropout = config['hidden_dropout_prob'] - self._attention_dropout = config['attention_probs_dropout_prob'] - self._weight_sharing = weight_sharing - - self._word_emb_name = "word_embedding" - self._pos_emb_name = "pos_embedding" - self._sent_emb_name = "sent_embedding" - self._dtype = "float16" if use_fp16 else "float32" - - # Initialize all weigths by truncated normal initializer, and all biases - # will be initialized by constant zero by default. 
- self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config['initializer_range']) - - self._build_model(src_ids, position_ids, sentence_ids, input_mask) - - def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): - # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( - input=src_ids, - size=[self._voc_size, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._word_emb_name, initializer=self._param_initializer), - is_sparse=False) - position_emb_out = fluid.layers.embedding( - input=position_ids, - size=[self._max_position_seq_len, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._pos_emb_name, initializer=self._param_initializer)) - - sent_emb_out = fluid.layers.embedding( - sentence_ids, - size=[self._sent_types, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._sent_emb_name, initializer=self._param_initializer)) - - emb_out = emb_out + position_emb_out - emb_out = emb_out + sent_emb_out - - emb_out = pre_process_layer( - emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') - - if self._dtype == "float16": - input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) - - self_attn_mask = fluid.layers.matmul( - x=input_mask, y=input_mask, transpose_y=True) - self_attn_mask = fluid.layers.scale( - x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = fluid.layers.stack( - x=[self_attn_mask] * self._n_head, axis=1) - n_head_self_attn_mask.stop_gradient = True - - self._enc_out = encoder( - enc_input=emb_out, - attn_bias=n_head_self_attn_mask, - n_layer=self._n_layer, - n_head=self._n_head, - d_key=self._emb_size // self._n_head, - d_value=self._emb_size // self._n_head, - d_model=self._emb_size, - d_inner_hid=self._emb_size * 4, - prepostprocess_dropout=self._prepostprocess_dropout, - attention_dropout=self._attention_dropout, - relu_dropout=0, - hidden_act=self._hidden_act, - preprocess_cmd="", - postprocess_cmd="dan", - param_initializer=self._param_initializer, - name='encoder') - - def get_sequence_output(self): - return self._enc_out - - def get_pooled_output(self): - """Get the first feature of each sequence for classification""" - - next_sent_feat = fluid.layers.slice( - input=self._enc_out, axes=[1], starts=[0], ends=[1]) - next_sent_feat = fluid.layers.fc( - input=next_sent_feat, - size=self._emb_size, - act="tanh", - param_attr=fluid.ParamAttr( - name="pooled_fc.w_0", initializer=self._param_initializer), - bias_attr="pooled_fc.b_0") - return next_sent_feat - - def get_pretraining_output(self, mask_label, mask_pos, labels): - """Get the loss & accuracy for pretraining""" - - mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') - - # extract the first token feature in each sentence - next_sent_feat = self.get_pooled_output() - reshaped_emb_out = fluid.layers.reshape( - x=self._enc_out, shape=[-1, self._emb_size]) - # extract masked tokens' feature - mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) - - # transform: fc - mask_trans_feat = fluid.layers.fc( - input=mask_feat, - size=self._emb_size, - act=self._hidden_act, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_fc.w_0', - initializer=self._param_initializer), - bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) - # transform: layer norm - mask_trans_feat = pre_process_layer( - mask_trans_feat, 'n', name='mask_lm_trans') - - mask_lm_out_bias_attr = fluid.ParamAttr( - 
name="mask_lm_out_fc.b_0", - initializer=fluid.initializer.Constant(value=0.0)) - if self._weight_sharing: - fc_out = fluid.layers.matmul( - x=mask_trans_feat, - y=fluid.default_main_program().global_block().var( - self._word_emb_name), - transpose_y=True) - fc_out += fluid.layers.create_parameter( - shape=[self._voc_size], - dtype=self._dtype, - attr=mask_lm_out_bias_attr, - is_bias=True) - - else: - fc_out = fluid.layers.fc( - input=mask_trans_feat, - size=self._voc_size, - param_attr=fluid.ParamAttr( - name="mask_lm_out_fc.w_0", - initializer=self._param_initializer), - bias_attr=mask_lm_out_bias_attr) - - mask_lm_loss = fluid.layers.softmax_with_cross_entropy( - logits=fc_out, label=mask_label) - mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) - - next_sent_fc_out = fluid.layers.fc( - input=next_sent_feat, - size=2, - param_attr=fluid.ParamAttr( - name="next_sent_fc.w_0", initializer=self._param_initializer), - bias_attr="next_sent_fc.b_0") - - next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy( - logits=next_sent_fc_out, label=labels, return_softmax=True) - - next_sent_acc = fluid.layers.accuracy( - input=next_sent_softmax, label=labels) - - mean_next_sent_loss = fluid.layers.mean(next_sent_loss) - - loss = mean_next_sent_loss + mean_mask_lm_loss - return next_sent_acc, mean_mask_lm_loss, loss diff --git a/hub_module/modules/text/semantic_model/chinese_bert_wwm/model/transformer_encoder.py b/hub_module/modules/text/semantic_model/chinese_bert_wwm/model/transformer_encoder.py deleted file mode 100644 index 9a5913efc1fb6d7fc0ac5f6df37fb70424d259d9..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/chinese_bert_wwm/model/transformer_encoder.py +++ /dev/null @@ -1,340 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Transformer encoder.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from functools import partial - -import paddle.fluid as fluid -import paddle.fluid.layers as layers - - -def multi_head_attention(queries, - keys, - values, - attn_bias, - d_key, - d_value, - d_model, - n_head=1, - dropout_rate=0., - cache=None, - param_initializer=None, - name='multi_head_att'): - """ - Multi-Head Attention. Note that attn_bias is added to the logit before - computing softmax activiation to mask certain selected positions so that - they will not considered in attention weights. - """ - keys = queries if keys is None else keys - values = keys if values is None else values - - if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): - raise ValueError( - "Inputs: quries, keys and values should all be 3-D tensors.") - - def __compute_qkv(queries, keys, values, n_head, d_key, d_value): - """ - Add linear projection to queries, keys, and values. 
- """ - q = layers.fc( - input=queries, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_query_fc.w_0', initializer=param_initializer), - bias_attr=name + '_query_fc.b_0') - k = layers.fc( - input=keys, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_key_fc.w_0', initializer=param_initializer), - bias_attr=name + '_key_fc.b_0') - v = layers.fc( - input=values, - size=d_value * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_value_fc.w_0', initializer=param_initializer), - bias_attr=name + '_value_fc.b_0') - return q, k, v - - def __split_heads(x, n_head): - """ - Reshape the last dimension of inpunt tensor x so that it becomes two - dimensions and then transpose. Specifically, input a tensor with shape - [bs, max_sequence_length, n_head * hidden_dim] then output a tensor - with shape [bs, n_head, max_sequence_length, hidden_dim]. - """ - hidden_size = x.shape[-1] - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - reshaped = layers.reshape( - x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) - - # permuate the dimensions into: - # [batch_size, n_head, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) - - def __combine_heads(x): - """ - Transpose and then reshape the last two dimensions of inpunt tensor x - so that it becomes one dimension, which is reverse to __split_heads. - """ - if len(x.shape) == 3: return x - if len(x.shape) != 4: - raise ValueError("Input(x) should be a 4-D Tensor.") - - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - return layers.reshape( - x=trans_x, - shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], - inplace=True) - - def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): - """ - Scaled Dot-Product Attention - """ - scaled_q = layers.scale(x=q, scale=d_key**-0.5) - product = layers.matmul(x=scaled_q, y=k, transpose_y=True) - if attn_bias: - product += attn_bias - weights = layers.softmax(product) - if dropout_rate: - weights = layers.dropout( - weights, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.matmul(weights, v) - return out - - q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) - - if cache is not None: # use cache and concat time steps - # Since the inplace reshape in __split_heads changes the shape of k and - # v, which is the cache input for next time step, reshape the cache - # input from the previous time step first. - k = cache["k"] = layers.concat( - [layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) - v = cache["v"] = layers.concat( - [layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) - - q = __split_heads(q, n_head) - k = __split_heads(k, n_head) - v = __split_heads(v, n_head) - - ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, - dropout_rate) - - out = __combine_heads(ctx_multiheads) - - # Project back to the model size. 
- proj_out = layers.fc( - input=out, - size=d_model, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_output_fc.w_0', initializer=param_initializer), - bias_attr=name + '_output_fc.b_0') - return proj_out - - -def positionwise_feed_forward(x, - d_inner_hid, - d_hid, - dropout_rate, - hidden_act, - param_initializer=None, - name='ffn'): - """ - Position-wise Feed-Forward Networks. - This module consists of two linear transformations with a ReLU activation - in between, which is applied to each position separately and identically. - """ - hidden = layers.fc( - input=x, - size=d_inner_hid, - num_flatten_dims=2, - act=hidden_act, - param_attr=fluid.ParamAttr( - name=name + '_fc_0.w_0', initializer=param_initializer), - bias_attr=name + '_fc_0.b_0') - if dropout_rate: - hidden = layers.dropout( - hidden, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.fc( - input=hidden, - size=d_hid, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_fc_1.w_0', initializer=param_initializer), - bias_attr=name + '_fc_1.b_0') - return out - - -def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., - name=''): - """ - Add residual connection, layer normalization and droput to the out tensor - optionally according to the value of process_cmd. - This will be used before or after multi-head attention and position-wise - feed-forward networks. - """ - for cmd in process_cmd: - if cmd == "a": # add residual connection - out = out + prev_out if prev_out else out - elif cmd == "n": # add layer normalization - out_dtype = out.dtype - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float32") - out = layers.layer_norm( - out, - begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.ParamAttr( - name=name + '_layer_norm_scale', - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - name=name + '_layer_norm_bias', - initializer=fluid.initializer.Constant(0.))) - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float16") - elif cmd == "d": # add dropout - if dropout_rate: - out = layers.dropout( - out, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - return out - - -pre_process_layer = partial(pre_post_process_layer, None) -post_process_layer = pre_post_process_layer - - -def encoder_layer(enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """The encoder layers that can be stacked to form a deep encoder. - This module consits of a multi-head (self) attention followed by - position-wise feed-forward networks and both the two components companied - with the post_process_layer to add residual connection, layer normalization - and droput. 
- """ - attn_output = multi_head_attention( - pre_process_layer( - enc_input, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_att'), - None, - None, - attn_bias, - d_key, - d_value, - d_model, - n_head, - attention_dropout, - param_initializer=param_initializer, - name=name + '_multi_head_att') - attn_output = post_process_layer( - enc_input, - attn_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_att') - ffd_output = positionwise_feed_forward( - pre_process_layer( - attn_output, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_ffn'), - d_inner_hid, - d_model, - relu_dropout, - hidden_act, - param_initializer=param_initializer, - name=name + '_ffn') - return post_process_layer( - attn_output, - ffd_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_ffn') - - -def encoder(enc_input, - attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """ - The encoder is composed of a stack of identical layers returned by calling - encoder_layer. - """ - for i in range(n_layer): - enc_output = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name + '_layer_' + str(i)) - enc_input = enc_output - enc_output = pre_process_layer( - enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") - - return enc_output diff --git a/hub_module/modules/text/semantic_model/chinese_bert_wwm/module.py b/hub_module/modules/text/semantic_model/chinese_bert_wwm/module.py deleted file mode 100644 index b06699f31e317abfd2d3b96ec4ddb5027debaacc..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/chinese_bert_wwm/module.py +++ /dev/null @@ -1,75 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-from paddlehub import TransformerModule
-from paddlehub.module.module import moduleinfo
-
-from chinese_bert_wwm.model.bert import BertConfig, BertModel
-
-
-@moduleinfo(
- name="chinese-bert-wwm",
- version="1.0.0",
- summary="chinese-bert-wwm, 12-layer, 768-hidden, 12-heads, 110M parameters",
- author="ymcui",
- author_email="ymcui@ir.hit.edu.cn",
- type="nlp/semantic_model",
-)
-class BertWwm(TransformerModule):
- def _initialize(self):
- self.MAX_SEQ_LEN = 512
- self.params_path = os.path.join(self.directory, "assets", "params")
- self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
-
- bert_config_path = os.path.join(self.directory, "assets",
- "bert_config.json")
- self.bert_config = BertConfig(bert_config_path)
-
- def net(self, input_ids, position_ids, segment_ids, input_mask):
- """
- Create the neural network.
-
- Args:
- input_ids (tensor): the word ids.
- position_ids (tensor): the position ids.
- segment_ids (tensor): the segment ids.
- input_mask (tensor): the padding mask.
-
- Returns:
- pooled_output (tensor): sentence-level output for classification tasks.
- sequence_output (tensor): token-level output for sequence tasks.
- """
- bert = BertModel(
- src_ids=input_ids,
- position_ids=position_ids,
- sentence_ids=segment_ids,
- input_mask=input_mask,
- config=self.bert_config,
- use_fp16=False)
- pooled_output = bert.get_pooled_output()
- sequence_output = bert.get_sequence_output()
- return pooled_output, sequence_output
-
-
-if __name__ == '__main__':
- test_module = BertWwm()
diff --git a/hub_module/modules/text/semantic_model/chinese_bert_wwm_ext/model/bert.py b/hub_module/modules/text/semantic_model/chinese_bert_wwm_ext/model/bert.py
deleted file mode 100644
index 37bd08935b4e318048b730671788d0bd9470ae44..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/chinese_bert_wwm_ext/model/bert.py
+++ /dev/null
@@ -1,228 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
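
The encoder's `__split_heads`/`__combine_heads` pair (deleted with each module above) is an exact round trip. A NumPy equivalent (an editor's sketch):

```python
import numpy as np

def split_heads(x, n_head):        # [bs, seq, n_head*d] -> [bs, n_head, seq, d]
    bs, seq, hidden = x.shape
    return x.reshape(bs, seq, n_head, hidden // n_head).transpose(0, 2, 1, 3)

def combine_heads(x):              # [bs, n_head, seq, d] -> [bs, seq, n_head*d]
    bs, n_head, seq, d = x.shape
    return x.transpose(0, 2, 1, 3).reshape(bs, seq, n_head * d)

x = np.random.randn(2, 8, 768)
assert np.allclose(combine_heads(split_heads(x, 12)), x)
```
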
-"""BERT model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import json - -import paddle.fluid as fluid - -from chinese_bert_wwm_ext.model.transformer_encoder import encoder, pre_process_layer - - -class BertConfig(object): - def __init__(self, config_path): - self._config_dict = self._parse(config_path) - - def _parse(self, config_path): - try: - with open(config_path) as json_file: - config_dict = json.load(json_file) - except Exception: - raise IOError( - "Error in parsing bert model config file '%s'" % config_path) - else: - return config_dict - - def __getitem__(self, key): - return self._config_dict[key] - - def print_config(self): - for arg, value in sorted(six.iteritems(self._config_dict)): - print('%s: %s' % (arg, value)) - print('------------------------------------------------') - - -class BertModel(object): - def __init__(self, - src_ids, - position_ids, - sentence_ids, - input_mask, - config, - weight_sharing=True, - use_fp16=False): - - self._emb_size = config['hidden_size'] - self._n_layer = config['num_hidden_layers'] - self._n_head = config['num_attention_heads'] - self._voc_size = config['vocab_size'] - self._max_position_seq_len = config['max_position_embeddings'] - self._sent_types = config['type_vocab_size'] - self._hidden_act = config['hidden_act'] - self._prepostprocess_dropout = config['hidden_dropout_prob'] - self._attention_dropout = config['attention_probs_dropout_prob'] - self._weight_sharing = weight_sharing - - self._word_emb_name = "word_embedding" - self._pos_emb_name = "pos_embedding" - self._sent_emb_name = "sent_embedding" - self._dtype = "float16" if use_fp16 else "float32" - - # Initialize all weigths by truncated normal initializer, and all biases - # will be initialized by constant zero by default. 
- self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config['initializer_range']) - - self._build_model(src_ids, position_ids, sentence_ids, input_mask) - - def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): - # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( - input=src_ids, - size=[self._voc_size, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._word_emb_name, initializer=self._param_initializer), - is_sparse=False) - position_emb_out = fluid.layers.embedding( - input=position_ids, - size=[self._max_position_seq_len, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._pos_emb_name, initializer=self._param_initializer)) - - sent_emb_out = fluid.layers.embedding( - sentence_ids, - size=[self._sent_types, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._sent_emb_name, initializer=self._param_initializer)) - - emb_out = emb_out + position_emb_out - emb_out = emb_out + sent_emb_out - - emb_out = pre_process_layer( - emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') - - if self._dtype == "float16": - input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) - - self_attn_mask = fluid.layers.matmul( - x=input_mask, y=input_mask, transpose_y=True) - self_attn_mask = fluid.layers.scale( - x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = fluid.layers.stack( - x=[self_attn_mask] * self._n_head, axis=1) - n_head_self_attn_mask.stop_gradient = True - - self._enc_out = encoder( - enc_input=emb_out, - attn_bias=n_head_self_attn_mask, - n_layer=self._n_layer, - n_head=self._n_head, - d_key=self._emb_size // self._n_head, - d_value=self._emb_size // self._n_head, - d_model=self._emb_size, - d_inner_hid=self._emb_size * 4, - prepostprocess_dropout=self._prepostprocess_dropout, - attention_dropout=self._attention_dropout, - relu_dropout=0, - hidden_act=self._hidden_act, - preprocess_cmd="", - postprocess_cmd="dan", - param_initializer=self._param_initializer, - name='encoder') - - def get_sequence_output(self): - return self._enc_out - - def get_pooled_output(self): - """Get the first feature of each sequence for classification""" - - next_sent_feat = fluid.layers.slice( - input=self._enc_out, axes=[1], starts=[0], ends=[1]) - next_sent_feat = fluid.layers.fc( - input=next_sent_feat, - size=self._emb_size, - act="tanh", - param_attr=fluid.ParamAttr( - name="pooled_fc.w_0", initializer=self._param_initializer), - bias_attr="pooled_fc.b_0") - return next_sent_feat - - def get_pretraining_output(self, mask_label, mask_pos, labels): - """Get the loss & accuracy for pretraining""" - - mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') - - # extract the first token feature in each sentence - next_sent_feat = self.get_pooled_output() - reshaped_emb_out = fluid.layers.reshape( - x=self._enc_out, shape=[-1, self._emb_size]) - # extract masked tokens' feature - mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) - - # transform: fc - mask_trans_feat = fluid.layers.fc( - input=mask_feat, - size=self._emb_size, - act=self._hidden_act, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_fc.w_0', - initializer=self._param_initializer), - bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) - # transform: layer norm - mask_trans_feat = pre_process_layer( - mask_trans_feat, 'n', name='mask_lm_trans') - - mask_lm_out_bias_attr = fluid.ParamAttr( - 
name="mask_lm_out_fc.b_0", - initializer=fluid.initializer.Constant(value=0.0)) - if self._weight_sharing: - fc_out = fluid.layers.matmul( - x=mask_trans_feat, - y=fluid.default_main_program().global_block().var( - self._word_emb_name), - transpose_y=True) - fc_out += fluid.layers.create_parameter( - shape=[self._voc_size], - dtype=self._dtype, - attr=mask_lm_out_bias_attr, - is_bias=True) - - else: - fc_out = fluid.layers.fc( - input=mask_trans_feat, - size=self._voc_size, - param_attr=fluid.ParamAttr( - name="mask_lm_out_fc.w_0", - initializer=self._param_initializer), - bias_attr=mask_lm_out_bias_attr) - - mask_lm_loss = fluid.layers.softmax_with_cross_entropy( - logits=fc_out, label=mask_label) - mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) - - next_sent_fc_out = fluid.layers.fc( - input=next_sent_feat, - size=2, - param_attr=fluid.ParamAttr( - name="next_sent_fc.w_0", initializer=self._param_initializer), - bias_attr="next_sent_fc.b_0") - - next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy( - logits=next_sent_fc_out, label=labels, return_softmax=True) - - next_sent_acc = fluid.layers.accuracy( - input=next_sent_softmax, label=labels) - - mean_next_sent_loss = fluid.layers.mean(next_sent_loss) - - loss = mean_next_sent_loss + mean_mask_lm_loss - return next_sent_acc, mean_mask_lm_loss, loss diff --git a/hub_module/modules/text/semantic_model/chinese_bert_wwm_ext/model/transformer_encoder.py b/hub_module/modules/text/semantic_model/chinese_bert_wwm_ext/model/transformer_encoder.py deleted file mode 100644 index 9a5913efc1fb6d7fc0ac5f6df37fb70424d259d9..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/chinese_bert_wwm_ext/model/transformer_encoder.py +++ /dev/null @@ -1,340 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Transformer encoder.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from functools import partial - -import paddle.fluid as fluid -import paddle.fluid.layers as layers - - -def multi_head_attention(queries, - keys, - values, - attn_bias, - d_key, - d_value, - d_model, - n_head=1, - dropout_rate=0., - cache=None, - param_initializer=None, - name='multi_head_att'): - """ - Multi-Head Attention. Note that attn_bias is added to the logit before - computing softmax activiation to mask certain selected positions so that - they will not considered in attention weights. - """ - keys = queries if keys is None else keys - values = keys if values is None else values - - if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): - raise ValueError( - "Inputs: quries, keys and values should all be 3-D tensors.") - - def __compute_qkv(queries, keys, values, n_head, d_key, d_value): - """ - Add linear projection to queries, keys, and values. 
- """ - q = layers.fc( - input=queries, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_query_fc.w_0', initializer=param_initializer), - bias_attr=name + '_query_fc.b_0') - k = layers.fc( - input=keys, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_key_fc.w_0', initializer=param_initializer), - bias_attr=name + '_key_fc.b_0') - v = layers.fc( - input=values, - size=d_value * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_value_fc.w_0', initializer=param_initializer), - bias_attr=name + '_value_fc.b_0') - return q, k, v - - def __split_heads(x, n_head): - """ - Reshape the last dimension of inpunt tensor x so that it becomes two - dimensions and then transpose. Specifically, input a tensor with shape - [bs, max_sequence_length, n_head * hidden_dim] then output a tensor - with shape [bs, n_head, max_sequence_length, hidden_dim]. - """ - hidden_size = x.shape[-1] - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - reshaped = layers.reshape( - x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) - - # permuate the dimensions into: - # [batch_size, n_head, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) - - def __combine_heads(x): - """ - Transpose and then reshape the last two dimensions of inpunt tensor x - so that it becomes one dimension, which is reverse to __split_heads. - """ - if len(x.shape) == 3: return x - if len(x.shape) != 4: - raise ValueError("Input(x) should be a 4-D Tensor.") - - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - return layers.reshape( - x=trans_x, - shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], - inplace=True) - - def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): - """ - Scaled Dot-Product Attention - """ - scaled_q = layers.scale(x=q, scale=d_key**-0.5) - product = layers.matmul(x=scaled_q, y=k, transpose_y=True) - if attn_bias: - product += attn_bias - weights = layers.softmax(product) - if dropout_rate: - weights = layers.dropout( - weights, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.matmul(weights, v) - return out - - q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) - - if cache is not None: # use cache and concat time steps - # Since the inplace reshape in __split_heads changes the shape of k and - # v, which is the cache input for next time step, reshape the cache - # input from the previous time step first. - k = cache["k"] = layers.concat( - [layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) - v = cache["v"] = layers.concat( - [layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) - - q = __split_heads(q, n_head) - k = __split_heads(k, n_head) - v = __split_heads(v, n_head) - - ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, - dropout_rate) - - out = __combine_heads(ctx_multiheads) - - # Project back to the model size. 
- proj_out = layers.fc( - input=out, - size=d_model, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_output_fc.w_0', initializer=param_initializer), - bias_attr=name + '_output_fc.b_0') - return proj_out - - -def positionwise_feed_forward(x, - d_inner_hid, - d_hid, - dropout_rate, - hidden_act, - param_initializer=None, - name='ffn'): - """ - Position-wise Feed-Forward Networks. - This module consists of two linear transformations with a ReLU activation - in between, which is applied to each position separately and identically. - """ - hidden = layers.fc( - input=x, - size=d_inner_hid, - num_flatten_dims=2, - act=hidden_act, - param_attr=fluid.ParamAttr( - name=name + '_fc_0.w_0', initializer=param_initializer), - bias_attr=name + '_fc_0.b_0') - if dropout_rate: - hidden = layers.dropout( - hidden, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.fc( - input=hidden, - size=d_hid, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_fc_1.w_0', initializer=param_initializer), - bias_attr=name + '_fc_1.b_0') - return out - - -def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., - name=''): - """ - Add residual connection, layer normalization and droput to the out tensor - optionally according to the value of process_cmd. - This will be used before or after multi-head attention and position-wise - feed-forward networks. - """ - for cmd in process_cmd: - if cmd == "a": # add residual connection - out = out + prev_out if prev_out else out - elif cmd == "n": # add layer normalization - out_dtype = out.dtype - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float32") - out = layers.layer_norm( - out, - begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.ParamAttr( - name=name + '_layer_norm_scale', - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - name=name + '_layer_norm_bias', - initializer=fluid.initializer.Constant(0.))) - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float16") - elif cmd == "d": # add dropout - if dropout_rate: - out = layers.dropout( - out, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - return out - - -pre_process_layer = partial(pre_post_process_layer, None) -post_process_layer = pre_post_process_layer - - -def encoder_layer(enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """The encoder layers that can be stacked to form a deep encoder. - This module consits of a multi-head (self) attention followed by - position-wise feed-forward networks and both the two components companied - with the post_process_layer to add residual connection, layer normalization - and droput. 
- """ - attn_output = multi_head_attention( - pre_process_layer( - enc_input, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_att'), - None, - None, - attn_bias, - d_key, - d_value, - d_model, - n_head, - attention_dropout, - param_initializer=param_initializer, - name=name + '_multi_head_att') - attn_output = post_process_layer( - enc_input, - attn_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_att') - ffd_output = positionwise_feed_forward( - pre_process_layer( - attn_output, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_ffn'), - d_inner_hid, - d_model, - relu_dropout, - hidden_act, - param_initializer=param_initializer, - name=name + '_ffn') - return post_process_layer( - attn_output, - ffd_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_ffn') - - -def encoder(enc_input, - attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """ - The encoder is composed of a stack of identical layers returned by calling - encoder_layer. - """ - for i in range(n_layer): - enc_output = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name + '_layer_' + str(i)) - enc_input = enc_output - enc_output = pre_process_layer( - enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") - - return enc_output diff --git a/hub_module/modules/text/semantic_model/chinese_bert_wwm_ext/module.py b/hub_module/modules/text/semantic_model/chinese_bert_wwm_ext/module.py deleted file mode 100644 index 2a462c912b5e3d84033dc46bf51634cfe3040292..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/chinese_bert_wwm_ext/module.py +++ /dev/null @@ -1,76 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-from paddlehub import TransformerModule
-from paddlehub.module.module import moduleinfo
-
-from chinese_bert_wwm_ext.model.bert import BertConfig, BertModel
-
-
-@moduleinfo(
-    name="chinese-bert-wwm-ext",
-    version="1.0.0",
-    summary=
-    "chinese-bert-wwm-ext, 12-layer, 768-hidden, 12-heads, 110M parameters",
-    author="ymcui",
-    author_email="ymcui@ir.hit.edu.cn",
-    type="nlp/semantic_model",
-)
-class BertWwm(TransformerModule):
-    def _initialize(self):
-        self.MAX_SEQ_LEN = 512
-        self.params_path = os.path.join(self.directory, "assets", "params")
-        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
-
-        bert_config_path = os.path.join(self.directory, "assets",
-                                        "bert_config.json")
-        self.bert_config = BertConfig(bert_config_path)
-
-    def net(self, input_ids, position_ids, segment_ids, input_mask):
-        """
-        Create the neural network.
-
-        Args:
-            input_ids (tensor): the word ids.
-            position_ids (tensor): the position ids.
-            segment_ids (tensor): the segment ids.
-            input_mask (tensor): the padding mask.
-
-        Returns:
-            pooled_output (tensor): sentence-level output for classification tasks.
-            sequence_output (tensor): token-level output for sequence tasks.
-        """
-        bert = BertModel(
-            src_ids=input_ids,
-            position_ids=position_ids,
-            sentence_ids=segment_ids,
-            input_mask=input_mask,
-            config=self.bert_config,
-            use_fp16=False)
-        pooled_output = bert.get_pooled_output()
-        sequence_output = bert.get_sequence_output()
-        return pooled_output, sequence_output
-
-
-if __name__ == '__main__':
-    test_module = BertWwm()
diff --git a/hub_module/modules/text/semantic_model/chinese_electra_base/model/electra.py b/hub_module/modules/text/semantic_model/chinese_electra_base/model/electra.py
deleted file mode 100644
index 96252ff3972b0e2d7595758844f3f63312c89eff..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/chinese_electra_base/model/electra.py
+++ /dev/null
@@ -1,220 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
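
The `BertWwm` module above registers itself under the name `chinese-bert-wwm-ext`, so downstream code never imports it directly. A typical fine-tuning entry point looks like the sketch below, assuming the PaddleHub 1.x `context()` API that `TransformerModule` exposes:

```python
import paddlehub as hub

# Load the module by its registered name and build the pretrained program.
module = hub.Module(name="chinese-bert-wwm-ext")
inputs, outputs, program = module.context(trainable=True, max_seq_len=128)

# Tensors produced by BertWwm.net():
pooled_output = outputs["pooled_output"]      # [batch, hidden], for classification
sequence_output = outputs["sequence_output"]  # [batch, seq, hidden], for tagging
```
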
-"""ELECTRA model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import json - -import paddle.fluid as fluid - -from chinese_electra_base.model.transformer_encoder import encoder, pre_process_layer - - -class ElectraConfig(object): - def __init__(self, config_path): - self._config_dict = self._parse(config_path) - - def _parse(self, config_path): - try: - with open(config_path) as json_file: - config_dict = json.load(json_file) - except Exception: - raise IOError( - "Error in parsing electra model config file '%s'" % config_path) - else: - return config_dict - - def __getitem__(self, key): - return self._config_dict[key] - - def print_config(self): - for arg, value in sorted(six.iteritems(self._config_dict)): - print('%s: %s' % (arg, value)) - print('------------------------------------------------') - - -class ElectraModel(object): - def __init__(self, - src_ids, - position_ids, - sentence_ids, - input_mask, - config, - weight_sharing=True, - use_fp16=False): - - self._emb_size = config['hidden_size'] - self._n_layer = config['num_hidden_layers'] - self._n_head = config['num_attention_heads'] - self._voc_size = config['vocab_size'] - self._max_position_seq_len = config['max_position_embeddings'] - self._sent_types = config['type_vocab_size'] - self._hidden_act = config['hidden_act'] - self._prepostprocess_dropout = config['hidden_dropout_prob'] - self._attention_dropout = config['attention_probs_dropout_prob'] - self._weight_sharing = weight_sharing - - self._word_emb_name = "word_embedding" - self._pos_emb_name = "pos_embedding" - self._sent_emb_name = "sent_embedding" - self._dtype = "float16" if use_fp16 else "float32" - - # Initialize all weigths by truncated normal initializer, and all biases - # will be initialized by constant zero by default. 
- self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config['initializer_range']) - - self._build_model(src_ids, position_ids, sentence_ids, input_mask) - - def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): - # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( - input=src_ids, - size=[self._voc_size, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._word_emb_name, initializer=self._param_initializer), - is_sparse=False) - position_emb_out = fluid.layers.embedding( - input=position_ids, - size=[self._max_position_seq_len, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._pos_emb_name, initializer=self._param_initializer)) - - sent_emb_out = fluid.layers.embedding( - sentence_ids, - size=[self._sent_types, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._sent_emb_name, initializer=self._param_initializer)) - - emb_out = emb_out + position_emb_out - emb_out = emb_out + sent_emb_out - - emb_out = pre_process_layer( - emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') - - if self._dtype == "float16": - input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) - - self_attn_mask = fluid.layers.matmul( - x=input_mask, y=input_mask, transpose_y=True) - self_attn_mask = fluid.layers.scale( - x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = fluid.layers.stack( - x=[self_attn_mask] * self._n_head, axis=1) - n_head_self_attn_mask.stop_gradient = True - - self._enc_out = encoder( - enc_input=emb_out, - attn_bias=n_head_self_attn_mask, - n_layer=self._n_layer, - n_head=self._n_head, - d_key=self._emb_size // self._n_head, - d_value=self._emb_size // self._n_head, - d_model=self._emb_size, - d_inner_hid=self._emb_size * 4, - prepostprocess_dropout=self._prepostprocess_dropout, - attention_dropout=self._attention_dropout, - relu_dropout=0, - hidden_act=self._hidden_act, - preprocess_cmd="", - postprocess_cmd="dan", - param_initializer=self._param_initializer, - name='encoder') - - def get_sequence_output(self): - return self._enc_out - - def get_pooled_output(self): - """Get the first feature of each sequence for classification""" - next_sent_feat = fluid.layers.slice( - input=self._enc_out, axes=[1], starts=[0], ends=[1]) - return next_sent_feat - - def get_pretraining_output(self, mask_label, mask_pos, labels): - """Get the loss & accuracy for pretraining""" - - mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') - - # extract the first token feature in each sentence - next_sent_feat = self.get_pooled_output() - reshaped_emb_out = fluid.layers.reshape( - x=self._enc_out, shape=[-1, self._emb_size]) - # extract masked tokens' feature - mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) - - # transform: fc - mask_trans_feat = fluid.layers.fc( - input=mask_feat, - size=self._emb_size, - act=self._hidden_act, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_fc.w_0', - initializer=self._param_initializer), - bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) - # transform: layer norm - mask_trans_feat = pre_process_layer( - mask_trans_feat, 'n', name='mask_lm_trans') - - mask_lm_out_bias_attr = fluid.ParamAttr( - name="mask_lm_out_fc.b_0", - initializer=fluid.initializer.Constant(value=0.0)) - if self._weight_sharing: - fc_out = fluid.layers.matmul( - x=mask_trans_feat, - y=fluid.default_main_program().global_block().var( - self._word_emb_name), - 
transpose_y=True) - fc_out += fluid.layers.create_parameter( - shape=[self._voc_size], - dtype=self._dtype, - attr=mask_lm_out_bias_attr, - is_bias=True) - - else: - fc_out = fluid.layers.fc( - input=mask_trans_feat, - size=self._voc_size, - param_attr=fluid.ParamAttr( - name="mask_lm_out_fc.w_0", - initializer=self._param_initializer), - bias_attr=mask_lm_out_bias_attr) - - mask_lm_loss = fluid.layers.softmax_with_cross_entropy( - logits=fc_out, label=mask_label) - mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) - - next_sent_fc_out = fluid.layers.fc( - input=next_sent_feat, - size=2, - param_attr=fluid.ParamAttr( - name="next_sent_fc.w_0", initializer=self._param_initializer), - bias_attr="next_sent_fc.b_0") - - next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy( - logits=next_sent_fc_out, label=labels, return_softmax=True) - - next_sent_acc = fluid.layers.accuracy( - input=next_sent_softmax, label=labels) - - mean_next_sent_loss = fluid.layers.mean(next_sent_loss) - - loss = mean_next_sent_loss + mean_mask_lm_loss - return next_sent_acc, mean_mask_lm_loss, loss diff --git a/hub_module/modules/text/semantic_model/chinese_electra_base/model/transformer_encoder.py b/hub_module/modules/text/semantic_model/chinese_electra_base/model/transformer_encoder.py deleted file mode 100644 index 9a5913efc1fb6d7fc0ac5f6df37fb70424d259d9..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/chinese_electra_base/model/transformer_encoder.py +++ /dev/null @@ -1,340 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Transformer encoder.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from functools import partial - -import paddle.fluid as fluid -import paddle.fluid.layers as layers - - -def multi_head_attention(queries, - keys, - values, - attn_bias, - d_key, - d_value, - d_model, - n_head=1, - dropout_rate=0., - cache=None, - param_initializer=None, - name='multi_head_att'): - """ - Multi-Head Attention. Note that attn_bias is added to the logit before - computing softmax activiation to mask certain selected positions so that - they will not considered in attention weights. - """ - keys = queries if keys is None else keys - values = keys if values is None else values - - if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): - raise ValueError( - "Inputs: quries, keys and values should all be 3-D tensors.") - - def __compute_qkv(queries, keys, values, n_head, d_key, d_value): - """ - Add linear projection to queries, keys, and values. 
- """ - q = layers.fc( - input=queries, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_query_fc.w_0', initializer=param_initializer), - bias_attr=name + '_query_fc.b_0') - k = layers.fc( - input=keys, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_key_fc.w_0', initializer=param_initializer), - bias_attr=name + '_key_fc.b_0') - v = layers.fc( - input=values, - size=d_value * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_value_fc.w_0', initializer=param_initializer), - bias_attr=name + '_value_fc.b_0') - return q, k, v - - def __split_heads(x, n_head): - """ - Reshape the last dimension of inpunt tensor x so that it becomes two - dimensions and then transpose. Specifically, input a tensor with shape - [bs, max_sequence_length, n_head * hidden_dim] then output a tensor - with shape [bs, n_head, max_sequence_length, hidden_dim]. - """ - hidden_size = x.shape[-1] - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - reshaped = layers.reshape( - x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) - - # permuate the dimensions into: - # [batch_size, n_head, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) - - def __combine_heads(x): - """ - Transpose and then reshape the last two dimensions of inpunt tensor x - so that it becomes one dimension, which is reverse to __split_heads. - """ - if len(x.shape) == 3: return x - if len(x.shape) != 4: - raise ValueError("Input(x) should be a 4-D Tensor.") - - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - return layers.reshape( - x=trans_x, - shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], - inplace=True) - - def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): - """ - Scaled Dot-Product Attention - """ - scaled_q = layers.scale(x=q, scale=d_key**-0.5) - product = layers.matmul(x=scaled_q, y=k, transpose_y=True) - if attn_bias: - product += attn_bias - weights = layers.softmax(product) - if dropout_rate: - weights = layers.dropout( - weights, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.matmul(weights, v) - return out - - q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) - - if cache is not None: # use cache and concat time steps - # Since the inplace reshape in __split_heads changes the shape of k and - # v, which is the cache input for next time step, reshape the cache - # input from the previous time step first. - k = cache["k"] = layers.concat( - [layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) - v = cache["v"] = layers.concat( - [layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) - - q = __split_heads(q, n_head) - k = __split_heads(k, n_head) - v = __split_heads(v, n_head) - - ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, - dropout_rate) - - out = __combine_heads(ctx_multiheads) - - # Project back to the model size. 
- proj_out = layers.fc( - input=out, - size=d_model, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_output_fc.w_0', initializer=param_initializer), - bias_attr=name + '_output_fc.b_0') - return proj_out - - -def positionwise_feed_forward(x, - d_inner_hid, - d_hid, - dropout_rate, - hidden_act, - param_initializer=None, - name='ffn'): - """ - Position-wise Feed-Forward Networks. - This module consists of two linear transformations with a ReLU activation - in between, which is applied to each position separately and identically. - """ - hidden = layers.fc( - input=x, - size=d_inner_hid, - num_flatten_dims=2, - act=hidden_act, - param_attr=fluid.ParamAttr( - name=name + '_fc_0.w_0', initializer=param_initializer), - bias_attr=name + '_fc_0.b_0') - if dropout_rate: - hidden = layers.dropout( - hidden, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.fc( - input=hidden, - size=d_hid, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_fc_1.w_0', initializer=param_initializer), - bias_attr=name + '_fc_1.b_0') - return out - - -def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., - name=''): - """ - Add residual connection, layer normalization and droput to the out tensor - optionally according to the value of process_cmd. - This will be used before or after multi-head attention and position-wise - feed-forward networks. - """ - for cmd in process_cmd: - if cmd == "a": # add residual connection - out = out + prev_out if prev_out else out - elif cmd == "n": # add layer normalization - out_dtype = out.dtype - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float32") - out = layers.layer_norm( - out, - begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.ParamAttr( - name=name + '_layer_norm_scale', - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - name=name + '_layer_norm_bias', - initializer=fluid.initializer.Constant(0.))) - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float16") - elif cmd == "d": # add dropout - if dropout_rate: - out = layers.dropout( - out, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - return out - - -pre_process_layer = partial(pre_post_process_layer, None) -post_process_layer = pre_post_process_layer - - -def encoder_layer(enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """The encoder layers that can be stacked to form a deep encoder. - This module consits of a multi-head (self) attention followed by - position-wise feed-forward networks and both the two components companied - with the post_process_layer to add residual connection, layer normalization - and droput. 
- """ - attn_output = multi_head_attention( - pre_process_layer( - enc_input, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_att'), - None, - None, - attn_bias, - d_key, - d_value, - d_model, - n_head, - attention_dropout, - param_initializer=param_initializer, - name=name + '_multi_head_att') - attn_output = post_process_layer( - enc_input, - attn_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_att') - ffd_output = positionwise_feed_forward( - pre_process_layer( - attn_output, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_ffn'), - d_inner_hid, - d_model, - relu_dropout, - hidden_act, - param_initializer=param_initializer, - name=name + '_ffn') - return post_process_layer( - attn_output, - ffd_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_ffn') - - -def encoder(enc_input, - attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """ - The encoder is composed of a stack of identical layers returned by calling - encoder_layer. - """ - for i in range(n_layer): - enc_output = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name + '_layer_' + str(i)) - enc_input = enc_output - enc_output = pre_process_layer( - enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") - - return enc_output diff --git a/hub_module/modules/text/semantic_model/chinese_electra_base/module.py b/hub_module/modules/text/semantic_model/chinese_electra_base/module.py deleted file mode 100644 index e9c6c17edce48c4ff664d93489d4f1b7601692f5..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/chinese_electra_base/module.py +++ /dev/null @@ -1,76 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-from paddlehub import TransformerModule
-from paddlehub.module.module import moduleinfo
-
-from chinese_electra_base.model.electra import ElectraConfig, ElectraModel
-
-
-@moduleinfo(
-    name="chinese-electra-base",
-    version="1.0.0",
-    summary=
-    "chinese-electra-base, 12-layer, 768-hidden, 12-heads, 102M parameters",
-    author="ymcui",
-    author_email="ymcui@ir.hit.edu.cn",
-    type="nlp/semantic_model",
-)
-class Electra(TransformerModule):
-    def _initialize(self):
-        self.MAX_SEQ_LEN = 512
-        self.params_path = os.path.join(self.directory, "assets", "params")
-        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
-
-        electra_config_path = os.path.join(self.directory, "assets",
-                                           "config.json")
-        self.electra_config = ElectraConfig(electra_config_path)
-
-    def net(self, input_ids, position_ids, segment_ids, input_mask):
-        """
-        Create the neural network.
-
-        Args:
-            input_ids (tensor): the word ids.
-            position_ids (tensor): the position ids.
-            segment_ids (tensor): the segment ids.
-            input_mask (tensor): the padding mask.
-
-        Returns:
-            pooled_output (tensor): sentence-level output for classification tasks.
-            sequence_output (tensor): token-level output for sequence tasks.
-        """
-        electra = ElectraModel(
-            src_ids=input_ids,
-            position_ids=position_ids,
-            sentence_ids=segment_ids,
-            input_mask=input_mask,
-            config=self.electra_config,
-            use_fp16=False)
-        pooled_output = electra.get_pooled_output()
-        sequence_output = electra.get_sequence_output()
-        return pooled_output, sequence_output
-
-
-if __name__ == '__main__':
-    test_module = Electra()
diff --git a/hub_module/modules/text/semantic_model/chinese_electra_small/model/electra.py b/hub_module/modules/text/semantic_model/chinese_electra_small/model/electra.py
deleted file mode 100644
index dd1646fa16e016477889dbbdaffc814fba04dc74..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/chinese_electra_small/model/electra.py
+++ /dev/null
@@ -1,232 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
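
Each of these models reads its hyperparameters through a thin `ElectraConfig`/`BertConfig` wrapper around a JSON file. A hypothetical round trip, assuming the module package is importable and using made-up but representative values for the keys `ElectraModel` actually reads:

```python
import json
import tempfile

from chinese_electra_base.model.electra import ElectraConfig

# Hypothetical minimal config covering the keys ElectraModel reads.
cfg = {
    "hidden_size": 768,
    "num_hidden_layers": 12,
    "num_attention_heads": 12,
    "vocab_size": 21128,
    "max_position_embeddings": 512,
    "type_vocab_size": 2,
    "hidden_act": "gelu",
    "hidden_dropout_prob": 0.1,
    "attention_probs_dropout_prob": 0.1,
    "initializer_range": 0.02,
}
with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
    json.dump(cfg, f)

config = ElectraConfig(f.name)
assert config["hidden_size"] == 768  # dict-style access via __getitem__
config.print_config()                # dumps every key/value pair
```
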
-"""ELECTRA model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import json - -import paddle.fluid as fluid - -from chinese_electra_small.model.transformer_encoder import encoder, pre_process_layer - - -class ElectraConfig(object): - def __init__(self, config_path): - self._config_dict = self._parse(config_path) - - def _parse(self, config_path): - try: - with open(config_path) as json_file: - config_dict = json.load(json_file) - except Exception: - raise IOError( - "Error in parsing electra model config file '%s'" % config_path) - else: - return config_dict - - def __getitem__(self, key): - return self._config_dict[key] - - def print_config(self): - for arg, value in sorted(six.iteritems(self._config_dict)): - print('%s: %s' % (arg, value)) - print('------------------------------------------------') - - -class ElectraModel(object): - def __init__(self, - src_ids, - position_ids, - sentence_ids, - input_mask, - config, - weight_sharing=True, - use_fp16=False): - - self._emb_size = 128 - self._hidden_size = config['hidden_size'] - self._n_layer = config['num_hidden_layers'] - self._n_head = config['num_attention_heads'] - self._voc_size = config['vocab_size'] - self._max_position_seq_len = config['max_position_embeddings'] - self._sent_types = config['type_vocab_size'] - self._hidden_act = config['hidden_act'] - self._prepostprocess_dropout = config['hidden_dropout_prob'] - self._attention_dropout = config['attention_probs_dropout_prob'] - self._weight_sharing = weight_sharing - - self._word_emb_name = "word_embedding" - self._pos_emb_name = "pos_embedding" - self._sent_emb_name = "sent_embedding" - self._dtype = "float16" if use_fp16 else "float32" - - # Initialize all weigths by truncated normal initializer, and all biases - # will be initialized by constant zero by default. 
- self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config['initializer_range']) - - self._build_model(src_ids, position_ids, sentence_ids, input_mask) - - def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): - # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( - input=src_ids, - size=[self._voc_size, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._word_emb_name, initializer=self._param_initializer), - is_sparse=False) - position_emb_out = fluid.layers.embedding( - input=position_ids, - size=[self._max_position_seq_len, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._pos_emb_name, initializer=self._param_initializer)) - - sent_emb_out = fluid.layers.embedding( - sentence_ids, - size=[self._sent_types, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._sent_emb_name, initializer=self._param_initializer)) - - emb_out = emb_out + position_emb_out - emb_out = emb_out + sent_emb_out - - emb_out = pre_process_layer( - emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') - - if self._emb_size != self._hidden_size: - emb_out = fluid.layers.fc( - input=emb_out, - size=self._hidden_size, - act=None, - param_attr=fluid.ParamAttr( - name="embeddings_project.w_0", - initializer=self._param_initializer), - num_flatten_dims=2, - bias_attr="embeddings_project.b_0") - - if self._dtype == "float16": - input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) - - self_attn_mask = fluid.layers.matmul( - x=input_mask, y=input_mask, transpose_y=True) - self_attn_mask = fluid.layers.scale( - x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = fluid.layers.stack( - x=[self_attn_mask] * self._n_head, axis=1) - n_head_self_attn_mask.stop_gradient = True - - self._enc_out = encoder( - enc_input=emb_out, - attn_bias=n_head_self_attn_mask, - n_layer=self._n_layer, - n_head=self._n_head, - d_key=self._hidden_size // self._n_head, - d_value=self._hidden_size // self._n_head, - d_model=self._hidden_size, - d_inner_hid=self._hidden_size * 4, - prepostprocess_dropout=self._prepostprocess_dropout, - attention_dropout=self._attention_dropout, - relu_dropout=0, - hidden_act=self._hidden_act, - preprocess_cmd="", - postprocess_cmd="dan", - param_initializer=self._param_initializer, - name='encoder') - - def get_sequence_output(self): - return self._enc_out - - def get_pooled_output(self): - """Get the first feature of each sequence for classification""" - next_sent_feat = fluid.layers.slice( - input=self._enc_out, axes=[1], starts=[0], ends=[1]) - return next_sent_feat - - def get_pretraining_output(self, mask_label, mask_pos, labels): - """Get the loss & accuracy for pretraining""" - - mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') - - # extract the first token feature in each sentence - next_sent_feat = self.get_pooled_output() - reshaped_emb_out = fluid.layers.reshape( - x=self._enc_out, shape=[-1, self._hidden_size]) - # extract masked tokens' feature - mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) - - # transform: fc - mask_trans_feat = fluid.layers.fc( - input=mask_feat, - size=self._hidden_size, - act=self._hidden_act, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_fc.w_0', - initializer=self._param_initializer), - bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) - # transform: layer norm - mask_trans_feat = pre_process_layer( - mask_trans_feat, 'n', 
name='mask_lm_trans') - - mask_lm_out_bias_attr = fluid.ParamAttr( - name="mask_lm_out_fc.b_0", - initializer=fluid.initializer.Constant(value=0.0)) - if self._weight_sharing: - fc_out = fluid.layers.matmul( - x=mask_trans_feat, - y=fluid.default_main_program().global_block().var( - self._word_emb_name), - transpose_y=True) - fc_out += fluid.layers.create_parameter( - shape=[self._voc_size], - dtype=self._dtype, - attr=mask_lm_out_bias_attr, - is_bias=True) - - else: - fc_out = fluid.layers.fc( - input=mask_trans_feat, - size=self._voc_size, - param_attr=fluid.ParamAttr( - name="mask_lm_out_fc.w_0", - initializer=self._param_initializer), - bias_attr=mask_lm_out_bias_attr) - - mask_lm_loss = fluid.layers.softmax_with_cross_entropy( - logits=fc_out, label=mask_label) - mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) - - next_sent_fc_out = fluid.layers.fc( - input=next_sent_feat, - size=2, - param_attr=fluid.ParamAttr( - name="next_sent_fc.w_0", initializer=self._param_initializer), - bias_attr="next_sent_fc.b_0") - - next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy( - logits=next_sent_fc_out, label=labels, return_softmax=True) - - next_sent_acc = fluid.layers.accuracy( - input=next_sent_softmax, label=labels) - - mean_next_sent_loss = fluid.layers.mean(next_sent_loss) - - loss = mean_next_sent_loss + mean_mask_lm_loss - return next_sent_acc, mean_mask_lm_loss, loss diff --git a/hub_module/modules/text/semantic_model/chinese_electra_small/model/transformer_encoder.py b/hub_module/modules/text/semantic_model/chinese_electra_small/model/transformer_encoder.py deleted file mode 100644 index 9a5913efc1fb6d7fc0ac5f6df37fb70424d259d9..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/chinese_electra_small/model/transformer_encoder.py +++ /dev/null @@ -1,340 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Transformer encoder.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from functools import partial - -import paddle.fluid as fluid -import paddle.fluid.layers as layers - - -def multi_head_attention(queries, - keys, - values, - attn_bias, - d_key, - d_value, - d_model, - n_head=1, - dropout_rate=0., - cache=None, - param_initializer=None, - name='multi_head_att'): - """ - Multi-Head Attention. Note that attn_bias is added to the logit before - computing softmax activiation to mask certain selected positions so that - they will not considered in attention weights. - """ - keys = queries if keys is None else keys - values = keys if values is None else values - - if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): - raise ValueError( - "Inputs: quries, keys and values should all be 3-D tensors.") - - def __compute_qkv(queries, keys, values, n_head, d_key, d_value): - """ - Add linear projection to queries, keys, and values. 
- """ - q = layers.fc( - input=queries, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_query_fc.w_0', initializer=param_initializer), - bias_attr=name + '_query_fc.b_0') - k = layers.fc( - input=keys, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_key_fc.w_0', initializer=param_initializer), - bias_attr=name + '_key_fc.b_0') - v = layers.fc( - input=values, - size=d_value * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_value_fc.w_0', initializer=param_initializer), - bias_attr=name + '_value_fc.b_0') - return q, k, v - - def __split_heads(x, n_head): - """ - Reshape the last dimension of inpunt tensor x so that it becomes two - dimensions and then transpose. Specifically, input a tensor with shape - [bs, max_sequence_length, n_head * hidden_dim] then output a tensor - with shape [bs, n_head, max_sequence_length, hidden_dim]. - """ - hidden_size = x.shape[-1] - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - reshaped = layers.reshape( - x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) - - # permuate the dimensions into: - # [batch_size, n_head, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) - - def __combine_heads(x): - """ - Transpose and then reshape the last two dimensions of inpunt tensor x - so that it becomes one dimension, which is reverse to __split_heads. - """ - if len(x.shape) == 3: return x - if len(x.shape) != 4: - raise ValueError("Input(x) should be a 4-D Tensor.") - - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - return layers.reshape( - x=trans_x, - shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], - inplace=True) - - def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): - """ - Scaled Dot-Product Attention - """ - scaled_q = layers.scale(x=q, scale=d_key**-0.5) - product = layers.matmul(x=scaled_q, y=k, transpose_y=True) - if attn_bias: - product += attn_bias - weights = layers.softmax(product) - if dropout_rate: - weights = layers.dropout( - weights, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.matmul(weights, v) - return out - - q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) - - if cache is not None: # use cache and concat time steps - # Since the inplace reshape in __split_heads changes the shape of k and - # v, which is the cache input for next time step, reshape the cache - # input from the previous time step first. - k = cache["k"] = layers.concat( - [layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) - v = cache["v"] = layers.concat( - [layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) - - q = __split_heads(q, n_head) - k = __split_heads(k, n_head) - v = __split_heads(v, n_head) - - ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, - dropout_rate) - - out = __combine_heads(ctx_multiheads) - - # Project back to the model size. 
- proj_out = layers.fc( - input=out, - size=d_model, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_output_fc.w_0', initializer=param_initializer), - bias_attr=name + '_output_fc.b_0') - return proj_out - - -def positionwise_feed_forward(x, - d_inner_hid, - d_hid, - dropout_rate, - hidden_act, - param_initializer=None, - name='ffn'): - """ - Position-wise Feed-Forward Networks. - This module consists of two linear transformations with a ReLU activation - in between, which is applied to each position separately and identically. - """ - hidden = layers.fc( - input=x, - size=d_inner_hid, - num_flatten_dims=2, - act=hidden_act, - param_attr=fluid.ParamAttr( - name=name + '_fc_0.w_0', initializer=param_initializer), - bias_attr=name + '_fc_0.b_0') - if dropout_rate: - hidden = layers.dropout( - hidden, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.fc( - input=hidden, - size=d_hid, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_fc_1.w_0', initializer=param_initializer), - bias_attr=name + '_fc_1.b_0') - return out - - -def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., - name=''): - """ - Add residual connection, layer normalization and droput to the out tensor - optionally according to the value of process_cmd. - This will be used before or after multi-head attention and position-wise - feed-forward networks. - """ - for cmd in process_cmd: - if cmd == "a": # add residual connection - out = out + prev_out if prev_out else out - elif cmd == "n": # add layer normalization - out_dtype = out.dtype - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float32") - out = layers.layer_norm( - out, - begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.ParamAttr( - name=name + '_layer_norm_scale', - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - name=name + '_layer_norm_bias', - initializer=fluid.initializer.Constant(0.))) - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float16") - elif cmd == "d": # add dropout - if dropout_rate: - out = layers.dropout( - out, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - return out - - -pre_process_layer = partial(pre_post_process_layer, None) -post_process_layer = pre_post_process_layer - - -def encoder_layer(enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """The encoder layers that can be stacked to form a deep encoder. - This module consits of a multi-head (self) attention followed by - position-wise feed-forward networks and both the two components companied - with the post_process_layer to add residual connection, layer normalization - and droput. 
- """ - attn_output = multi_head_attention( - pre_process_layer( - enc_input, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_att'), - None, - None, - attn_bias, - d_key, - d_value, - d_model, - n_head, - attention_dropout, - param_initializer=param_initializer, - name=name + '_multi_head_att') - attn_output = post_process_layer( - enc_input, - attn_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_att') - ffd_output = positionwise_feed_forward( - pre_process_layer( - attn_output, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_ffn'), - d_inner_hid, - d_model, - relu_dropout, - hidden_act, - param_initializer=param_initializer, - name=name + '_ffn') - return post_process_layer( - attn_output, - ffd_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_ffn') - - -def encoder(enc_input, - attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """ - The encoder is composed of a stack of identical layers returned by calling - encoder_layer. - """ - for i in range(n_layer): - enc_output = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name + '_layer_' + str(i)) - enc_input = enc_output - enc_output = pre_process_layer( - enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") - - return enc_output diff --git a/hub_module/modules/text/semantic_model/chinese_electra_small/module.py b/hub_module/modules/text/semantic_model/chinese_electra_small/module.py deleted file mode 100644 index 9e3129524bb1561f1e3de2dff45e5f60f37f40fd..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/chinese_electra_small/module.py +++ /dev/null @@ -1,76 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-from paddlehub import TransformerModule
-from paddlehub.module.module import moduleinfo
-
-from chinese_electra_small.model.electra import ElectraConfig, ElectraModel
-
-
-@moduleinfo(
-    name="chinese-electra-small",
-    version="1.0.0",
-    summary=
-    "chinese-electra-small, 12-layer, 256-hidden, 4-heads, 12M parameters",
-    author="ymcui",
-    author_email="ymcui@ir.hit.edu.cn",
-    type="nlp/semantic_model",
-)
-class Electra(TransformerModule):
-    def _initialize(self):
-        self.MAX_SEQ_LEN = 512
-        self.params_path = os.path.join(self.directory, "assets", "params")
-        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
-
-        electra_config_path = os.path.join(self.directory, "assets",
-                                           "config.json")
-        self.electra_config = ElectraConfig(electra_config_path)
-
-    def net(self, input_ids, position_ids, segment_ids, input_mask):
-        """
-        Create the neural network.
-
-        Args:
-            input_ids (tensor): the word ids.
-            position_ids (tensor): the position ids.
-            segment_ids (tensor): the segment ids.
-            input_mask (tensor): the padding mask.
-
-        Returns:
-            pooled_output (tensor): sentence-level output for classification tasks.
-            sequence_output (tensor): token-level output for sequence tasks.
-        """
-        electra = ElectraModel(
-            src_ids=input_ids,
-            position_ids=position_ids,
-            sentence_ids=segment_ids,
-            input_mask=input_mask,
-            config=self.electra_config,
-            use_fp16=False)
-        pooled_output = electra.get_pooled_output()
-        sequence_output = electra.get_sequence_output()
-        return pooled_output, sequence_output
-
-
-if __name__ == '__main__':
-    test_module = Electra()
diff --git a/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext/model/bert.py b/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext/model/bert.py
deleted file mode 100644
index 3a84f5664bfe121975bf9cad84a31e3389e9bacb..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext/model/bert.py
+++ /dev/null
@@ -1,228 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
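
The `__split_heads`/`__combine_heads` pair in the transformer encoder below is just a reshape-transpose round trip between `[bs, seq, n_head * d]` and `[bs, n_head, seq, d]`. A NumPy sketch with a self-check:

```python
import numpy as np

def split_heads(x, n_head):
    """[bs, seq, n_head*d] -> [bs, n_head, seq, d], as in __split_heads."""
    bs, seq, hidden = x.shape
    return x.reshape(bs, seq, n_head, hidden // n_head).transpose(0, 2, 1, 3)

def combine_heads(x):
    """[bs, n_head, seq, d] -> [bs, seq, n_head*d], the exact inverse."""
    bs, n_head, seq, d = x.shape
    return x.transpose(0, 2, 1, 3).reshape(bs, seq, n_head * d)

x = np.random.randn(2, 5, 8).astype("float32")
assert np.allclose(combine_heads(split_heads(x, n_head=4)), x)
```

The fluid version uses `shape=[0, 0, ...]` in `reshape` to mean "copy this dimension from the input", which is why the batch and sequence dimensions never appear explicitly there.
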
-"""BERT model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import json - -import paddle.fluid as fluid - -from chinese_roberta_wwm_ext.model.transformer_encoder import encoder, pre_process_layer - - -class BertConfig(object): - def __init__(self, config_path): - self._config_dict = self._parse(config_path) - - def _parse(self, config_path): - try: - with open(config_path) as json_file: - config_dict = json.load(json_file) - except Exception: - raise IOError( - "Error in parsing bert model config file '%s'" % config_path) - else: - return config_dict - - def __getitem__(self, key): - return self._config_dict[key] - - def print_config(self): - for arg, value in sorted(six.iteritems(self._config_dict)): - print('%s: %s' % (arg, value)) - print('------------------------------------------------') - - -class BertModel(object): - def __init__(self, - src_ids, - position_ids, - sentence_ids, - input_mask, - config, - weight_sharing=True, - use_fp16=False): - - self._emb_size = config['hidden_size'] - self._n_layer = config['num_hidden_layers'] - self._n_head = config['num_attention_heads'] - self._voc_size = config['vocab_size'] - self._max_position_seq_len = config['max_position_embeddings'] - self._sent_types = config['type_vocab_size'] - self._hidden_act = config['hidden_act'] - self._prepostprocess_dropout = config['hidden_dropout_prob'] - self._attention_dropout = config['attention_probs_dropout_prob'] - self._weight_sharing = weight_sharing - - self._word_emb_name = "word_embedding" - self._pos_emb_name = "pos_embedding" - self._sent_emb_name = "sent_embedding" - self._dtype = "float16" if use_fp16 else "float32" - - # Initialize all weigths by truncated normal initializer, and all biases - # will be initialized by constant zero by default. 
- self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config['initializer_range']) - - self._build_model(src_ids, position_ids, sentence_ids, input_mask) - - def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): - # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( - input=src_ids, - size=[self._voc_size, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._word_emb_name, initializer=self._param_initializer), - is_sparse=False) - position_emb_out = fluid.layers.embedding( - input=position_ids, - size=[self._max_position_seq_len, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._pos_emb_name, initializer=self._param_initializer)) - - sent_emb_out = fluid.layers.embedding( - sentence_ids, - size=[self._sent_types, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._sent_emb_name, initializer=self._param_initializer)) - - emb_out = emb_out + position_emb_out - emb_out = emb_out + sent_emb_out - - emb_out = pre_process_layer( - emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') - - if self._dtype == "float16": - input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) - - self_attn_mask = fluid.layers.matmul( - x=input_mask, y=input_mask, transpose_y=True) - self_attn_mask = fluid.layers.scale( - x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = fluid.layers.stack( - x=[self_attn_mask] * self._n_head, axis=1) - n_head_self_attn_mask.stop_gradient = True - - self._enc_out = encoder( - enc_input=emb_out, - attn_bias=n_head_self_attn_mask, - n_layer=self._n_layer, - n_head=self._n_head, - d_key=self._emb_size // self._n_head, - d_value=self._emb_size // self._n_head, - d_model=self._emb_size, - d_inner_hid=self._emb_size * 4, - prepostprocess_dropout=self._prepostprocess_dropout, - attention_dropout=self._attention_dropout, - relu_dropout=0, - hidden_act=self._hidden_act, - preprocess_cmd="", - postprocess_cmd="dan", - param_initializer=self._param_initializer, - name='encoder') - - def get_sequence_output(self): - return self._enc_out - - def get_pooled_output(self): - """Get the first feature of each sequence for classification""" - - next_sent_feat = fluid.layers.slice( - input=self._enc_out, axes=[1], starts=[0], ends=[1]) - next_sent_feat = fluid.layers.fc( - input=next_sent_feat, - size=self._emb_size, - act="tanh", - param_attr=fluid.ParamAttr( - name="pooled_fc.w_0", initializer=self._param_initializer), - bias_attr="pooled_fc.b_0") - return next_sent_feat - - def get_pretraining_output(self, mask_label, mask_pos, labels): - """Get the loss & accuracy for pretraining""" - - mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') - - # extract the first token feature in each sentence - next_sent_feat = self.get_pooled_output() - reshaped_emb_out = fluid.layers.reshape( - x=self._enc_out, shape=[-1, self._emb_size]) - # extract masked tokens' feature - mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) - - # transform: fc - mask_trans_feat = fluid.layers.fc( - input=mask_feat, - size=self._emb_size, - act=self._hidden_act, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_fc.w_0', - initializer=self._param_initializer), - bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) - # transform: layer norm - mask_trans_feat = pre_process_layer( - mask_trans_feat, 'n', name='mask_lm_trans') - - mask_lm_out_bias_attr = fluid.ParamAttr( - 
name="mask_lm_out_fc.b_0", - initializer=fluid.initializer.Constant(value=0.0)) - if self._weight_sharing: - fc_out = fluid.layers.matmul( - x=mask_trans_feat, - y=fluid.default_main_program().global_block().var( - self._word_emb_name), - transpose_y=True) - fc_out += fluid.layers.create_parameter( - shape=[self._voc_size], - dtype=self._dtype, - attr=mask_lm_out_bias_attr, - is_bias=True) - - else: - fc_out = fluid.layers.fc( - input=mask_trans_feat, - size=self._voc_size, - param_attr=fluid.ParamAttr( - name="mask_lm_out_fc.w_0", - initializer=self._param_initializer), - bias_attr=mask_lm_out_bias_attr) - - mask_lm_loss = fluid.layers.softmax_with_cross_entropy( - logits=fc_out, label=mask_label) - mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) - - next_sent_fc_out = fluid.layers.fc( - input=next_sent_feat, - size=2, - param_attr=fluid.ParamAttr( - name="next_sent_fc.w_0", initializer=self._param_initializer), - bias_attr="next_sent_fc.b_0") - - next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy( - logits=next_sent_fc_out, label=labels, return_softmax=True) - - next_sent_acc = fluid.layers.accuracy( - input=next_sent_softmax, label=labels) - - mean_next_sent_loss = fluid.layers.mean(next_sent_loss) - - loss = mean_next_sent_loss + mean_mask_lm_loss - return next_sent_acc, mean_mask_lm_loss, loss diff --git a/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext/model/transformer_encoder.py b/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext/model/transformer_encoder.py deleted file mode 100644 index 9a5913efc1fb6d7fc0ac5f6df37fb70424d259d9..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext/model/transformer_encoder.py +++ /dev/null @@ -1,340 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Transformer encoder.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from functools import partial - -import paddle.fluid as fluid -import paddle.fluid.layers as layers - - -def multi_head_attention(queries, - keys, - values, - attn_bias, - d_key, - d_value, - d_model, - n_head=1, - dropout_rate=0., - cache=None, - param_initializer=None, - name='multi_head_att'): - """ - Multi-Head Attention. Note that attn_bias is added to the logit before - computing softmax activiation to mask certain selected positions so that - they will not considered in attention weights. - """ - keys = queries if keys is None else keys - values = keys if values is None else values - - if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): - raise ValueError( - "Inputs: quries, keys and values should all be 3-D tensors.") - - def __compute_qkv(queries, keys, values, n_head, d_key, d_value): - """ - Add linear projection to queries, keys, and values. 
- """ - q = layers.fc( - input=queries, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_query_fc.w_0', initializer=param_initializer), - bias_attr=name + '_query_fc.b_0') - k = layers.fc( - input=keys, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_key_fc.w_0', initializer=param_initializer), - bias_attr=name + '_key_fc.b_0') - v = layers.fc( - input=values, - size=d_value * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_value_fc.w_0', initializer=param_initializer), - bias_attr=name + '_value_fc.b_0') - return q, k, v - - def __split_heads(x, n_head): - """ - Reshape the last dimension of inpunt tensor x so that it becomes two - dimensions and then transpose. Specifically, input a tensor with shape - [bs, max_sequence_length, n_head * hidden_dim] then output a tensor - with shape [bs, n_head, max_sequence_length, hidden_dim]. - """ - hidden_size = x.shape[-1] - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - reshaped = layers.reshape( - x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) - - # permuate the dimensions into: - # [batch_size, n_head, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) - - def __combine_heads(x): - """ - Transpose and then reshape the last two dimensions of inpunt tensor x - so that it becomes one dimension, which is reverse to __split_heads. - """ - if len(x.shape) == 3: return x - if len(x.shape) != 4: - raise ValueError("Input(x) should be a 4-D Tensor.") - - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - return layers.reshape( - x=trans_x, - shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], - inplace=True) - - def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): - """ - Scaled Dot-Product Attention - """ - scaled_q = layers.scale(x=q, scale=d_key**-0.5) - product = layers.matmul(x=scaled_q, y=k, transpose_y=True) - if attn_bias: - product += attn_bias - weights = layers.softmax(product) - if dropout_rate: - weights = layers.dropout( - weights, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.matmul(weights, v) - return out - - q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) - - if cache is not None: # use cache and concat time steps - # Since the inplace reshape in __split_heads changes the shape of k and - # v, which is the cache input for next time step, reshape the cache - # input from the previous time step first. - k = cache["k"] = layers.concat( - [layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) - v = cache["v"] = layers.concat( - [layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) - - q = __split_heads(q, n_head) - k = __split_heads(k, n_head) - v = __split_heads(v, n_head) - - ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, - dropout_rate) - - out = __combine_heads(ctx_multiheads) - - # Project back to the model size. 
- proj_out = layers.fc( - input=out, - size=d_model, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_output_fc.w_0', initializer=param_initializer), - bias_attr=name + '_output_fc.b_0') - return proj_out - - -def positionwise_feed_forward(x, - d_inner_hid, - d_hid, - dropout_rate, - hidden_act, - param_initializer=None, - name='ffn'): - """ - Position-wise Feed-Forward Networks. - This module consists of two linear transformations with a ReLU activation - in between, which is applied to each position separately and identically. - """ - hidden = layers.fc( - input=x, - size=d_inner_hid, - num_flatten_dims=2, - act=hidden_act, - param_attr=fluid.ParamAttr( - name=name + '_fc_0.w_0', initializer=param_initializer), - bias_attr=name + '_fc_0.b_0') - if dropout_rate: - hidden = layers.dropout( - hidden, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.fc( - input=hidden, - size=d_hid, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_fc_1.w_0', initializer=param_initializer), - bias_attr=name + '_fc_1.b_0') - return out - - -def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., - name=''): - """ - Add residual connection, layer normalization and droput to the out tensor - optionally according to the value of process_cmd. - This will be used before or after multi-head attention and position-wise - feed-forward networks. - """ - for cmd in process_cmd: - if cmd == "a": # add residual connection - out = out + prev_out if prev_out else out - elif cmd == "n": # add layer normalization - out_dtype = out.dtype - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float32") - out = layers.layer_norm( - out, - begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.ParamAttr( - name=name + '_layer_norm_scale', - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - name=name + '_layer_norm_bias', - initializer=fluid.initializer.Constant(0.))) - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float16") - elif cmd == "d": # add dropout - if dropout_rate: - out = layers.dropout( - out, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - return out - - -pre_process_layer = partial(pre_post_process_layer, None) -post_process_layer = pre_post_process_layer - - -def encoder_layer(enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """The encoder layers that can be stacked to form a deep encoder. - This module consits of a multi-head (self) attention followed by - position-wise feed-forward networks and both the two components companied - with the post_process_layer to add residual connection, layer normalization - and droput. 
- """ - attn_output = multi_head_attention( - pre_process_layer( - enc_input, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_att'), - None, - None, - attn_bias, - d_key, - d_value, - d_model, - n_head, - attention_dropout, - param_initializer=param_initializer, - name=name + '_multi_head_att') - attn_output = post_process_layer( - enc_input, - attn_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_att') - ffd_output = positionwise_feed_forward( - pre_process_layer( - attn_output, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_ffn'), - d_inner_hid, - d_model, - relu_dropout, - hidden_act, - param_initializer=param_initializer, - name=name + '_ffn') - return post_process_layer( - attn_output, - ffd_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_ffn') - - -def encoder(enc_input, - attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """ - The encoder is composed of a stack of identical layers returned by calling - encoder_layer. - """ - for i in range(n_layer): - enc_output = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name + '_layer_' + str(i)) - enc_input = enc_output - enc_output = pre_process_layer( - enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") - - return enc_output diff --git a/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext/module.py b/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext/module.py deleted file mode 100644 index fcfb280c7bbc5a6a3b974d6a6f20e4f987216aac..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext/module.py +++ /dev/null @@ -1,76 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-from paddlehub import TransformerModule
-from paddlehub.module.module import moduleinfo
-
-from chinese_roberta_wwm_ext.model.bert import BertConfig, BertModel
-
-
-@moduleinfo(
-    name="chinese-roberta-wwm-ext",
-    version="1.0.0",
-    summary=
-    "chinese-roberta-wwm-ext, 12-layer, 768-hidden, 12-heads, 110M parameters",
-    author="ymcui",
-    author_email="ymcui@ir.hit.edu.cn",
-    type="nlp/semantic_model",
-)
-class BertWwm(TransformerModule):
-    def _initialize(self):
-        self.MAX_SEQ_LEN = 512
-        self.params_path = os.path.join(self.directory, "assets", "params")
-        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
-
-        bert_config_path = os.path.join(self.directory, "assets",
-                                        "bert_config.json")
-        self.bert_config = BertConfig(bert_config_path)
-
-    def net(self, input_ids, position_ids, segment_ids, input_mask):
-        """
-        Create the neural network.
-
-        Args:
-            input_ids (tensor): the word ids.
-            position_ids (tensor): the position ids.
-            segment_ids (tensor): the segment ids.
-            input_mask (tensor): the padding mask.
-
-        Returns:
-            pooled_output (tensor): sentence-level output for classification task.
-            sequence_output (tensor): token-level output for sequence task.
-        """
-        bert = BertModel(
-            src_ids=input_ids,
-            position_ids=position_ids,
-            sentence_ids=segment_ids,
-            input_mask=input_mask,
-            config=self.bert_config,
-            use_fp16=False)
-        pooled_output = bert.get_pooled_output()
-        sequence_output = bert.get_sequence_output()
-        return pooled_output, sequence_output
-
-
-if __name__ == '__main__':
-    test_module = BertWwm()
diff --git a/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext_large/model/bert.py b/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext_large/model/bert.py
deleted file mode 100644
index 9a437fc470c2b787b2c7924f75046bf2a7b29094..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext_large/model/bert.py
+++ /dev/null
@@ -1,228 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
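Editorial note: `BertModel._build_model` below turns the `[batch, seq_len, 1]` padding mask into an additive attention bias via `fluid.layers.scale(..., scale=10000.0, bias=-1.0, bias_after_scale=False)`, i.e. `(x - 1) * 10000`. A NumPy sketch of that arithmetic on a hypothetical 5-token sequence with 2 padded positions:

```python
import numpy as np

# input_mask: [batch, seq_len, 1]; 1.0 for real tokens, 0.0 for padding
input_mask = np.array([[[1.], [1.], [1.], [0.], [0.]]], dtype="float32")

# 1.0 where both query and key are real tokens, else 0.0
self_attn_mask = input_mask @ input_mask.transpose(0, 2, 1)

# scale(x, scale=10000.0, bias=-1.0, bias_after_scale=False) == (x - 1) * 10000
self_attn_mask = (self_attn_mask - 1.0) * 10000.0

print(self_attn_mask[0, 0])  # [0. 0. 0. -10000. -10000.]
```

After the bias is added to the attention logits, softmax drives the weight of padded positions to (effectively) zero.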
-"""BERT model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import json - -import paddle.fluid as fluid - -from chinese_roberta_wwm_ext_large.model.transformer_encoder import encoder, pre_process_layer - - -class BertConfig(object): - def __init__(self, config_path): - self._config_dict = self._parse(config_path) - - def _parse(self, config_path): - try: - with open(config_path) as json_file: - config_dict = json.load(json_file) - except Exception: - raise IOError( - "Error in parsing bert model config file '%s'" % config_path) - else: - return config_dict - - def __getitem__(self, key): - return self._config_dict[key] - - def print_config(self): - for arg, value in sorted(six.iteritems(self._config_dict)): - print('%s: %s' % (arg, value)) - print('------------------------------------------------') - - -class BertModel(object): - def __init__(self, - src_ids, - position_ids, - sentence_ids, - input_mask, - config, - weight_sharing=True, - use_fp16=False): - - self._emb_size = config['hidden_size'] - self._n_layer = config['num_hidden_layers'] - self._n_head = config['num_attention_heads'] - self._voc_size = config['vocab_size'] - self._max_position_seq_len = config['max_position_embeddings'] - self._sent_types = config['type_vocab_size'] - self._hidden_act = config['hidden_act'] - self._prepostprocess_dropout = config['hidden_dropout_prob'] - self._attention_dropout = config['attention_probs_dropout_prob'] - self._weight_sharing = weight_sharing - - self._word_emb_name = "word_embedding" - self._pos_emb_name = "pos_embedding" - self._sent_emb_name = "sent_embedding" - self._dtype = "float16" if use_fp16 else "float32" - - # Initialize all weigths by truncated normal initializer, and all biases - # will be initialized by constant zero by default. 
- self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config['initializer_range']) - - self._build_model(src_ids, position_ids, sentence_ids, input_mask) - - def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): - # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( - input=src_ids, - size=[self._voc_size, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._word_emb_name, initializer=self._param_initializer), - is_sparse=False) - position_emb_out = fluid.layers.embedding( - input=position_ids, - size=[self._max_position_seq_len, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._pos_emb_name, initializer=self._param_initializer)) - - sent_emb_out = fluid.layers.embedding( - sentence_ids, - size=[self._sent_types, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._sent_emb_name, initializer=self._param_initializer)) - - emb_out = emb_out + position_emb_out - emb_out = emb_out + sent_emb_out - - emb_out = pre_process_layer( - emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') - - if self._dtype == "float16": - input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) - - self_attn_mask = fluid.layers.matmul( - x=input_mask, y=input_mask, transpose_y=True) - self_attn_mask = fluid.layers.scale( - x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = fluid.layers.stack( - x=[self_attn_mask] * self._n_head, axis=1) - n_head_self_attn_mask.stop_gradient = True - - self._enc_out = encoder( - enc_input=emb_out, - attn_bias=n_head_self_attn_mask, - n_layer=self._n_layer, - n_head=self._n_head, - d_key=self._emb_size // self._n_head, - d_value=self._emb_size // self._n_head, - d_model=self._emb_size, - d_inner_hid=self._emb_size * 4, - prepostprocess_dropout=self._prepostprocess_dropout, - attention_dropout=self._attention_dropout, - relu_dropout=0, - hidden_act=self._hidden_act, - preprocess_cmd="", - postprocess_cmd="dan", - param_initializer=self._param_initializer, - name='encoder') - - def get_sequence_output(self): - return self._enc_out - - def get_pooled_output(self): - """Get the first feature of each sequence for classification""" - - next_sent_feat = fluid.layers.slice( - input=self._enc_out, axes=[1], starts=[0], ends=[1]) - next_sent_feat = fluid.layers.fc( - input=next_sent_feat, - size=self._emb_size, - act="tanh", - param_attr=fluid.ParamAttr( - name="pooled_fc.w_0", initializer=self._param_initializer), - bias_attr="pooled_fc.b_0") - return next_sent_feat - - def get_pretraining_output(self, mask_label, mask_pos, labels): - """Get the loss & accuracy for pretraining""" - - mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') - - # extract the first token feature in each sentence - next_sent_feat = self.get_pooled_output() - reshaped_emb_out = fluid.layers.reshape( - x=self._enc_out, shape=[-1, self._emb_size]) - # extract masked tokens' feature - mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) - - # transform: fc - mask_trans_feat = fluid.layers.fc( - input=mask_feat, - size=self._emb_size, - act=self._hidden_act, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_fc.w_0', - initializer=self._param_initializer), - bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) - # transform: layer norm - mask_trans_feat = pre_process_layer( - mask_trans_feat, 'n', name='mask_lm_trans') - - mask_lm_out_bias_attr = fluid.ParamAttr( - 
name="mask_lm_out_fc.b_0", - initializer=fluid.initializer.Constant(value=0.0)) - if self._weight_sharing: - fc_out = fluid.layers.matmul( - x=mask_trans_feat, - y=fluid.default_main_program().global_block().var( - self._word_emb_name), - transpose_y=True) - fc_out += fluid.layers.create_parameter( - shape=[self._voc_size], - dtype=self._dtype, - attr=mask_lm_out_bias_attr, - is_bias=True) - - else: - fc_out = fluid.layers.fc( - input=mask_trans_feat, - size=self._voc_size, - param_attr=fluid.ParamAttr( - name="mask_lm_out_fc.w_0", - initializer=self._param_initializer), - bias_attr=mask_lm_out_bias_attr) - - mask_lm_loss = fluid.layers.softmax_with_cross_entropy( - logits=fc_out, label=mask_label) - mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) - - next_sent_fc_out = fluid.layers.fc( - input=next_sent_feat, - size=2, - param_attr=fluid.ParamAttr( - name="next_sent_fc.w_0", initializer=self._param_initializer), - bias_attr="next_sent_fc.b_0") - - next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy( - logits=next_sent_fc_out, label=labels, return_softmax=True) - - next_sent_acc = fluid.layers.accuracy( - input=next_sent_softmax, label=labels) - - mean_next_sent_loss = fluid.layers.mean(next_sent_loss) - - loss = mean_next_sent_loss + mean_mask_lm_loss - return next_sent_acc, mean_mask_lm_loss, loss diff --git a/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext_large/model/transformer_encoder.py b/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext_large/model/transformer_encoder.py deleted file mode 100644 index 9a5913efc1fb6d7fc0ac5f6df37fb70424d259d9..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext_large/model/transformer_encoder.py +++ /dev/null @@ -1,340 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Transformer encoder.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from functools import partial - -import paddle.fluid as fluid -import paddle.fluid.layers as layers - - -def multi_head_attention(queries, - keys, - values, - attn_bias, - d_key, - d_value, - d_model, - n_head=1, - dropout_rate=0., - cache=None, - param_initializer=None, - name='multi_head_att'): - """ - Multi-Head Attention. Note that attn_bias is added to the logit before - computing softmax activiation to mask certain selected positions so that - they will not considered in attention weights. - """ - keys = queries if keys is None else keys - values = keys if values is None else values - - if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): - raise ValueError( - "Inputs: quries, keys and values should all be 3-D tensors.") - - def __compute_qkv(queries, keys, values, n_head, d_key, d_value): - """ - Add linear projection to queries, keys, and values. 
- """ - q = layers.fc( - input=queries, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_query_fc.w_0', initializer=param_initializer), - bias_attr=name + '_query_fc.b_0') - k = layers.fc( - input=keys, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_key_fc.w_0', initializer=param_initializer), - bias_attr=name + '_key_fc.b_0') - v = layers.fc( - input=values, - size=d_value * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_value_fc.w_0', initializer=param_initializer), - bias_attr=name + '_value_fc.b_0') - return q, k, v - - def __split_heads(x, n_head): - """ - Reshape the last dimension of inpunt tensor x so that it becomes two - dimensions and then transpose. Specifically, input a tensor with shape - [bs, max_sequence_length, n_head * hidden_dim] then output a tensor - with shape [bs, n_head, max_sequence_length, hidden_dim]. - """ - hidden_size = x.shape[-1] - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - reshaped = layers.reshape( - x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) - - # permuate the dimensions into: - # [batch_size, n_head, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) - - def __combine_heads(x): - """ - Transpose and then reshape the last two dimensions of inpunt tensor x - so that it becomes one dimension, which is reverse to __split_heads. - """ - if len(x.shape) == 3: return x - if len(x.shape) != 4: - raise ValueError("Input(x) should be a 4-D Tensor.") - - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - return layers.reshape( - x=trans_x, - shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], - inplace=True) - - def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): - """ - Scaled Dot-Product Attention - """ - scaled_q = layers.scale(x=q, scale=d_key**-0.5) - product = layers.matmul(x=scaled_q, y=k, transpose_y=True) - if attn_bias: - product += attn_bias - weights = layers.softmax(product) - if dropout_rate: - weights = layers.dropout( - weights, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.matmul(weights, v) - return out - - q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) - - if cache is not None: # use cache and concat time steps - # Since the inplace reshape in __split_heads changes the shape of k and - # v, which is the cache input for next time step, reshape the cache - # input from the previous time step first. - k = cache["k"] = layers.concat( - [layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) - v = cache["v"] = layers.concat( - [layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) - - q = __split_heads(q, n_head) - k = __split_heads(k, n_head) - v = __split_heads(v, n_head) - - ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, - dropout_rate) - - out = __combine_heads(ctx_multiheads) - - # Project back to the model size. 
- proj_out = layers.fc( - input=out, - size=d_model, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_output_fc.w_0', initializer=param_initializer), - bias_attr=name + '_output_fc.b_0') - return proj_out - - -def positionwise_feed_forward(x, - d_inner_hid, - d_hid, - dropout_rate, - hidden_act, - param_initializer=None, - name='ffn'): - """ - Position-wise Feed-Forward Networks. - This module consists of two linear transformations with a ReLU activation - in between, which is applied to each position separately and identically. - """ - hidden = layers.fc( - input=x, - size=d_inner_hid, - num_flatten_dims=2, - act=hidden_act, - param_attr=fluid.ParamAttr( - name=name + '_fc_0.w_0', initializer=param_initializer), - bias_attr=name + '_fc_0.b_0') - if dropout_rate: - hidden = layers.dropout( - hidden, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.fc( - input=hidden, - size=d_hid, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_fc_1.w_0', initializer=param_initializer), - bias_attr=name + '_fc_1.b_0') - return out - - -def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., - name=''): - """ - Add residual connection, layer normalization and droput to the out tensor - optionally according to the value of process_cmd. - This will be used before or after multi-head attention and position-wise - feed-forward networks. - """ - for cmd in process_cmd: - if cmd == "a": # add residual connection - out = out + prev_out if prev_out else out - elif cmd == "n": # add layer normalization - out_dtype = out.dtype - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float32") - out = layers.layer_norm( - out, - begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.ParamAttr( - name=name + '_layer_norm_scale', - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - name=name + '_layer_norm_bias', - initializer=fluid.initializer.Constant(0.))) - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float16") - elif cmd == "d": # add dropout - if dropout_rate: - out = layers.dropout( - out, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - return out - - -pre_process_layer = partial(pre_post_process_layer, None) -post_process_layer = pre_post_process_layer - - -def encoder_layer(enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """The encoder layers that can be stacked to form a deep encoder. - This module consits of a multi-head (self) attention followed by - position-wise feed-forward networks and both the two components companied - with the post_process_layer to add residual connection, layer normalization - and droput. 
- """ - attn_output = multi_head_attention( - pre_process_layer( - enc_input, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_att'), - None, - None, - attn_bias, - d_key, - d_value, - d_model, - n_head, - attention_dropout, - param_initializer=param_initializer, - name=name + '_multi_head_att') - attn_output = post_process_layer( - enc_input, - attn_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_att') - ffd_output = positionwise_feed_forward( - pre_process_layer( - attn_output, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_ffn'), - d_inner_hid, - d_model, - relu_dropout, - hidden_act, - param_initializer=param_initializer, - name=name + '_ffn') - return post_process_layer( - attn_output, - ffd_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_ffn') - - -def encoder(enc_input, - attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """ - The encoder is composed of a stack of identical layers returned by calling - encoder_layer. - """ - for i in range(n_layer): - enc_output = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name + '_layer_' + str(i)) - enc_input = enc_output - enc_output = pre_process_layer( - enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") - - return enc_output diff --git a/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext_large/module.py b/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext_large/module.py deleted file mode 100644 index 3a5d14ae8b9226e7f09d29d9172555fcbf7f23e2..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext_large/module.py +++ /dev/null @@ -1,76 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-from paddlehub import TransformerModule
-from paddlehub.module.module import moduleinfo
-
-from chinese_roberta_wwm_ext_large.model.bert import BertConfig, BertModel
-
-
-@moduleinfo(
-    name="chinese-roberta-wwm-ext-large",
-    version="1.0.0",
-    summary=
-    "chinese-roberta-wwm-ext-large, 24-layer, 1024-hidden, 16-heads, 340M parameters",
-    author="ymcui",
-    author_email="ymcui@ir.hit.edu.cn",
-    type="nlp/semantic_model",
-)
-class BertWwm(TransformerModule):
-    def _initialize(self):
-        self.MAX_SEQ_LEN = 512
-        self.params_path = os.path.join(self.directory, "assets", "params")
-        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
-
-        bert_config_path = os.path.join(self.directory, "assets",
-                                        "bert_config.json")
-        self.bert_config = BertConfig(bert_config_path)
-
-    def net(self, input_ids, position_ids, segment_ids, input_mask):
-        """
-        Create the neural network.
-
-        Args:
-            input_ids (tensor): the word ids.
-            position_ids (tensor): the position ids.
-            segment_ids (tensor): the segment ids.
-            input_mask (tensor): the padding mask.
-
-        Returns:
-            pooled_output (tensor): sentence-level output for classification task.
-            sequence_output (tensor): token-level output for sequence task.
-        """
-        bert = BertModel(
-            src_ids=input_ids,
-            position_ids=position_ids,
-            sentence_ids=segment_ids,
-            input_mask=input_mask,
-            config=self.bert_config,
-            use_fp16=False)
-        pooled_output = bert.get_pooled_output()
-        sequence_output = bert.get_sequence_output()
-        return pooled_output, sequence_output
-
-
-if __name__ == '__main__':
-    test_module = BertWwm()
diff --git a/hub_module/modules/text/semantic_model/ernie/model/ernie.py b/hub_module/modules/text/semantic_model/ernie/model/ernie.py
deleted file mode 100644
index 0edea0fb37404085667821eacee7ff13789eaa08..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/ernie/model/ernie.py
+++ /dev/null
@@ -1,231 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
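Editorial note: the `ErnieConfig` class defined below is a thin wrapper over a JSON file. For orientation, here is a hypothetical minimal config containing the keys the model actually reads; the values shown are illustrative only, and real checkpoints ship their own `ernie_config.json`.

```python
import json

config = {
    "hidden_size": 768,                   # self._emb_size
    "num_hidden_layers": 12,              # self._n_layer
    "num_attention_heads": 12,            # self._n_head
    "vocab_size": 18000,                  # self._voc_size
    "max_position_embeddings": 513,       # self._max_position_seq_len
    "type_vocab_size": 2,                 # self._sent_types
    "hidden_act": "relu",                 # self._hidden_act
    "hidden_dropout_prob": 0.1,           # pre/post-process dropout
    "attention_probs_dropout_prob": 0.1,  # attention dropout
    "initializer_range": 0.02,            # truncated-normal scale
}
with open("ernie_config.json", "w", encoding="utf8") as f:
    json.dump(config, f, indent=2)
```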
-"""Ernie model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -from __future__ import absolute_import - -import json - -import six -import paddle.fluid as fluid -from io import open -from paddlehub.common.logger import logger - -from ernie.model.transformer_encoder import encoder, pre_process_layer - - -class ErnieConfig(object): - def __init__(self, config_path): - self._config_dict = self._parse(config_path) - - def _parse(self, config_path): - try: - with open(config_path, 'r', encoding='utf8') as json_file: - config_dict = json.load(json_file) - except Exception: - raise IOError( - "Error in parsing Ernie model config file '%s'" % config_path) - else: - return config_dict - - def __getitem__(self, key): - return self._config_dict.get(key, None) - - def print_config(self): - for arg, value in sorted(six.iteritems(self._config_dict)): - logger.info('%s: %s' % (arg, value)) - logger.info('------------------------------------------------') - - -class ErnieModel(object): - def __init__(self, - src_ids, - position_ids, - sentence_ids, - input_mask, - config, - weight_sharing=True, - use_fp16=False): - - self._emb_size = config['hidden_size'] - self._n_layer = config['num_hidden_layers'] - self._n_head = config['num_attention_heads'] - self._voc_size = config['vocab_size'] - self._max_position_seq_len = config['max_position_embeddings'] - self._sent_types = config['type_vocab_size'] - self._hidden_act = config['hidden_act'] - self._prepostprocess_dropout = config['hidden_dropout_prob'] - self._attention_dropout = config['attention_probs_dropout_prob'] - self._weight_sharing = weight_sharing - - self._word_emb_name = "word_embedding" - self._pos_emb_name = "pos_embedding" - self._sent_emb_name = "sent_embedding" - self._dtype = "float16" if use_fp16 else "float32" - - # Initialize all weigths by truncated normal initializer, and all biases - # will be initialized by constant zero by default. 
- self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config['initializer_range']) - - self._build_model(src_ids, position_ids, sentence_ids, input_mask) - - def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): - # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( - input=src_ids, - size=[self._voc_size, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._word_emb_name, initializer=self._param_initializer), - is_sparse=False) - position_emb_out = fluid.layers.embedding( - input=position_ids, - size=[self._max_position_seq_len, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._pos_emb_name, initializer=self._param_initializer)) - - sent_emb_out = fluid.layers.embedding( - sentence_ids, - size=[self._sent_types, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._sent_emb_name, initializer=self._param_initializer)) - - emb_out = emb_out + position_emb_out - emb_out = emb_out + sent_emb_out - - emb_out = pre_process_layer( - emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') - - if self._dtype == "float16": - input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) - self_attn_mask = fluid.layers.matmul( - x=input_mask, y=input_mask, transpose_y=True) - - self_attn_mask = fluid.layers.scale( - x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = fluid.layers.stack( - x=[self_attn_mask] * self._n_head, axis=1) - n_head_self_attn_mask.stop_gradient = True - - self._enc_out = encoder( - enc_input=emb_out, - attn_bias=n_head_self_attn_mask, - n_layer=self._n_layer, - n_head=self._n_head, - d_key=self._emb_size // self._n_head, - d_value=self._emb_size // self._n_head, - d_model=self._emb_size, - d_inner_hid=self._emb_size * 4, - prepostprocess_dropout=self._prepostprocess_dropout, - attention_dropout=self._attention_dropout, - relu_dropout=0, - hidden_act=self._hidden_act, - preprocess_cmd="", - postprocess_cmd="dan", - param_initializer=self._param_initializer, - name='encoder') - - def get_sequence_output(self): - return self._enc_out - - def get_pooled_output(self): - """Get the first feature of each sequence for classification""" - next_sent_feat = fluid.layers.slice( - input=self._enc_out, axes=[1], starts=[0], ends=[1]) - next_sent_feat = fluid.layers.fc( - input=next_sent_feat, - size=self._emb_size, - act="tanh", - param_attr=fluid.ParamAttr( - name="pooled_fc.w_0", initializer=self._param_initializer), - bias_attr="pooled_fc.b_0") - return next_sent_feat - - def get_pretraining_output(self, mask_label, mask_pos, labels): - """Get the loss & accuracy for pretraining""" - - mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') - - # extract the first token feature in each sentence - next_sent_feat = self.get_pooled_output() - reshaped_emb_out = fluid.layers.reshape( - x=self._enc_out, shape=[-1, self._emb_size]) - # extract masked tokens' feature - mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) - - # transform: fc - mask_trans_feat = fluid.layers.fc( - input=mask_feat, - size=self._emb_size, - act=self._hidden_act, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_fc.w_0', - initializer=self._param_initializer), - bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) - # transform: layer norm - mask_trans_feat = pre_process_layer( - mask_trans_feat, 'n', name='mask_lm_trans') - - mask_lm_out_bias_attr = fluid.ParamAttr( - 
name="mask_lm_out_fc.b_0", - initializer=fluid.initializer.Constant(value=0.0)) - if self._weight_sharing: - fc_out = fluid.layers.matmul( - x=mask_trans_feat, - y=fluid.default_main_program().global_block().var( - self._word_emb_name), - transpose_y=True) - fc_out += fluid.layers.create_parameter( - shape=[self._voc_size], - dtype=self._dtype, - attr=mask_lm_out_bias_attr, - is_bias=True) - - else: - fc_out = fluid.layers.fc( - input=mask_trans_feat, - size=self._voc_size, - param_attr=fluid.ParamAttr( - name="mask_lm_out_fc.w_0", - initializer=self._param_initializer), - bias_attr=mask_lm_out_bias_attr) - - mask_lm_loss = fluid.layers.softmax_with_cross_entropy( - logits=fc_out, label=mask_label) - mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) - - next_sent_fc_out = fluid.layers.fc( - input=next_sent_feat, - size=2, - param_attr=fluid.ParamAttr( - name="next_sent_fc.w_0", initializer=self._param_initializer), - bias_attr="next_sent_fc.b_0") - - next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy( - logits=next_sent_fc_out, label=labels, return_softmax=True) - - next_sent_acc = fluid.layers.accuracy( - input=next_sent_softmax, label=labels) - - mean_next_sent_loss = fluid.layers.mean(next_sent_loss) - - loss = mean_next_sent_loss + mean_mask_lm_loss - return next_sent_acc, mean_mask_lm_loss, loss diff --git a/hub_module/modules/text/semantic_model/ernie/model/transformer_encoder.py b/hub_module/modules/text/semantic_model/ernie/model/transformer_encoder.py deleted file mode 100644 index 9a5913efc1fb6d7fc0ac5f6df37fb70424d259d9..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/ernie/model/transformer_encoder.py +++ /dev/null @@ -1,340 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Transformer encoder.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from functools import partial - -import paddle.fluid as fluid -import paddle.fluid.layers as layers - - -def multi_head_attention(queries, - keys, - values, - attn_bias, - d_key, - d_value, - d_model, - n_head=1, - dropout_rate=0., - cache=None, - param_initializer=None, - name='multi_head_att'): - """ - Multi-Head Attention. Note that attn_bias is added to the logit before - computing softmax activiation to mask certain selected positions so that - they will not considered in attention weights. - """ - keys = queries if keys is None else keys - values = keys if values is None else values - - if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): - raise ValueError( - "Inputs: quries, keys and values should all be 3-D tensors.") - - def __compute_qkv(queries, keys, values, n_head, d_key, d_value): - """ - Add linear projection to queries, keys, and values. 
- """ - q = layers.fc( - input=queries, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_query_fc.w_0', initializer=param_initializer), - bias_attr=name + '_query_fc.b_0') - k = layers.fc( - input=keys, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_key_fc.w_0', initializer=param_initializer), - bias_attr=name + '_key_fc.b_0') - v = layers.fc( - input=values, - size=d_value * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_value_fc.w_0', initializer=param_initializer), - bias_attr=name + '_value_fc.b_0') - return q, k, v - - def __split_heads(x, n_head): - """ - Reshape the last dimension of inpunt tensor x so that it becomes two - dimensions and then transpose. Specifically, input a tensor with shape - [bs, max_sequence_length, n_head * hidden_dim] then output a tensor - with shape [bs, n_head, max_sequence_length, hidden_dim]. - """ - hidden_size = x.shape[-1] - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - reshaped = layers.reshape( - x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) - - # permuate the dimensions into: - # [batch_size, n_head, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) - - def __combine_heads(x): - """ - Transpose and then reshape the last two dimensions of inpunt tensor x - so that it becomes one dimension, which is reverse to __split_heads. - """ - if len(x.shape) == 3: return x - if len(x.shape) != 4: - raise ValueError("Input(x) should be a 4-D Tensor.") - - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - return layers.reshape( - x=trans_x, - shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], - inplace=True) - - def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): - """ - Scaled Dot-Product Attention - """ - scaled_q = layers.scale(x=q, scale=d_key**-0.5) - product = layers.matmul(x=scaled_q, y=k, transpose_y=True) - if attn_bias: - product += attn_bias - weights = layers.softmax(product) - if dropout_rate: - weights = layers.dropout( - weights, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.matmul(weights, v) - return out - - q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) - - if cache is not None: # use cache and concat time steps - # Since the inplace reshape in __split_heads changes the shape of k and - # v, which is the cache input for next time step, reshape the cache - # input from the previous time step first. - k = cache["k"] = layers.concat( - [layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) - v = cache["v"] = layers.concat( - [layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) - - q = __split_heads(q, n_head) - k = __split_heads(k, n_head) - v = __split_heads(v, n_head) - - ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, - dropout_rate) - - out = __combine_heads(ctx_multiheads) - - # Project back to the model size. 
- proj_out = layers.fc( - input=out, - size=d_model, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_output_fc.w_0', initializer=param_initializer), - bias_attr=name + '_output_fc.b_0') - return proj_out - - -def positionwise_feed_forward(x, - d_inner_hid, - d_hid, - dropout_rate, - hidden_act, - param_initializer=None, - name='ffn'): - """ - Position-wise Feed-Forward Networks. - This module consists of two linear transformations with a ReLU activation - in between, which is applied to each position separately and identically. - """ - hidden = layers.fc( - input=x, - size=d_inner_hid, - num_flatten_dims=2, - act=hidden_act, - param_attr=fluid.ParamAttr( - name=name + '_fc_0.w_0', initializer=param_initializer), - bias_attr=name + '_fc_0.b_0') - if dropout_rate: - hidden = layers.dropout( - hidden, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.fc( - input=hidden, - size=d_hid, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_fc_1.w_0', initializer=param_initializer), - bias_attr=name + '_fc_1.b_0') - return out - - -def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., - name=''): - """ - Add residual connection, layer normalization and droput to the out tensor - optionally according to the value of process_cmd. - This will be used before or after multi-head attention and position-wise - feed-forward networks. - """ - for cmd in process_cmd: - if cmd == "a": # add residual connection - out = out + prev_out if prev_out else out - elif cmd == "n": # add layer normalization - out_dtype = out.dtype - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float32") - out = layers.layer_norm( - out, - begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.ParamAttr( - name=name + '_layer_norm_scale', - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - name=name + '_layer_norm_bias', - initializer=fluid.initializer.Constant(0.))) - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float16") - elif cmd == "d": # add dropout - if dropout_rate: - out = layers.dropout( - out, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - return out - - -pre_process_layer = partial(pre_post_process_layer, None) -post_process_layer = pre_post_process_layer - - -def encoder_layer(enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """The encoder layers that can be stacked to form a deep encoder. - This module consits of a multi-head (self) attention followed by - position-wise feed-forward networks and both the two components companied - with the post_process_layer to add residual connection, layer normalization - and droput. 
- """ - attn_output = multi_head_attention( - pre_process_layer( - enc_input, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_att'), - None, - None, - attn_bias, - d_key, - d_value, - d_model, - n_head, - attention_dropout, - param_initializer=param_initializer, - name=name + '_multi_head_att') - attn_output = post_process_layer( - enc_input, - attn_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_att') - ffd_output = positionwise_feed_forward( - pre_process_layer( - attn_output, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_ffn'), - d_inner_hid, - d_model, - relu_dropout, - hidden_act, - param_initializer=param_initializer, - name=name + '_ffn') - return post_process_layer( - attn_output, - ffd_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_ffn') - - -def encoder(enc_input, - attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """ - The encoder is composed of a stack of identical layers returned by calling - encoder_layer. - """ - for i in range(n_layer): - enc_output = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name + '_layer_' + str(i)) - enc_input = enc_output - enc_output = pre_process_layer( - enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") - - return enc_output diff --git a/hub_module/modules/text/semantic_model/ernie/module.py b/hub_module/modules/text/semantic_model/ernie/module.py deleted file mode 100644 index 25bd31f36bf140447326bedf135dc207fb2fe1f1..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/ernie/module.py +++ /dev/null @@ -1,79 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-from paddlehub import TransformerModule
-from paddlehub.module.module import moduleinfo
-
-from ernie.model.ernie import ErnieModel, ErnieConfig
-
-
-@moduleinfo(
-    name="ernie",
-    version="1.2.0",
-    summary=
-    "Baidu's ERNIE, Enhanced Representation through kNowledge IntEgration, max_seq_len=512 when pretrained",
-    author="baidu-nlp",
-    author_email="",
-    type="nlp/semantic_model",
-)
-class Ernie(TransformerModule):
-    def _initialize(self):
-        ernie_config_path = os.path.join(self.directory, "assets",
-                                         "ernie_config.json")
-        self.ernie_config = ErnieConfig(ernie_config_path)
-        self.MAX_SEQ_LEN = 512
-        self.params_path = os.path.join(self.directory, "assets", "params")
-        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
-
-    def net(self, input_ids, position_ids, segment_ids, input_mask):
-        """
-        Create the neural network.
-
-        Args:
-            input_ids (tensor): the word ids.
-            position_ids (tensor): the position ids.
-            segment_ids (tensor): the segment ids.
-            input_mask (tensor): the padding mask.
-
-        Returns:
-            pooled_output (tensor): sentence-level output for classification task.
-            sequence_output (tensor): token-level output for sequence task.
-        """
-        self.ernie_config._config_dict['use_task_id'] = False
-        ernie = ErnieModel(
-            src_ids=input_ids,
-            position_ids=position_ids,
-            sentence_ids=segment_ids,
-            input_mask=input_mask,
-            config=self.ernie_config,
-            use_fp16=False)
-        pooled_output = ernie.get_pooled_output()
-        sequence_output = ernie.get_sequence_output()
-        return pooled_output, sequence_output
-
-    def param_prefix(self):
-        return "@HUB_ernie-stable@"
-
-
-if __name__ == '__main__':
-    test_module = Ernie()
diff --git a/hub_module/modules/text/semantic_model/ernie_tiny/model/ernie.py b/hub_module/modules/text/semantic_model/ernie_tiny/model/ernie.py
deleted file mode 100644
index c0e534043360e0e35c61c4df99a880eb85e767f9..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/ernie_tiny/model/ernie.py
+++ /dev/null
@@ -1,269 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
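Editorial note: the ERNIE variant defined below differs from the plain `ErnieModel` above in that it also adds a task embedding on top of the word, position, and sentence embeddings when `use_task_id` is set. A NumPy sketch of the input representation built in `_build_model` (tables, sizes, and ids here are hypothetical):

```python
import numpy as np

rng = np.random.default_rng(0)
# Toy lookup tables; real sizes come from the config.
word_emb = rng.standard_normal((100, 8)).astype("float32")
pos_emb = rng.standard_normal((16, 8)).astype("float32")
sent_emb = rng.standard_normal((2, 8)).astype("float32")
task_emb = rng.standard_normal((3, 8)).astype("float32")

src_ids = np.array([5, 17, 42])
position_ids = np.array([0, 1, 2])
sentence_ids = np.array([0, 0, 1])
task_ids = np.array([0, 0, 0])

# Input representation: element-wise sum of the embedding lookups.
emb_out = (word_emb[src_ids] + pos_emb[position_ids]
           + sent_emb[sentence_ids] + task_emb[task_ids])
print(emb_out.shape)  # (3, 8): one summed vector per token
```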
-"""Ernie model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -from __future__ import absolute_import - -import json -import six - -import paddle.fluid as fluid -from io import open -from paddlehub.common.logger import logger - -from ernie_tiny.model.transformer_encoder import encoder, pre_process_layer - - -class ErnieConfig(object): - def __init__(self, config_path): - self._config_dict = self._parse(config_path) - - def _parse(self, config_path): - try: - with open(config_path, 'r', encoding='utf8') as json_file: - config_dict = json.load(json_file) - except Exception: - raise IOError( - "Error in parsing Ernie model config file '%s'" % config_path) - else: - return config_dict - - def __getitem__(self, key): - return self._config_dict.get(key, None) - - def print_config(self): - for arg, value in sorted(six.iteritems(self._config_dict)): - logger.info('%s: %s' % (arg, value)) - logger.info('------------------------------------------------') - - -class ErnieModel(object): - def __init__(self, - src_ids, - position_ids, - sentence_ids, - task_ids, - input_mask, - config, - weight_sharing=True, - use_fp16=False): - - self._emb_size = config['hidden_size'] - self._n_layer = config['num_hidden_layers'] - self._n_head = config['num_attention_heads'] - self._voc_size = config['vocab_size'] - self._max_position_seq_len = config['max_position_embeddings'] - if config[ - 'sent_type_vocab_size']: # line 47: return self._config_dict.get(key, None) - self._sent_types = config['sent_type_vocab_size'] - else: - self._sent_types = config['type_vocab_size'] - - self._use_task_id = config['use_task_id'] - if self._use_task_id: - self._task_types = config['task_type_vocab_size'] - self._hidden_act = config['hidden_act'] - self._prepostprocess_dropout = config['hidden_dropout_prob'] - self._attention_dropout = config['attention_probs_dropout_prob'] - self._weight_sharing = weight_sharing - - self._word_emb_name = "word_embedding" - self._pos_emb_name = "pos_embedding" - self._sent_emb_name = "sent_embedding" - self._task_emb_name = "task_embedding" - self._dtype = "float16" if use_fp16 else "float32" - self._emb_dtype = "float32" - - # Initialize all weigths by truncated normal initializer, and all biases - # will be initialized by constant zero by default. 
- self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config['initializer_range']) - - self._build_model(src_ids, position_ids, sentence_ids, task_ids, - input_mask) - - def _build_model(self, src_ids, position_ids, sentence_ids, task_ids, - input_mask): - # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( - input=src_ids, - size=[self._voc_size, self._emb_size], - dtype=self._emb_dtype, - param_attr=fluid.ParamAttr( - name=self._word_emb_name, initializer=self._param_initializer), - is_sparse=False) - - position_emb_out = fluid.layers.embedding( - input=position_ids, - size=[self._max_position_seq_len, self._emb_size], - dtype=self._emb_dtype, - param_attr=fluid.ParamAttr( - name=self._pos_emb_name, initializer=self._param_initializer)) - - sent_emb_out = fluid.layers.embedding( - sentence_ids, - size=[self._sent_types, self._emb_size], - dtype=self._emb_dtype, - param_attr=fluid.ParamAttr( - name=self._sent_emb_name, initializer=self._param_initializer)) - - emb_out = emb_out + position_emb_out - emb_out = emb_out + sent_emb_out - - if self._use_task_id: - task_emb_out = fluid.layers.embedding( - task_ids, - size=[self._task_types, self._emb_size], - dtype=self._emb_dtype, - param_attr=fluid.ParamAttr( - name=self._task_emb_name, - initializer=self._param_initializer)) - - emb_out = emb_out + task_emb_out - - emb_out = pre_process_layer( - emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') - - if self._dtype == "float16": - emb_out = fluid.layers.cast(x=emb_out, dtype=self._dtype) - input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) - self_attn_mask = fluid.layers.matmul( - x=input_mask, y=input_mask, transpose_y=True) - - self_attn_mask = fluid.layers.scale( - x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = fluid.layers.stack( - x=[self_attn_mask] * self._n_head, axis=1) - n_head_self_attn_mask.stop_gradient = True - - self._enc_out = encoder( - enc_input=emb_out, - attn_bias=n_head_self_attn_mask, - n_layer=self._n_layer, - n_head=self._n_head, - d_key=self._emb_size // self._n_head, - d_value=self._emb_size // self._n_head, - d_model=self._emb_size, - d_inner_hid=self._emb_size * 4, - prepostprocess_dropout=self._prepostprocess_dropout, - attention_dropout=self._attention_dropout, - relu_dropout=0, - hidden_act=self._hidden_act, - preprocess_cmd="", - postprocess_cmd="dan", - param_initializer=self._param_initializer, - name='encoder') - if self._dtype == "float16": - self._enc_out = fluid.layers.cast( - x=self._enc_out, dtype=self._emb_dtype) - - def get_sequence_output(self): - return self._enc_out - - def get_pooled_output(self): - """Get the first feature of each sequence for classification""" - next_sent_feat = fluid.layers.slice( - input=self._enc_out, axes=[1], starts=[0], ends=[1]) - next_sent_feat = fluid.layers.fc( - input=next_sent_feat, - size=self._emb_size, - act="tanh", - param_attr=fluid.ParamAttr( - name="pooled_fc.w_0", initializer=self._param_initializer), - bias_attr="pooled_fc.b_0") - return next_sent_feat - - def get_lm_output(self, mask_label, mask_pos): - """Get the loss & accuracy for pretraining""" - - mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') - - # extract the first token feature in each sentence - self.next_sent_feat = self.get_pooled_output() - reshaped_emb_out = fluid.layers.reshape( - x=self._enc_out, shape=[-1, self._emb_size]) - # extract masked tokens' feature - mask_feat = fluid.layers.gather(input=reshaped_emb_out, 
index=mask_pos) - - # transform: fc - mask_trans_feat = fluid.layers.fc( - input=mask_feat, - size=self._emb_size, - act=self._hidden_act, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_fc.w_0', - initializer=self._param_initializer), - bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) - - # transform: layer norm - mask_trans_feat = fluid.layers.layer_norm( - mask_trans_feat, - begin_norm_axis=len(mask_trans_feat.shape) - 1, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_layer_norm_scale', - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - name='mask_lm_trans_layer_norm_bias', - initializer=fluid.initializer.Constant(1.))) - # transform: layer norm - #mask_trans_feat = pre_process_layer( - # mask_trans_feat, 'n', name='mask_lm_trans') - - mask_lm_out_bias_attr = fluid.ParamAttr( - name="mask_lm_out_fc.b_0", - initializer=fluid.initializer.Constant(value=0.0)) - if self._weight_sharing: - fc_out = fluid.layers.matmul( - x=mask_trans_feat, - y=fluid.default_main_program().global_block().var( - self._word_emb_name), - transpose_y=True) - fc_out += fluid.layers.create_parameter( - shape=[self._voc_size], - dtype=self._emb_dtype, - attr=mask_lm_out_bias_attr, - is_bias=True) - - else: - fc_out = fluid.layers.fc( - input=mask_trans_feat, - size=self._voc_size, - param_attr=fluid.ParamAttr( - name="mask_lm_out_fc.w_0", - initializer=self._param_initializer), - bias_attr=mask_lm_out_bias_attr) - - mask_lm_loss = fluid.layers.softmax_with_cross_entropy( - logits=fc_out, label=mask_label) - mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) - - return mean_mask_lm_loss - - def get_task_output(self, task, task_labels): - task_fc_out = fluid.layers.fc( - input=self.next_sent_feat, - size=task["num_labels"], - param_attr=fluid.ParamAttr( - name=task["task_name"] + "_fc.w_0", - initializer=self._param_initializer), - bias_attr=task["task_name"] + "_fc.b_0") - task_loss, task_softmax = fluid.layers.softmax_with_cross_entropy( - logits=task_fc_out, label=task_labels, return_softmax=True) - task_acc = fluid.layers.accuracy(input=task_softmax, label=task_labels) - mean_task_loss = fluid.layers.mean(task_loss) - return mean_task_loss, task_acc diff --git a/hub_module/modules/text/semantic_model/ernie_tiny/model/transformer_encoder.py b/hub_module/modules/text/semantic_model/ernie_tiny/model/transformer_encoder.py deleted file mode 100644 index 9a5913efc1fb6d7fc0ac5f6df37fb70424d259d9..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/ernie_tiny/model/transformer_encoder.py +++ /dev/null @@ -1,340 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
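-# Shape walkthrough for multi_head_attention below, for batch size bs and
-# sequence length T:
-#   queries/keys/values:         [bs, T, d_model]
-#   q, k, v after __compute_qkv: [bs, T, n_head * d_key]  (d_value for v)
-#   after __split_heads:         [bs, n_head, T, d_key]
-#   attention weights:           [bs, n_head, T, T]
-#   after __combine_heads:       [bs, T, n_head * d_value]
-#   proj_out:                    [bs, T, d_model]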
-"""Transformer encoder.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from functools import partial - -import paddle.fluid as fluid -import paddle.fluid.layers as layers - - -def multi_head_attention(queries, - keys, - values, - attn_bias, - d_key, - d_value, - d_model, - n_head=1, - dropout_rate=0., - cache=None, - param_initializer=None, - name='multi_head_att'): - """ - Multi-Head Attention. Note that attn_bias is added to the logit before - computing softmax activiation to mask certain selected positions so that - they will not considered in attention weights. - """ - keys = queries if keys is None else keys - values = keys if values is None else values - - if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): - raise ValueError( - "Inputs: quries, keys and values should all be 3-D tensors.") - - def __compute_qkv(queries, keys, values, n_head, d_key, d_value): - """ - Add linear projection to queries, keys, and values. - """ - q = layers.fc( - input=queries, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_query_fc.w_0', initializer=param_initializer), - bias_attr=name + '_query_fc.b_0') - k = layers.fc( - input=keys, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_key_fc.w_0', initializer=param_initializer), - bias_attr=name + '_key_fc.b_0') - v = layers.fc( - input=values, - size=d_value * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_value_fc.w_0', initializer=param_initializer), - bias_attr=name + '_value_fc.b_0') - return q, k, v - - def __split_heads(x, n_head): - """ - Reshape the last dimension of inpunt tensor x so that it becomes two - dimensions and then transpose. Specifically, input a tensor with shape - [bs, max_sequence_length, n_head * hidden_dim] then output a tensor - with shape [bs, n_head, max_sequence_length, hidden_dim]. - """ - hidden_size = x.shape[-1] - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - reshaped = layers.reshape( - x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) - - # permuate the dimensions into: - # [batch_size, n_head, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) - - def __combine_heads(x): - """ - Transpose and then reshape the last two dimensions of inpunt tensor x - so that it becomes one dimension, which is reverse to __split_heads. - """ - if len(x.shape) == 3: return x - if len(x.shape) != 4: - raise ValueError("Input(x) should be a 4-D Tensor.") - - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. 
-        return layers.reshape(
-            x=trans_x,
-            shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]],
-            inplace=True)
-
-    def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate):
-        """
-        Scaled Dot-Product Attention
-        """
-        scaled_q = layers.scale(x=q, scale=d_key**-0.5)
-        product = layers.matmul(x=scaled_q, y=k, transpose_y=True)
-        if attn_bias:
-            product += attn_bias
-        weights = layers.softmax(product)
-        if dropout_rate:
-            weights = layers.dropout(
-                weights,
-                dropout_prob=dropout_rate,
-                dropout_implementation="upscale_in_train",
-                is_test=False)
-        out = layers.matmul(weights, v)
-        return out
-
-    q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value)
-
-    if cache is not None:  # use cache and concat time steps
-        # Since the inplace reshape in __split_heads changes the shape of k and
-        # v, which is the cache input for the next time step, reshape the cache
-        # input from the previous time step first.
-        k = cache["k"] = layers.concat(
-            [layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1)
-        v = cache["v"] = layers.concat(
-            [layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1)
-
-    q = __split_heads(q, n_head)
-    k = __split_heads(k, n_head)
-    v = __split_heads(v, n_head)
-
-    ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key,
-                                                  dropout_rate)
-
-    out = __combine_heads(ctx_multiheads)
-
-    # Project back to the model size.
-    proj_out = layers.fc(
-        input=out,
-        size=d_model,
-        num_flatten_dims=2,
-        param_attr=fluid.ParamAttr(
-            name=name + '_output_fc.w_0', initializer=param_initializer),
-        bias_attr=name + '_output_fc.b_0')
-    return proj_out
-
-
-def positionwise_feed_forward(x,
-                              d_inner_hid,
-                              d_hid,
-                              dropout_rate,
-                              hidden_act,
-                              param_initializer=None,
-                              name='ffn'):
-    """
-    Position-wise Feed-Forward Networks.
-    This module consists of two linear transformations with a ReLU activation
-    in between, applied to each position separately and identically.
-    """
-    hidden = layers.fc(
-        input=x,
-        size=d_inner_hid,
-        num_flatten_dims=2,
-        act=hidden_act,
-        param_attr=fluid.ParamAttr(
-            name=name + '_fc_0.w_0', initializer=param_initializer),
-        bias_attr=name + '_fc_0.b_0')
-    if dropout_rate:
-        hidden = layers.dropout(
-            hidden,
-            dropout_prob=dropout_rate,
-            dropout_implementation="upscale_in_train",
-            is_test=False)
-    out = layers.fc(
-        input=hidden,
-        size=d_hid,
-        num_flatten_dims=2,
-        param_attr=fluid.ParamAttr(
-            name=name + '_fc_1.w_0', initializer=param_initializer),
-        bias_attr=name + '_fc_1.b_0')
-    return out
-
-
-def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.,
-                           name=''):
-    """
-    Add residual connection, layer normalization and dropout to the out tensor
-    optionally, according to the value of process_cmd.
-    This will be used before or after multi-head attention and position-wise
-    feed-forward networks.
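-    Each character of process_cmd selects one step, applied in order:
-    "a" adds the residual, "n" applies layer normalization, and "d" applies
-    dropout. For example, the cmd "dan" runs dropout, then the residual add,
-    then layer normalization.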
- """ - for cmd in process_cmd: - if cmd == "a": # add residual connection - out = out + prev_out if prev_out else out - elif cmd == "n": # add layer normalization - out_dtype = out.dtype - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float32") - out = layers.layer_norm( - out, - begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.ParamAttr( - name=name + '_layer_norm_scale', - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - name=name + '_layer_norm_bias', - initializer=fluid.initializer.Constant(0.))) - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float16") - elif cmd == "d": # add dropout - if dropout_rate: - out = layers.dropout( - out, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - return out - - -pre_process_layer = partial(pre_post_process_layer, None) -post_process_layer = pre_post_process_layer - - -def encoder_layer(enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """The encoder layers that can be stacked to form a deep encoder. - This module consits of a multi-head (self) attention followed by - position-wise feed-forward networks and both the two components companied - with the post_process_layer to add residual connection, layer normalization - and droput. - """ - attn_output = multi_head_attention( - pre_process_layer( - enc_input, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_att'), - None, - None, - attn_bias, - d_key, - d_value, - d_model, - n_head, - attention_dropout, - param_initializer=param_initializer, - name=name + '_multi_head_att') - attn_output = post_process_layer( - enc_input, - attn_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_att') - ffd_output = positionwise_feed_forward( - pre_process_layer( - attn_output, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_ffn'), - d_inner_hid, - d_model, - relu_dropout, - hidden_act, - param_initializer=param_initializer, - name=name + '_ffn') - return post_process_layer( - attn_output, - ffd_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_ffn') - - -def encoder(enc_input, - attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """ - The encoder is composed of a stack of identical layers returned by calling - encoder_layer. 
- """ - for i in range(n_layer): - enc_output = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name + '_layer_' + str(i)) - enc_input = enc_output - enc_output = pre_process_layer( - enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") - - return enc_output diff --git a/hub_module/modules/text/semantic_model/ernie_tiny/module.py b/hub_module/modules/text/semantic_model/ernie_tiny/module.py deleted file mode 100644 index 7fa8d3e856b724f3c578c90702762aa5d4f61e5e..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/ernie_tiny/module.py +++ /dev/null @@ -1,84 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -from paddlehub import TransformerModule -from paddlehub.module.module import moduleinfo - -from ernie_tiny.model.ernie import ErnieModel, ErnieConfig - - -@moduleinfo( - name="ernie_tiny", - version="1.1.0", - summary= - "Baidu's ERNIE-tiny, Enhanced Representation through kNowledge IntEgration, tiny version, max_seq_len=512", - author="baidu-nlp", - author_email="", - type="nlp/semantic_model", -) -class ErnieTiny(TransformerModule): - def _initialize(self): - ernie_config_path = os.path.join(self.directory, "assets", - "ernie_tiny_config.json") - self.ernie_config = ErnieConfig(ernie_config_path) - self.MAX_SEQ_LEN = 512 - self.params_path = os.path.join(self.directory, "assets", "params") - self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt") - self.spm_path = os.path.join(self.directory, "assets", - "spm_cased_simp_sampled.model") - self.word_dict_path = os.path.join(self.directory, "assets", - "dict.wordseg.pickle") - - def net(self, input_ids, position_ids, segment_ids, input_mask): - """ - create neural network. - - Args: - input_ids (tensor): the word ids. - position_ids (tensor): the position ids. - segment_ids (tensor): the segment ids. - input_mask (tensor): the padding mask. - - Returns: - pooled_output (tensor): sentence-level output for classification task. - sequence_output (tensor): token-level output for sequence task. 
- """ - self.ernie_config._config_dict['use_task_id'] = False - ernie = ErnieModel( - src_ids=input_ids, - position_ids=position_ids, - sentence_ids=segment_ids, - task_ids=None, - input_mask=input_mask, - config=self.ernie_config, - use_fp16=False) - pooled_output = ernie.get_pooled_output() - sequence_output = ernie.get_sequence_output() - return pooled_output, sequence_output - - def param_prefix(self): - return "@HUB_ernie-tiny@" - - -if __name__ == '__main__': - test_module = ErnieTiny() diff --git a/hub_module/modules/text/semantic_model/ernie_v2_eng_base/model/ernie.py b/hub_module/modules/text/semantic_model/ernie_v2_eng_base/model/ernie.py deleted file mode 100644 index eb68c11faac7e5c1e109a65cedfa895eb3e1ddd7..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/ernie_v2_eng_base/model/ernie.py +++ /dev/null @@ -1,270 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Ernie model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -from __future__ import absolute_import - -import json - -import six -import paddle.fluid as fluid -from io import open -from paddlehub.common.logger import logger - -from ernie_v2_eng_base.model.transformer_encoder import encoder, pre_process_layer - - -class ErnieConfig(object): - def __init__(self, config_path): - self._config_dict = self._parse(config_path) - - def _parse(self, config_path): - try: - with open(config_path, 'r', encoding='utf8') as json_file: - config_dict = json.load(json_file) - except Exception: - raise IOError( - "Error in parsing Ernie model config file '%s'" % config_path) - else: - return config_dict - - def __getitem__(self, key): - return self._config_dict.get(key, None) - - def print_config(self): - for arg, value in sorted(six.iteritems(self._config_dict)): - logger.info('%s: %s' % (arg, value)) - logger.info('------------------------------------------------') - - -class ErnieModel(object): - def __init__(self, - src_ids, - position_ids, - sentence_ids, - task_ids, - input_mask, - config, - weight_sharing=True, - use_fp16=False): - - self._emb_size = config['hidden_size'] - self._n_layer = config['num_hidden_layers'] - self._n_head = config['num_attention_heads'] - self._voc_size = config['vocab_size'] - self._max_position_seq_len = config['max_position_embeddings'] - if config['sent_type_vocab_size']: - self._sent_types = config['sent_type_vocab_size'] - else: - self._sent_types = config['type_vocab_size'] - - self._use_task_id = config['use_task_id'] - if self._use_task_id: - self._task_types = config['task_type_vocab_size'] - self._hidden_act = config['hidden_act'] - self._prepostprocess_dropout = config['hidden_dropout_prob'] - self._attention_dropout = config['attention_probs_dropout_prob'] - self._weight_sharing = weight_sharing - - self._word_emb_name = "word_embedding" - self._pos_emb_name = "pos_embedding" - 
self._sent_emb_name = "sent_embedding" - self._task_emb_name = "task_embedding" - self._dtype = "float16" if use_fp16 else "float32" - self._emb_dtype = "float32" - - # Initialize all weigths by truncated normal initializer, and all biases - # will be initialized by constant zero by default. - self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config['initializer_range']) - - self._build_model(src_ids, position_ids, sentence_ids, task_ids, - input_mask) - - def _build_model(self, src_ids, position_ids, sentence_ids, task_ids, - input_mask): - # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( - input=src_ids, - size=[self._voc_size, self._emb_size], - dtype=self._emb_dtype, - param_attr=fluid.ParamAttr( - name=self._word_emb_name, initializer=self._param_initializer), - is_sparse=False) - - position_emb_out = fluid.layers.embedding( - input=position_ids, - size=[self._max_position_seq_len, self._emb_size], - dtype=self._emb_dtype, - param_attr=fluid.ParamAttr( - name=self._pos_emb_name, initializer=self._param_initializer)) - - sent_emb_out = fluid.layers.embedding( - sentence_ids, - size=[self._sent_types, self._emb_size], - dtype=self._emb_dtype, - param_attr=fluid.ParamAttr( - name=self._sent_emb_name, initializer=self._param_initializer)) - - emb_out = emb_out + position_emb_out - emb_out = emb_out + sent_emb_out - - if self._use_task_id: - task_emb_out = fluid.layers.embedding( - task_ids, - size=[self._task_types, self._emb_size], - dtype=self._emb_dtype, - param_attr=fluid.ParamAttr( - name=self._task_emb_name, - initializer=self._param_initializer)) - - emb_out = emb_out + task_emb_out - - emb_out = pre_process_layer( - emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') - - if self._dtype == "float16": - emb_out = fluid.layers.cast(x=emb_out, dtype=self._dtype) - input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) - self_attn_mask = fluid.layers.matmul( - x=input_mask, y=input_mask, transpose_y=True) - - self_attn_mask = fluid.layers.scale( - x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = fluid.layers.stack( - x=[self_attn_mask] * self._n_head, axis=1) - n_head_self_attn_mask.stop_gradient = True - - self._enc_out = encoder( - enc_input=emb_out, - attn_bias=n_head_self_attn_mask, - n_layer=self._n_layer, - n_head=self._n_head, - d_key=self._emb_size // self._n_head, - d_value=self._emb_size // self._n_head, - d_model=self._emb_size, - d_inner_hid=self._emb_size * 4, - prepostprocess_dropout=self._prepostprocess_dropout, - attention_dropout=self._attention_dropout, - relu_dropout=0, - hidden_act=self._hidden_act, - preprocess_cmd="", - postprocess_cmd="dan", - param_initializer=self._param_initializer, - name='encoder') - - def get_sequence_output(self): - return self._enc_out - - def get_pooled_output(self): - """Get the first feature of each sequence for classification""" - next_sent_feat = fluid.layers.slice( - input=self._enc_out, axes=[1], starts=[0], ends=[1]) - if self._dtype == "float16": - next_sent_feat = fluid.layers.cast( - x=next_sent_feat, dtype=self._emb_dtype) - next_sent_feat = fluid.layers.fc( - input=next_sent_feat, - size=self._emb_size, - act="tanh", - param_attr=fluid.ParamAttr( - name="pooled_fc.w_0", initializer=self._param_initializer), - bias_attr="pooled_fc.b_0") - return next_sent_feat - - def get_lm_output(self, mask_label, mask_pos): - """Get the loss & accuracy for pretraining""" - - mask_pos = fluid.layers.cast(x=mask_pos, 
dtype='int32') - - # extract the first token feature in each sentence - self.next_sent_feat = self.get_pooled_output() - reshaped_emb_out = fluid.layers.reshape( - x=self._enc_out, shape=[-1, self._emb_size]) - # extract masked tokens' feature - mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) - if self._dtype == "float16": - mask_feat = fluid.layers.cast(x=mask_feat, dtype=self._emb_dtype) - - # transform: fc - mask_trans_feat = fluid.layers.fc( - input=mask_feat, - size=self._emb_size, - act=self._hidden_act, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_fc.w_0', - initializer=self._param_initializer), - bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) - - # transform: layer norm - mask_trans_feat = fluid.layers.layer_norm( - mask_trans_feat, - begin_norm_axis=len(mask_trans_feat.shape) - 1, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_layer_norm_scale', - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - name='mask_lm_trans_layer_norm_bias', - initializer=fluid.initializer.Constant(1.))) - # transform: layer norm - #mask_trans_feat = pre_process_layer( - # mask_trans_feat, 'n', name='mask_lm_trans') - - mask_lm_out_bias_attr = fluid.ParamAttr( - name="mask_lm_out_fc.b_0", - initializer=fluid.initializer.Constant(value=0.0)) - if self._weight_sharing: - fc_out = fluid.layers.matmul( - x=mask_trans_feat, - y=fluid.default_main_program().global_block().var( - self._word_emb_name), - transpose_y=True) - fc_out += fluid.layers.create_parameter( - shape=[self._voc_size], - dtype=self._emb_dtype, - attr=mask_lm_out_bias_attr, - is_bias=True) - - else: - fc_out = fluid.layers.fc( - input=mask_trans_feat, - size=self._voc_size, - param_attr=fluid.ParamAttr( - name="mask_lm_out_fc.w_0", - initializer=self._param_initializer), - bias_attr=mask_lm_out_bias_attr) - - mask_lm_loss = fluid.layers.softmax_with_cross_entropy( - logits=fc_out, label=mask_label) - mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) - - return mean_mask_lm_loss - - def get_task_output(self, task, task_labels): - task_fc_out = fluid.layers.fc( - input=self.next_sent_feat, - size=task["num_labels"], - param_attr=fluid.ParamAttr( - name=task["task_name"] + "_fc.w_0", - initializer=self._param_initializer), - bias_attr=task["task_name"] + "_fc.b_0") - task_loss, task_softmax = fluid.layers.softmax_with_cross_entropy( - logits=task_fc_out, label=task_labels, return_softmax=True) - task_acc = fluid.layers.accuracy(input=task_softmax, label=task_labels) - mean_task_loss = fluid.layers.mean(task_loss) - return mean_task_loss, task_acc diff --git a/hub_module/modules/text/semantic_model/ernie_v2_eng_base/model/transformer_encoder.py b/hub_module/modules/text/semantic_model/ernie_v2_eng_base/model/transformer_encoder.py deleted file mode 100644 index 9a5913efc1fb6d7fc0ac5f6df37fb70424d259d9..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/ernie_v2_eng_base/model/transformer_encoder.py +++ /dev/null @@ -1,340 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -"""Transformer encoder.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from functools import partial - -import paddle.fluid as fluid -import paddle.fluid.layers as layers - - -def multi_head_attention(queries, - keys, - values, - attn_bias, - d_key, - d_value, - d_model, - n_head=1, - dropout_rate=0., - cache=None, - param_initializer=None, - name='multi_head_att'): - """ - Multi-Head Attention. Note that attn_bias is added to the logit before - computing softmax activiation to mask certain selected positions so that - they will not considered in attention weights. - """ - keys = queries if keys is None else keys - values = keys if values is None else values - - if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): - raise ValueError( - "Inputs: quries, keys and values should all be 3-D tensors.") - - def __compute_qkv(queries, keys, values, n_head, d_key, d_value): - """ - Add linear projection to queries, keys, and values. - """ - q = layers.fc( - input=queries, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_query_fc.w_0', initializer=param_initializer), - bias_attr=name + '_query_fc.b_0') - k = layers.fc( - input=keys, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_key_fc.w_0', initializer=param_initializer), - bias_attr=name + '_key_fc.b_0') - v = layers.fc( - input=values, - size=d_value * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_value_fc.w_0', initializer=param_initializer), - bias_attr=name + '_value_fc.b_0') - return q, k, v - - def __split_heads(x, n_head): - """ - Reshape the last dimension of inpunt tensor x so that it becomes two - dimensions and then transpose. Specifically, input a tensor with shape - [bs, max_sequence_length, n_head * hidden_dim] then output a tensor - with shape [bs, n_head, max_sequence_length, hidden_dim]. - """ - hidden_size = x.shape[-1] - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - reshaped = layers.reshape( - x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) - - # permuate the dimensions into: - # [batch_size, n_head, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) - - def __combine_heads(x): - """ - Transpose and then reshape the last two dimensions of inpunt tensor x - so that it becomes one dimension, which is reverse to __split_heads. - """ - if len(x.shape) == 3: return x - if len(x.shape) != 4: - raise ValueError("Input(x) should be a 4-D Tensor.") - - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. 
- return layers.reshape( - x=trans_x, - shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], - inplace=True) - - def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): - """ - Scaled Dot-Product Attention - """ - scaled_q = layers.scale(x=q, scale=d_key**-0.5) - product = layers.matmul(x=scaled_q, y=k, transpose_y=True) - if attn_bias: - product += attn_bias - weights = layers.softmax(product) - if dropout_rate: - weights = layers.dropout( - weights, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.matmul(weights, v) - return out - - q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) - - if cache is not None: # use cache and concat time steps - # Since the inplace reshape in __split_heads changes the shape of k and - # v, which is the cache input for next time step, reshape the cache - # input from the previous time step first. - k = cache["k"] = layers.concat( - [layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) - v = cache["v"] = layers.concat( - [layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) - - q = __split_heads(q, n_head) - k = __split_heads(k, n_head) - v = __split_heads(v, n_head) - - ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, - dropout_rate) - - out = __combine_heads(ctx_multiheads) - - # Project back to the model size. - proj_out = layers.fc( - input=out, - size=d_model, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_output_fc.w_0', initializer=param_initializer), - bias_attr=name + '_output_fc.b_0') - return proj_out - - -def positionwise_feed_forward(x, - d_inner_hid, - d_hid, - dropout_rate, - hidden_act, - param_initializer=None, - name='ffn'): - """ - Position-wise Feed-Forward Networks. - This module consists of two linear transformations with a ReLU activation - in between, which is applied to each position separately and identically. - """ - hidden = layers.fc( - input=x, - size=d_inner_hid, - num_flatten_dims=2, - act=hidden_act, - param_attr=fluid.ParamAttr( - name=name + '_fc_0.w_0', initializer=param_initializer), - bias_attr=name + '_fc_0.b_0') - if dropout_rate: - hidden = layers.dropout( - hidden, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.fc( - input=hidden, - size=d_hid, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_fc_1.w_0', initializer=param_initializer), - bias_attr=name + '_fc_1.b_0') - return out - - -def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., - name=''): - """ - Add residual connection, layer normalization and droput to the out tensor - optionally according to the value of process_cmd. - This will be used before or after multi-head attention and position-wise - feed-forward networks. 
- """ - for cmd in process_cmd: - if cmd == "a": # add residual connection - out = out + prev_out if prev_out else out - elif cmd == "n": # add layer normalization - out_dtype = out.dtype - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float32") - out = layers.layer_norm( - out, - begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.ParamAttr( - name=name + '_layer_norm_scale', - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - name=name + '_layer_norm_bias', - initializer=fluid.initializer.Constant(0.))) - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float16") - elif cmd == "d": # add dropout - if dropout_rate: - out = layers.dropout( - out, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - return out - - -pre_process_layer = partial(pre_post_process_layer, None) -post_process_layer = pre_post_process_layer - - -def encoder_layer(enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """The encoder layers that can be stacked to form a deep encoder. - This module consits of a multi-head (self) attention followed by - position-wise feed-forward networks and both the two components companied - with the post_process_layer to add residual connection, layer normalization - and droput. - """ - attn_output = multi_head_attention( - pre_process_layer( - enc_input, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_att'), - None, - None, - attn_bias, - d_key, - d_value, - d_model, - n_head, - attention_dropout, - param_initializer=param_initializer, - name=name + '_multi_head_att') - attn_output = post_process_layer( - enc_input, - attn_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_att') - ffd_output = positionwise_feed_forward( - pre_process_layer( - attn_output, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_ffn'), - d_inner_hid, - d_model, - relu_dropout, - hidden_act, - param_initializer=param_initializer, - name=name + '_ffn') - return post_process_layer( - attn_output, - ffd_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_ffn') - - -def encoder(enc_input, - attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """ - The encoder is composed of a stack of identical layers returned by calling - encoder_layer. 
- """ - for i in range(n_layer): - enc_output = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name + '_layer_' + str(i)) - enc_input = enc_output - enc_output = pre_process_layer( - enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") - - return enc_output diff --git a/hub_module/modules/text/semantic_model/ernie_v2_eng_base/module.py b/hub_module/modules/text/semantic_model/ernie_v2_eng_base/module.py deleted file mode 100644 index 18e9cd68ded92f9f03abc05f1cd4e51660234dc7..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/ernie_v2_eng_base/module.py +++ /dev/null @@ -1,77 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -from paddlehub import TransformerModule -from paddlehub.module.module import moduleinfo - -from ernie_v2_eng_base.model.ernie import ErnieModel, ErnieConfig - - -@moduleinfo( - name="ernie_v2_eng_base", - version="1.1.0", - summary= - "Baidu's ERNIE 2.0, Enhanced Representation through kNowledge IntEgration, A Continual Pre-training Framework for Language Understanding. 12-layer, 768-hidden, 12-heads, 110M parameters.", - author="baidu-nlp", - author_email="", - type="nlp/semantic_model", -) -class ErnieV2EngBase(TransformerModule): - def _initialize(self): - ernie_config_path = os.path.join(self.directory, "assets", - "ernie_config.json") - self.ernie_config = ErnieConfig(ernie_config_path) - self.MAX_SEQ_LEN = 512 - self.params_path = os.path.join(self.directory, "assets", "params") - self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")\ - - def net(self, input_ids, position_ids, segment_ids, input_mask): - """ - create neural network. - - Args: - input_ids (tensor): the word ids. - position_ids (tensor): the position ids. - segment_ids (tensor): the segment ids. - input_mask (tensor): the padding mask. - - Returns: - pooled_output (tensor): sentence-level output for classification task. - sequence_output (tensor): token-level output for sequence task. 
- """ - self.ernie_config._config_dict['use_task_id'] = False - ernie = ErnieModel( - src_ids=input_ids, - position_ids=position_ids, - sentence_ids=segment_ids, - task_ids=None, - input_mask=input_mask, - config=self.ernie_config, - use_fp16=False) - pooled_output = ernie.get_pooled_output() - sequence_output = ernie.get_sequence_output() - return pooled_output, sequence_output - - -if __name__ == '__main__': - test_module = ErnieV2EngBase() diff --git a/hub_module/modules/text/semantic_model/ernie_v2_eng_large/model/ernie.py b/hub_module/modules/text/semantic_model/ernie_v2_eng_large/model/ernie.py deleted file mode 100644 index 6562734ac5455393d94a89d27e95d36767811b44..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/ernie_v2_eng_large/model/ernie.py +++ /dev/null @@ -1,270 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Ernie model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -from __future__ import absolute_import - -import json - -import six -import paddle.fluid as fluid -from io import open -from paddlehub.common.logger import logger - -from ernie_v2_eng_large.model.transformer_encoder import encoder, pre_process_layer - - -class ErnieConfig(object): - def __init__(self, config_path): - self._config_dict = self._parse(config_path) - - def _parse(self, config_path): - try: - with open(config_path, 'r', encoding='utf8') as json_file: - config_dict = json.load(json_file) - except Exception: - raise IOError( - "Error in parsing Ernie model config file '%s'" % config_path) - else: - return config_dict - - def __getitem__(self, key): - return self._config_dict.get(key, None) - - def print_config(self): - for arg, value in sorted(six.iteritems(self._config_dict)): - logger.info('%s: %s' % (arg, value)) - logger.info('------------------------------------------------') - - -class ErnieModel(object): - def __init__(self, - src_ids, - position_ids, - sentence_ids, - task_ids, - input_mask, - config, - weight_sharing=True, - use_fp16=False): - - self._emb_size = config['hidden_size'] - self._n_layer = config['num_hidden_layers'] - self._n_head = config['num_attention_heads'] - self._voc_size = config['vocab_size'] - self._max_position_seq_len = config['max_position_embeddings'] - if config['sent_type_vocab_size']: - self._sent_types = config['sent_type_vocab_size'] - else: - self._sent_types = config['type_vocab_size'] - - self._use_task_id = config['use_task_id'] - if self._use_task_id: - self._task_types = config['task_type_vocab_size'] - self._hidden_act = config['hidden_act'] - self._prepostprocess_dropout = config['hidden_dropout_prob'] - self._attention_dropout = config['attention_probs_dropout_prob'] - self._weight_sharing = weight_sharing - - self._word_emb_name = "word_embedding" - self._pos_emb_name = "pos_embedding" - self._sent_emb_name = "sent_embedding" - 
self._task_emb_name = "task_embedding" - self._dtype = "float16" if use_fp16 else "float32" - self._emb_dtype = "float32" - - # Initialize all weigths by truncated normal initializer, and all biases - # will be initialized by constant zero by default. - self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config['initializer_range']) - - self._build_model(src_ids, position_ids, sentence_ids, task_ids, - input_mask) - - def _build_model(self, src_ids, position_ids, sentence_ids, task_ids, - input_mask): - # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( - input=src_ids, - size=[self._voc_size, self._emb_size], - dtype=self._emb_dtype, - param_attr=fluid.ParamAttr( - name=self._word_emb_name, initializer=self._param_initializer), - is_sparse=False) - - position_emb_out = fluid.layers.embedding( - input=position_ids, - size=[self._max_position_seq_len, self._emb_size], - dtype=self._emb_dtype, - param_attr=fluid.ParamAttr( - name=self._pos_emb_name, initializer=self._param_initializer)) - - sent_emb_out = fluid.layers.embedding( - sentence_ids, - size=[self._sent_types, self._emb_size], - dtype=self._emb_dtype, - param_attr=fluid.ParamAttr( - name=self._sent_emb_name, initializer=self._param_initializer)) - - emb_out = emb_out + position_emb_out - emb_out = emb_out + sent_emb_out - - if self._use_task_id: - task_emb_out = fluid.layers.embedding( - task_ids, - size=[self._task_types, self._emb_size], - dtype=self._emb_dtype, - param_attr=fluid.ParamAttr( - name=self._task_emb_name, - initializer=self._param_initializer)) - - emb_out = emb_out + task_emb_out - - emb_out = pre_process_layer( - emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') - - if self._dtype == "float16": - emb_out = fluid.layers.cast(x=emb_out, dtype=self._dtype) - input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) - self_attn_mask = fluid.layers.matmul( - x=input_mask, y=input_mask, transpose_y=True) - - self_attn_mask = fluid.layers.scale( - x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = fluid.layers.stack( - x=[self_attn_mask] * self._n_head, axis=1) - n_head_self_attn_mask.stop_gradient = True - - self._enc_out = encoder( - enc_input=emb_out, - attn_bias=n_head_self_attn_mask, - n_layer=self._n_layer, - n_head=self._n_head, - d_key=self._emb_size // self._n_head, - d_value=self._emb_size // self._n_head, - d_model=self._emb_size, - d_inner_hid=self._emb_size * 4, - prepostprocess_dropout=self._prepostprocess_dropout, - attention_dropout=self._attention_dropout, - relu_dropout=0, - hidden_act=self._hidden_act, - preprocess_cmd="", - postprocess_cmd="dan", - param_initializer=self._param_initializer, - name='encoder') - - def get_sequence_output(self): - return self._enc_out - - def get_pooled_output(self): - """Get the first feature of each sequence for classification""" - next_sent_feat = fluid.layers.slice( - input=self._enc_out, axes=[1], starts=[0], ends=[1]) - if self._dtype == "float16": - next_sent_feat = fluid.layers.cast( - x=next_sent_feat, dtype=self._emb_dtype) - next_sent_feat = fluid.layers.fc( - input=next_sent_feat, - size=self._emb_size, - act="tanh", - param_attr=fluid.ParamAttr( - name="pooled_fc.w_0", initializer=self._param_initializer), - bias_attr="pooled_fc.b_0") - return next_sent_feat - - def get_lm_output(self, mask_label, mask_pos): - """Get the loss & accuracy for pretraining""" - - mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') - - # extract the first token 
feature in each sentence - self.next_sent_feat = self.get_pooled_output() - reshaped_emb_out = fluid.layers.reshape( - x=self._enc_out, shape=[-1, self._emb_size]) - # extract masked tokens' feature - mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) - if self._dtype == "float16": - mask_feat = fluid.layers.cast(x=mask_feat, dtype=self._emb_dtype) - - # transform: fc - mask_trans_feat = fluid.layers.fc( - input=mask_feat, - size=self._emb_size, - act=self._hidden_act, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_fc.w_0', - initializer=self._param_initializer), - bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) - - # transform: layer norm - mask_trans_feat = fluid.layers.layer_norm( - mask_trans_feat, - begin_norm_axis=len(mask_trans_feat.shape) - 1, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_layer_norm_scale', - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - name='mask_lm_trans_layer_norm_bias', - initializer=fluid.initializer.Constant(1.))) - # transform: layer norm - #mask_trans_feat = pre_process_layer( - # mask_trans_feat, 'n', name='mask_lm_trans') - - mask_lm_out_bias_attr = fluid.ParamAttr( - name="mask_lm_out_fc.b_0", - initializer=fluid.initializer.Constant(value=0.0)) - if self._weight_sharing: - fc_out = fluid.layers.matmul( - x=mask_trans_feat, - y=fluid.default_main_program().global_block().var( - self._word_emb_name), - transpose_y=True) - fc_out += fluid.layers.create_parameter( - shape=[self._voc_size], - dtype=self._emb_dtype, - attr=mask_lm_out_bias_attr, - is_bias=True) - - else: - fc_out = fluid.layers.fc( - input=mask_trans_feat, - size=self._voc_size, - param_attr=fluid.ParamAttr( - name="mask_lm_out_fc.w_0", - initializer=self._param_initializer), - bias_attr=mask_lm_out_bias_attr) - - mask_lm_loss = fluid.layers.softmax_with_cross_entropy( - logits=fc_out, label=mask_label) - mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) - - return mean_mask_lm_loss - - def get_task_output(self, task, task_labels): - task_fc_out = fluid.layers.fc( - input=self.next_sent_feat, - size=task["num_labels"], - param_attr=fluid.ParamAttr( - name=task["task_name"] + "_fc.w_0", - initializer=self._param_initializer), - bias_attr=task["task_name"] + "_fc.b_0") - task_loss, task_softmax = fluid.layers.softmax_with_cross_entropy( - logits=task_fc_out, label=task_labels, return_softmax=True) - task_acc = fluid.layers.accuracy(input=task_softmax, label=task_labels) - mean_task_loss = fluid.layers.mean(task_loss) - return mean_task_loss, task_acc diff --git a/hub_module/modules/text/semantic_model/ernie_v2_eng_large/model/transformer_encoder.py b/hub_module/modules/text/semantic_model/ernie_v2_eng_large/model/transformer_encoder.py deleted file mode 100644 index 9a5913efc1fb6d7fc0ac5f6df37fb70424d259d9..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/ernie_v2_eng_large/model/transformer_encoder.py +++ /dev/null @@ -1,340 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -"""Transformer encoder.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from functools import partial - -import paddle.fluid as fluid -import paddle.fluid.layers as layers - - -def multi_head_attention(queries, - keys, - values, - attn_bias, - d_key, - d_value, - d_model, - n_head=1, - dropout_rate=0., - cache=None, - param_initializer=None, - name='multi_head_att'): - """ - Multi-Head Attention. Note that attn_bias is added to the logit before - computing softmax activiation to mask certain selected positions so that - they will not considered in attention weights. - """ - keys = queries if keys is None else keys - values = keys if values is None else values - - if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): - raise ValueError( - "Inputs: quries, keys and values should all be 3-D tensors.") - - def __compute_qkv(queries, keys, values, n_head, d_key, d_value): - """ - Add linear projection to queries, keys, and values. - """ - q = layers.fc( - input=queries, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_query_fc.w_0', initializer=param_initializer), - bias_attr=name + '_query_fc.b_0') - k = layers.fc( - input=keys, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_key_fc.w_0', initializer=param_initializer), - bias_attr=name + '_key_fc.b_0') - v = layers.fc( - input=values, - size=d_value * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_value_fc.w_0', initializer=param_initializer), - bias_attr=name + '_value_fc.b_0') - return q, k, v - - def __split_heads(x, n_head): - """ - Reshape the last dimension of inpunt tensor x so that it becomes two - dimensions and then transpose. Specifically, input a tensor with shape - [bs, max_sequence_length, n_head * hidden_dim] then output a tensor - with shape [bs, n_head, max_sequence_length, hidden_dim]. - """ - hidden_size = x.shape[-1] - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - reshaped = layers.reshape( - x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) - - # permuate the dimensions into: - # [batch_size, n_head, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) - - def __combine_heads(x): - """ - Transpose and then reshape the last two dimensions of inpunt tensor x - so that it becomes one dimension, which is reverse to __split_heads. - """ - if len(x.shape) == 3: return x - if len(x.shape) != 4: - raise ValueError("Input(x) should be a 4-D Tensor.") - - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. 
- return layers.reshape( - x=trans_x, - shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], - inplace=True) - - def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): - """ - Scaled Dot-Product Attention - """ - scaled_q = layers.scale(x=q, scale=d_key**-0.5) - product = layers.matmul(x=scaled_q, y=k, transpose_y=True) - if attn_bias: - product += attn_bias - weights = layers.softmax(product) - if dropout_rate: - weights = layers.dropout( - weights, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.matmul(weights, v) - return out - - q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) - - if cache is not None: # use cache and concat time steps - # Since the inplace reshape in __split_heads changes the shape of k and - # v, which is the cache input for next time step, reshape the cache - # input from the previous time step first. - k = cache["k"] = layers.concat( - [layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) - v = cache["v"] = layers.concat( - [layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) - - q = __split_heads(q, n_head) - k = __split_heads(k, n_head) - v = __split_heads(v, n_head) - - ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, - dropout_rate) - - out = __combine_heads(ctx_multiheads) - - # Project back to the model size. - proj_out = layers.fc( - input=out, - size=d_model, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_output_fc.w_0', initializer=param_initializer), - bias_attr=name + '_output_fc.b_0') - return proj_out - - -def positionwise_feed_forward(x, - d_inner_hid, - d_hid, - dropout_rate, - hidden_act, - param_initializer=None, - name='ffn'): - """ - Position-wise Feed-Forward Networks. - This module consists of two linear transformations with a ReLU activation - in between, which is applied to each position separately and identically. - """ - hidden = layers.fc( - input=x, - size=d_inner_hid, - num_flatten_dims=2, - act=hidden_act, - param_attr=fluid.ParamAttr( - name=name + '_fc_0.w_0', initializer=param_initializer), - bias_attr=name + '_fc_0.b_0') - if dropout_rate: - hidden = layers.dropout( - hidden, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.fc( - input=hidden, - size=d_hid, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_fc_1.w_0', initializer=param_initializer), - bias_attr=name + '_fc_1.b_0') - return out - - -def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., - name=''): - """ - Add residual connection, layer normalization and droput to the out tensor - optionally according to the value of process_cmd. - This will be used before or after multi-head attention and position-wise - feed-forward networks. 
- """ - for cmd in process_cmd: - if cmd == "a": # add residual connection - out = out + prev_out if prev_out else out - elif cmd == "n": # add layer normalization - out_dtype = out.dtype - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float32") - out = layers.layer_norm( - out, - begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.ParamAttr( - name=name + '_layer_norm_scale', - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - name=name + '_layer_norm_bias', - initializer=fluid.initializer.Constant(0.))) - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float16") - elif cmd == "d": # add dropout - if dropout_rate: - out = layers.dropout( - out, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - return out - - -pre_process_layer = partial(pre_post_process_layer, None) -post_process_layer = pre_post_process_layer - - -def encoder_layer(enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """The encoder layers that can be stacked to form a deep encoder. - This module consits of a multi-head (self) attention followed by - position-wise feed-forward networks and both the two components companied - with the post_process_layer to add residual connection, layer normalization - and droput. - """ - attn_output = multi_head_attention( - pre_process_layer( - enc_input, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_att'), - None, - None, - attn_bias, - d_key, - d_value, - d_model, - n_head, - attention_dropout, - param_initializer=param_initializer, - name=name + '_multi_head_att') - attn_output = post_process_layer( - enc_input, - attn_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_att') - ffd_output = positionwise_feed_forward( - pre_process_layer( - attn_output, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_ffn'), - d_inner_hid, - d_model, - relu_dropout, - hidden_act, - param_initializer=param_initializer, - name=name + '_ffn') - return post_process_layer( - attn_output, - ffd_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_ffn') - - -def encoder(enc_input, - attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """ - The encoder is composed of a stack of identical layers returned by calling - encoder_layer. 
- """ - for i in range(n_layer): - enc_output = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name + '_layer_' + str(i)) - enc_input = enc_output - enc_output = pre_process_layer( - enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") - - return enc_output diff --git a/hub_module/modules/text/semantic_model/ernie_v2_eng_large/module.py b/hub_module/modules/text/semantic_model/ernie_v2_eng_large/module.py deleted file mode 100644 index 9c63c5d92a4f148508915d97b5d1f950b1990f47..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/ernie_v2_eng_large/module.py +++ /dev/null @@ -1,77 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -from paddlehub import TransformerModule -from paddlehub.module.module import moduleinfo - -from ernie_v2_eng_large.model.ernie import ErnieModel, ErnieConfig - - -@moduleinfo( - name="ernie_v2_eng_large", - version="1.1.0", - summary= - "Baidu's ERNIE 2.0, Enhanced Representation through kNowledge IntEgration, A Continual Pre-training Framework for Language Understanding. 12-layer, 768-hidden, 12-heads, 110M parameters.", - author="baidu-nlp", - author_email="", - type="nlp/semantic_model", -) -class ErnieV2EngLarge(TransformerModule): - def _initialize(self): - ernie_config_path = os.path.join(self.directory, "assets", - "ernie_config.json") - self.ernie_config = ErnieConfig(ernie_config_path) - self.MAX_SEQ_LEN = 512 - self.params_path = os.path.join(self.directory, "assets", "params") - self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")\ - - def net(self, input_ids, position_ids, segment_ids, input_mask): - """ - create neural network. - - Args: - input_ids (tensor): the word ids. - position_ids (tensor): the position ids. - segment_ids (tensor): the segment ids. - input_mask (tensor): the padding mask. - - Returns: - pooled_output (tensor): sentence-level output for classification task. - sequence_output (tensor): token-level output for sequence task. 
- """ - self.ernie_config._config_dict['use_task_id'] = False - ernie = ErnieModel( - src_ids=input_ids, - position_ids=position_ids, - sentence_ids=segment_ids, - task_ids=None, - input_mask=input_mask, - config=self.ernie_config, - use_fp16=False) - pooled_output = ernie.get_pooled_output() - sequence_output = ernie.get_sequence_output() - return pooled_output, sequence_output - - -if __name__ == '__main__': - test_module = ErnieV2EngLarge() diff --git a/hub_module/modules/text/semantic_model/lda_news/document.py b/hub_module/modules/text/semantic_model/lda_news/document.py deleted file mode 100644 index b79acd0794f60fb0a401726aea0b8b51deda90a1..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/lda_news/document.py +++ /dev/null @@ -1,179 +0,0 @@ -import numpy as np - - -class Topic(object): - """Basic data structure of topic, contains topic id and - corresponding probability. - """ - - def __init__(self, tid, prob): - self.tid = tid # topic id - self.prob = prob # topic probability - - -class Token(object): - """Basic storage unit of LDA documents, contains word id - and corresponding topic. - """ - - def __init__(self, topic, id): - self.topic = topic - self.id = id - - -class Sentence(object): - """Basic storage unit of SentenceLDA documents, contains word ids - of the sentence and its corresponding topic id. - """ - - def __init__(self, topic, tokens): - self.topic = topic - self.tokens = tokens - - -class LDADoc(object): - """The storage structure of LDA model's inference result. - """ - - def __init__(self): - self._num_topics = None # Number of topics. - self._num_accum = None # Number of accumulated sample rounds. - self._alpha = None # Document prior parameter. - self._tokens = None # Storage structure of inference results. - self._topic_sum = None # Document's topic sum in one round samples. - self._accum_topic_sum = None # Accumulated results of topic sum. - - def init(self, num_topics): - """Initialize the LDADoc according to num_topics. - """ - self._num_topics = num_topics - self._num_accum = 0 - self._tokens = [] - self._topic_sum = np.zeros(self._num_topics) - self._accum_topic_sum = np.zeros(self._num_topics) - - def add_token(self, token): - """Add new word to current LDADoc. - Arg: - token: Token class object. - """ - assert token.topic >= 0, "Topic %d out of range!" % token.topic - assert token.topic < self._num_topics, "Topic %d out of range!" % token.topic - self._tokens.append(token) - self._topic_sum[token.topic] += 1 - - def token(self, index): - return self._tokens[index] - - def set_topic(self, index, new_topic): - """Set the index word's topic to new_topic, and update the corresponding - topic distribution. - """ - assert new_topic >= 0, "Topic %d out of range!" % new_topic - assert new_topic < self._num_topics, "Topic %d out of range!" % new_topic - old_topic = self._tokens[index].topic - if new_topic == old_topic: - return - self._tokens[index].topic = new_topic - self._topic_sum[old_topic] -= 1 - self._topic_sum[new_topic] += 1 - - def set_alpha(self, alpha): - self._alpha = alpha - - def size(self): - """Return number of words in LDADoc. - """ - return len(self._tokens) - - def topic_sum(self, topic_id): - return self._topic_sum[topic_id] - - def sparse_topic_dist(self, sort=True): - """Return the topic distribution of documents in sparse format. - By default, it is sorted according to the topic probability - under the descending order. 
- """ - topic_dist = [] - sum_ = np.sum(self._accum_topic_sum) - if sum_ == 0: - return topic_dist - for i in range(0, self._num_topics): - if self._accum_topic_sum[i] == 0: - continue - topic_dist.append(Topic(i, self._accum_topic_sum[i] * 1.0 / sum_)) - if sort: - - def take_elem(topic): - return topic.prob - - topic_dist.sort(key=take_elem, reverse=True) - if topic_dist is None: - topic_dist = [] - - return topic_dist - - def dense_topic_dist(self): - """Return the distribution of document topics in dense format, - taking into account the prior parameter alpha. - """ - dense_dist = np.zeros(self._num_topics) - if self.size() == 0: - return dense_dist - dense_dist = ( - self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / ( - self.size() + self._alpha * self._num_topics) - return dense_dist - - def accumulate_topic_num(self): - self._accum_topic_sum += self._topic_sum - self._num_accum += 1 - - -class SLDADoc(LDADoc): - """Sentence LDA Document, inherited from LDADoc. - Add add_sentence interface. - """ - - def __init__(self): - super().__init__() - self.__sentences = None - - def init(self, num_topics): - """Initialize the SLDADoc according to num_topics. - """ - self._num_topics = num_topics - self.__sentences = [] - self._num_accum = 0 - self._topic_sum = np.zeros(self._num_topics) - self._accum_topic_sum = np.zeros(self._num_topics) - - def add_sentence(self, sent): - """Add new sentence to current SLDADoc. - Arg: - sent: Sentence class object. - """ - assert sent.topic >= 0, "Topic %d out of range!" % (sent.topic) - assert sent.topic < self._num_topics, "Topic %d out of range!" % ( - sent.topic) - self.__sentences.append(sent) - self._topic_sum[sent.topic] += 1 - - def set_topic(self, index, new_topic): - assert new_topic >= 0, "Topic %d out of range!" % (new_topic) - assert new_topic < self._num_topics, "Topic %d out of range!" % ( - new_topic) - old_topic = self.__sentences[index].topic - if new_topic == old_topic: - return - self.__sentences[index].topic = new_topic - self._topic_sum[old_topic] -= 1 - self._topic_sum[new_topic] += 1 - - def size(self): - """Return number of sentences in SLDADoc. - """ - return len(self.__sentences) - - def sent(self, index): - return self.__sentences[index] diff --git a/hub_module/modules/text/semantic_model/lda_news/inference_engine.py b/hub_module/modules/text/semantic_model/lda_news/inference_engine.py deleted file mode 100644 index c729d7da89186f5b7969f764cae8b788befd40f6..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/lda_news/inference_engine.py +++ /dev/null @@ -1,94 +0,0 @@ -import os - -from paddlehub.common.logger import logger - -from lda_news.config import ModelConfig -from lda_news.util import load_prototxt, fix_random_seed, rand_k -from lda_news.model import TopicModel -from lda_news.sampler import GibbsSampler, MHSampler -from lda_news.document import LDADoc, SLDADoc, Token, Sentence -from lda_news.vocab import OOV - - -class SamplerType: - GibbsSampling = 0 - MetropolisHastings = 1 - - -class InferenceEngine(object): - def __init__(self, - model_dir, - conf_file, - type=SamplerType.MetropolisHastings): - # Read model configuration. - config = ModelConfig() - conf_file_path = os.path.join(model_dir, conf_file) - load_prototxt(conf_file_path, config) - self.__model = TopicModel(model_dir, config) - self.__config = config - - # Initialize the sampler according to the configuration. 
- if type == SamplerType.GibbsSampling: - self.__sampler = GibbsSampler(self.__model) - elif type == SamplerType.MetropolisHastings: - self.__sampler = MHSampler(self.__model) - - def infer(self, input, doc): - """Perform LDA topic inference on input, and store the results in doc. - Args: - input: a list of strings after tokenization. - doc: LDADoc type or SLDADoc type. - """ - fix_random_seed() - if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): - doc.init(self.__model.num_topics()) - doc.set_alpha(self.__model.alpha()) - for token in input: - id_ = self.__model.term_id(token) - if id_ != OOV: - init_topic = rand_k(self.__model.num_topics()) - doc.add_token(Token(init_topic, id_)) - self.lda_infer(doc, 20, 50) - elif isinstance(doc, SLDADoc): - doc.init(self.__model.num_topics()) - doc.set_alpha(self.__model.alpha()) - for sent in input: - words = [] - for token in sent: - id_ = self.__model.term_id(token) - if id_ != OOV: - words.append(id_) - init_topic = rand_k(self.__model.num_topics()) - doc.add_sentence(Sentence(init_topic, words)) - self.slda_infer(doc, 20, 50) - else: - logger.error("Wrong Doc Type!") - - def lda_infer(self, doc, burn_in_iter, total_iter): - assert burn_in_iter >= 0 - assert total_iter > 0 - assert total_iter > burn_in_iter - - for iter_ in range(total_iter): - self.__sampler.sample_doc(doc) - if iter_ >= burn_in_iter: - doc.accumulate_topic_num() - - def slda_infer(self, doc, burn_in_iter, total_iter): - assert burn_in_iter >= 0 - assert total_iter > 0 - assert total_iter > burn_in_iter - - for iter_ in range(total_iter): - self.__sampler.sample_doc(doc) - if iter_ >= burn_in_iter: - doc.accumulate_topic_num() - - def model_type(self): - return self.__model.type() - - def get_model(self): - return self.__model - - def get_config(self): - return self.__config diff --git a/hub_module/modules/text/semantic_model/lda_news/model.py b/hub_module/modules/text/semantic_model/lda_news/model.py deleted file mode 100644 index a473015b2934f28a0f3004abff2420e4ad6c2cb6..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/lda_news/model.py +++ /dev/null @@ -1,127 +0,0 @@ -import os -from collections import OrderedDict - -import numpy as np -from tqdm import tqdm -from paddlehub.common.logger import logger - -from lda_news.vocab import Vocab, WordCount - - -class TopicModel(object): - """Storage Structure of Topic model, including vocabulary and word topic count. - """ - - def __init__(self, model_dir, config): - """ - Args: - model_dir: the path of model directory - config: ModelConfig class. - """ - self.__word_topic = None # Model parameter of word topic. - self.__vocab = Vocab() # Vocab data structure of model. - self.__num_topics = config.num_topics # Number of topics. - self.__alpha = config.alpha - self.__alpha_sum = self.__alpha * self.__num_topics - self.__beta = config.beta - self.__beta_sum = None - self.__type = config.type # Model type. - self.__topic_sum = np.zeros( - self.__num_topics, - dtype="int64") # Accum sum of each topic in word topic. 
-        self.__topic_words = [[] for _ in range(self.__num_topics)]
-        word_topic_path = os.path.join(model_dir, config.word_topic_file)
-        vocab_path = os.path.join(model_dir, config.vocab_file)
-        self.load_model(word_topic_path, vocab_path)
-
-    def term_id(self, term):
-        return self.__vocab.get_id(term)
-
-    def load_model(self, word_topic_path, vocab_path):
-
-        # Load the vocabulary.
-        self.__vocab.load(vocab_path)
-
-        self.__beta_sum = self.__beta * self.__vocab.size()
-        self.__word_topic = [{} for _ in range(self.__vocab.size())]  # list of per-word dicts
-        self.__load_word_dict(word_topic_path)
-        logger.info(
-            "Model Info: #num_topics=%d #vocab_size=%d alpha=%f beta=%f" %
-            (self.num_topics(), self.vocab_size(), self.alpha(), self.beta()))
-
-    def word_topic_value(self, word_id, topic_id):
-        """Return the count of a specific word under a specific topic in the model.
-        """
-        word_dict = self.__word_topic[word_id]
-        if topic_id not in word_dict:
-            return 0
-        return word_dict[topic_id]
-
-    def word_topic(self, term_id):
-        """Return the topic distribution of a word.
-        """
-        return self.__word_topic[term_id]
-
-    def topic_sum_value(self, topic_id):
-        return self.__topic_sum[topic_id]
-
-    def topic_sum(self):
-        return self.__topic_sum
-
-    def num_topics(self):
-        return self.__num_topics
-
-    def vocab_size(self):
-        return self.__vocab.size()
-
-    def alpha(self):
-        return self.__alpha
-
-    def alpha_sum(self):
-        return self.__alpha_sum
-
-    def beta(self):
-        return self.__beta
-
-    def beta_sum(self):
-        return self.__beta_sum
-
-    def type(self):
-        return self.__type
-
-    def __load_word_dict(self, word_dict_path):
-        """Load the word topic parameters.
-        """
-        logger.info("Loading word topic.")
-        with open(word_dict_path, 'r', encoding='utf-8') as f:
-            for line in tqdm(f.readlines()):
-                fields = line.strip().split(" ")
-                assert len(fields) > 0, "Model file format error!"
-                term_id = int(fields[0])
-                assert term_id < self.vocab_size(), "Term id out of range!"
-                assert term_id >= 0, "Term id out of range!"
-                for i in range(1, len(fields)):
-                    topic_count = fields[i].split(":")
-                    assert len(topic_count) == 2, "Topic count format error!"
-
-                    topic_id = int(topic_count[0])
-                    assert topic_id >= 0, "Topic out of range!"
-                    assert topic_id < self.__num_topics, "Topic out of range!"
-
-                    count = int(topic_count[1])
-                    assert count >= 0, "Topic count error!"
- - self.__word_topic[term_id][topic_id] = count - self.__topic_sum[topic_id] += count - self.__topic_words[topic_id].append( - WordCount(term_id, count)) - new_dict = OrderedDict() - for key in sorted(self.__word_topic[term_id]): - new_dict[key] = self.__word_topic[term_id][key] - self.__word_topic[term_id] = new_dict - - def get_vocab(self): - return self.__vocab.vocabulary() - - def topic_words(self): - return self.__topic_words diff --git a/hub_module/modules/text/semantic_model/lda_news/module.py b/hub_module/modules/text/semantic_model/lda_news/module.py deleted file mode 100644 index e8e7fe40675784ab6947333846230537b5cc0085..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/lda_news/module.py +++ /dev/null @@ -1,200 +0,0 @@ -import os - -import paddlehub as hub -from paddlehub.module.module import moduleinfo -from paddlehub.common.logger import logger - -from lda_news.inference_engine import InferenceEngine -from lda_news.document import LDADoc, SLDADoc -from lda_news.semantic_matching import SemanticMatching, WordAndDis -from lda_news.tokenizer import LACTokenizer, SimpleTokenizer -from lda_news.config import ModelType -from lda_news.vocab import Vocab, WordCount - - -@moduleinfo( - name="lda_news", - version="1.0.2", - summary= - "This is a PaddleHub Module for LDA topic model in news dataset, where we can calculate doc distance, calculate the similarity between query and document, etc", - author="DesmonDay", - author_email="", - type="nlp/semantic_model") -class TopicModel(hub.Module): - def _initialize(self): - """ - Initialize with the necessary elements. - """ - self.model_dir = os.path.join(self.directory, 'news') - self.conf_file = 'lda.conf' - self.__engine = InferenceEngine(self.model_dir, self.conf_file) - self.vocab_path = os.path.join(self.model_dir, 'vocab_info.txt') - lac = hub.Module(name="lac") - # self.__tokenizer = SimpleTokenizer(self.vocab_path) - self.__tokenizer = LACTokenizer(self.vocab_path, lac) - - self.vocabulary = self.__engine.get_model().get_vocab() - self.config = self.__engine.get_config() - self.topic_words = self.__engine.get_model().topic_words() - self.topic_sum_table = self.__engine.get_model().topic_sum() - - def take_elem(word_count): - return word_count.count - - for i in range(self.config.num_topics): - self.topic_words[i].sort(key=take_elem, reverse=True) - - logger.info("Finish initialization.") - - def cal_doc_distance(self, doc_text1, doc_text2): - """ - This interface calculates the distance between documents. - - Args: - doc_text1(str): the input document text 1. - doc_text2(str): the input document text 2. - - Returns: - jsd(float): Jensen-Shannon Divergence distance of two documents. - hd(float): Hellinger Distance of two documents. - """ - doc1_tokens = self.__tokenizer.tokenize(doc_text1) - doc2_tokens = self.__tokenizer.tokenize(doc_text2) - - # Document topic inference. - doc1, doc2 = LDADoc(), LDADoc() - self.__engine.infer(doc1_tokens, doc1) - self.__engine.infer(doc2_tokens, doc2) - - # To calculate jsd, we need dense document topic distribution. - dense_dict1 = doc1.dense_topic_dist() - dense_dict2 = doc2.dense_topic_dist() - # Calculate the distance between distributions. - # The smaller the distance, the higher the document semantic similarity. 
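For reference, the two distances mentioned here can be written down directly. This is a self-contained sketch of the standard formulas, not the `SemanticMatching` implementation (which lives in `semantic_matching.py`, outside this diff):

```python
import numpy as np

def hellinger_distance(p, q):
    # H(p, q) = (1 / sqrt(2)) * || sqrt(p) - sqrt(q) ||_2
    p, q = np.asarray(p, dtype=np.float64), np.asarray(q, dtype=np.float64)
    return np.sqrt(np.sum((np.sqrt(p) - np.sqrt(q)) ** 2)) / np.sqrt(2.0)

def jensen_shannon_divergence(p, q, eps=1e-12):
    # JSD(p, q) = 0.5 * KL(p || m) + 0.5 * KL(q || m), with m = (p + q) / 2.
    p, q = np.asarray(p, dtype=np.float64), np.asarray(q, dtype=np.float64)
    m = 0.5 * (p + q)
    kl = lambda a, b: np.sum(a * np.log((a + eps) / (b + eps)))
    return 0.5 * kl(p, m) + 0.5 * kl(q, m)
```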
- sm = SemanticMatching() - jsd = sm.jensen_shannon_divergence(dense_dict1, dense_dict2) - hd = sm.hellinger_distance(dense_dict1, dense_dict2) - - return jsd, hd - - def cal_doc_keywords_similarity(self, document, top_k=10): - """ - This interface can be used to find top k keywords of document. - - Args: - document(str): the input document text. - top_k(int): top k keywords of this document. - - Returns: - results(list): contains top_k keywords and their corresponding - similarity compared to document. - """ - d_tokens = self.__tokenizer.tokenize(document) - - # Do topic inference on documents to obtain topic distribution. - doc = LDADoc() - self.__engine.infer(d_tokens, doc) - doc_topic_dist = doc.sparse_topic_dist() - - items = [] - words = set() - for word in d_tokens: - if word in words: - continue - words.add(word) - wd = WordAndDis() - wd.word = word - sm = SemanticMatching() - wd.distance = sm.likelihood_based_similarity( - terms=[word], - doc_topic_dist=doc_topic_dist, - model=self.__engine.get_model()) - items.append(wd) - - def take_elem(word_dis): - return word_dis.distance - - items.sort(key=take_elem, reverse=True) - - results = [] - size = len(items) - for i in range(top_k): - if i >= size: - break - results.append({ - "word": items[i].word, - "similarity": items[i].distance - }) - - return results - - def cal_query_doc_similarity(self, query, document): - """ - This interface calculates the similarity between query and document. - - Args: - query(str): the input query text. - document(str): the input document text. - - Returns: - lda_sim(float): likelihood based similarity between query and document - based on LDA. - """ - q_tokens = self.__tokenizer.tokenize(query) - d_tokens = self.__tokenizer.tokenize(document) - - doc = LDADoc() - self.__engine.infer(d_tokens, doc) - doc_topic_dist = doc.sparse_topic_dist() - - sm = SemanticMatching() - lda_sim = sm.likelihood_based_similarity(q_tokens, doc_topic_dist, - self.__engine.get_model()) - - return lda_sim - - def infer_doc_topic_distribution(self, document): - """ - This interface infers the topic distribution of document. - - Args: - document(str): the input document text. - - Returns: - results(list): returns the topic distribution of document. - """ - tokens = self.__tokenizer.tokenize(document) - if tokens == []: - return [] - results = [] - doc = LDADoc() - self.__engine.infer(tokens, doc) - topics = doc.sparse_topic_dist() - for topic in topics: - results.append({"topic id": topic.tid, "distribution": topic.prob}) - return results - - def show_topic_keywords(self, topic_id, k=10): - """ - This interface returns first k keywords under specific topic. - - Args: - topic_id(int): topic information we want to know. - k(int): top k keywords. - - Returns: - results(dict): contains specific topic's keywords and corresponding - probability. - """ - EPS = 1e-8 - results = {} - if 0 <= topic_id < self.config.num_topics: - k = min(k, len(self.topic_words[topic_id])) - for i in range(k): - prob = self.topic_words[topic_id][i].count / \ - (self.topic_sum_table[topic_id] + EPS) - results[self.vocabulary[self.topic_words[topic_id] - [i].word_id]] = prob - return results - else: - logger.error("%d is out of range!" 
% topic_id) diff --git a/hub_module/modules/text/semantic_model/lda_news/sampler.py b/hub_module/modules/text/semantic_model/lda_news/sampler.py deleted file mode 100644 index fa79933ffbf88ceb7dd069d39c88d64a5d302d91..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/lda_news/sampler.py +++ /dev/null @@ -1,309 +0,0 @@ -import numpy as np -from tqdm import tqdm -from paddlehub.common.logger import logger - -from lda_news.document import LDADoc, SLDADoc, Token, Sentence -from lda_news.vose_alias import VoseAlias -from lda_news.util import rand, rand_k - - -class Sampler(object): - def __init__(self): - pass - - def sample_doc(self, doc): - """Sample LDA or SLDA topics for documents. - """ - raise NotImplementedError - - -class MHSampler(Sampler): - def __init__(self, model): - super().__init__() - self.__model = model - self.__topic_indexes = None - self.__alias_tables = None - self.__prob_sum = None - self.__beta_alias = VoseAlias() - self.__beta_prior_sum = None - self.__mh_steps = 2 - self.__construct_alias_table() - - def __construct_alias_table(self): - """Construct alias table for all words. - """ - logger.info("Construct alias table for alias sampling method.") - vocab_size = self.__model.vocab_size() - self.__topic_indexes = [[] for _ in range(vocab_size)] - self.__alias_tables = [VoseAlias() for _ in range(vocab_size)] - self.__prob_sum = np.zeros(vocab_size) - - # Construct each word's alias table (prior is not included). - for i in tqdm(range(vocab_size)): - dist = [] - prob_sum = 0 - for key in self.__model.word_topic(i): - topic_id = key - word_topic_count = self.__model.word_topic(i)[key] - topic_sum = self.__model.topic_sum_value(topic_id) - - self.__topic_indexes[i].append(topic_id) - q = word_topic_count / (topic_sum + self.__model.beta_sum()) - dist.append(q) - prob_sum += q - self.__prob_sum[i] = prob_sum - if len(dist) > 0: - dist = np.array(dist, dtype=np.float) - self.__alias_tables[i].initialize(dist) - - # Build prior parameter beta's alias table. 
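Why two kinds of tables: the word-proposal density factors as q(t) = (n_wt + beta) / (n_t + beta * V) = n_wt / (n_t + beta * V) + beta / (n_t + beta * V). The word-specific first term goes into the per-word alias tables built above, the word-independent second term into the single shared beta table built below, and `__propose` later picks a branch in proportion to the two partial sums. A small numeric check of this decomposition (toy numbers, not taken from any model):

```python
import numpy as np

n_wt = np.array([4.0, 0.0, 1.0])    # word-topic counts for one word, K = 3 topics
n_t = np.array([50.0, 30.0, 20.0])  # per-topic totals (topic_sum)
beta, V = 0.01, 1000                # prior and vocabulary size
beta_sum = beta * V

word_part = n_wt / (n_t + beta_sum)    # mass stored in this word's alias table
prior_part = beta / (n_t + beta_sum)   # mass stored in the shared beta table
q = (n_wt + beta) / (n_t + beta_sum)   # full proposal density

assert np.allclose(word_part + prior_part, q)
```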
- beta_dist = self.__model.beta() / ( - self.__model.topic_sum() + self.__model.beta_sum()) - self.__beta_prior_sum = np.sum(beta_dist) - self.__beta_alias.initialize(beta_dist) - - def sample_doc(self, doc): - if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): - for i in range(doc.size()): - new_topic = self.__sample_token(doc, doc.token(i)) - doc.set_topic(i, new_topic) - elif isinstance(doc, SLDADoc): - for i in range(doc.size()): - new_topic = self.__sample_sentence(doc, doc.sent(i)) - doc.set_topic(i, new_topic) - - def __sample_token(self, doc, token): - new_topic = token.topic - for i in range(self.__mh_steps): - doc_proposed_topic = self.__doc_proposal(doc, token) - new_topic = self.__word_proposal(doc, token, doc_proposed_topic) - return new_topic - - def __sample_sentence(self, doc, sent): - new_topic = sent.topic - for i in range(self.__mh_steps): - doc_proposed_topic = self.__doc_proposal(doc, sent) - new_topic = self.__word_proposal(doc, sent, doc_proposed_topic) - return new_topic - - def __doc_proposal(self, doc, token): - if isinstance(doc, LDADoc) and isinstance(token, Token): - old_topic = token.topic - dart = rand() * (doc.size() + self.__model.alpha_sum()) - if dart < doc.size(): - token_index = int(dart) - new_topic = doc.token(token_index).topic - else: - new_topic = rand_k(self.__model.num_topics()) - - if new_topic != old_topic: - proposal_old = self.__doc_proposal_distribution(doc, old_topic) - proposal_new = self.__doc_proposal_distribution(doc, new_topic) - proportion_old = self.__proportional_function( - doc, token, old_topic) - proportion_new = self.__proportional_function( - doc, token, new_topic) - transition_prob = float((proportion_new * proposal_old) / - (proportion_old * proposal_new)) - rejection = rand() - mask = -(rejection < transition_prob) - return (new_topic & mask) | (old_topic & ~mask) - - return new_topic - - elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): - sent = token - old_topic = sent.topic - dart = rand() * (doc.size() + self.__model.alpha_sum()) - if dart < doc.size(): - token_index = int(dart) - new_topic = doc.sent(token_index).topic - else: - new_topic = rand_k(self.__model.num_topics()) - - if new_topic != old_topic: - proportion_old = self.__proportional_function( - doc, sent, old_topic) - proportion_new = self.__proportional_function( - doc, sent, new_topic) - proposal_old = self.__doc_proposal_distribution(doc, old_topic) - proposal_new = self.__doc_proposal_distribution(doc, new_topic) - transition_prob = float((proportion_new * proposal_old) / - (proportion_old * proposal_new)) - rejection = rand() - mask = -(rejection < transition_prob) - return (new_topic & mask) | (old_topic & ~mask) - - return new_topic - - def __word_proposal(self, doc, token, old_topic): - if isinstance(doc, LDADoc) and isinstance(token, Token): - new_topic = self.__propose(token.id) - if new_topic != old_topic: - proposal_old = self.__word_proposal_distribution( - token.id, old_topic) - proposal_new = self.__word_proposal_distribution( - token.id, new_topic) - proportion_old = self.__proportional_function( - doc, token, old_topic) - proportion_new = self.__proportional_function( - doc, token, new_topic) - transition_prob = float((proportion_new * proposal_old) / - (proportion_old * proposal_new)) - rejection = rand() - mask = -(rejection < transition_prob) - return (new_topic & mask) | (old_topic & ~mask) - return new_topic - - elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): - sent = token - new_topic = old_topic - 
for word_id in sent.tokens:
-                new_topic = self.__propose(word_id)
-                if new_topic != old_topic:
-                    proportion_old = self.__proportional_function(
-                        doc, sent, old_topic)
-                    proportion_new = self.__proportional_function(
-                        doc, sent, new_topic)
-                    proposal_old = self.__word_proposal_distribution(
-                        word_id, old_topic)
-                    proposal_new = self.__word_proposal_distribution(
-                        word_id, new_topic)
-                    transition_prob = float((proportion_new * proposal_old) /
-                                            (proportion_old * proposal_new))
-                    rejection = rand()
-                    mask = -(rejection < transition_prob)
-                    new_topic = (new_topic & mask) | (old_topic & ~mask)
-            return new_topic
-
-    def __proportional_function(self, doc, token, new_topic):
-        if isinstance(doc, LDADoc) and isinstance(token, Token):
-            old_topic = token.topic
-            dt_alpha = doc.topic_sum(new_topic) + self.__model.alpha()
-            wt_beta = self.__model.word_topic_value(
-                token.id, new_topic) + self.__model.beta()
-            t_sum_beta_sum = self.__model.topic_sum_value(
-                new_topic) + self.__model.beta_sum()
-            if new_topic == old_topic and wt_beta > 1:
-                if dt_alpha > 1:
-                    dt_alpha -= 1
-                wt_beta -= 1
-                t_sum_beta_sum -= 1
-            return dt_alpha * wt_beta / t_sum_beta_sum
-
-        elif isinstance(doc, SLDADoc) and isinstance(token, Sentence):
-            sent = token
-            old_topic = sent.topic
-            result = doc.topic_sum(new_topic) + self.__model.alpha()
-            if new_topic == old_topic:
-                result -= 1
-            for word_id in sent.tokens:
-                wt_beta = self.__model.word_topic_value(
-                    word_id, new_topic) + self.__model.beta()
-                t_sum_beta_sum = self.__model.topic_sum_value(
-                    new_topic) + self.__model.beta_sum()
-                if new_topic == old_topic and wt_beta > 1:
-                    wt_beta -= 1
-                    t_sum_beta_sum -= 1
-                result *= wt_beta / t_sum_beta_sum
-            return result
-        else:
-            logger.error("Wrong input argument type!")
-
-    def __word_proposal_distribution(self, word_id, topic):
-        wt_beta = self.__model.word_topic_value(word_id,
-                                                topic) + self.__model.beta()
-        t_sum_beta_sum = self.__model.topic_sum_value(
-            topic) + self.__model.beta_sum()
-        return wt_beta / t_sum_beta_sum
-
-    def __doc_proposal_distribution(self, doc, topic):
-        return doc.topic_sum(topic) + self.__model.alpha()
-
-    def __propose(self, word_id):
-        dart = rand() * (self.__prob_sum[word_id] + self.__beta_prior_sum)
-        if dart < self.__prob_sum[word_id]:
-            idx = self.__alias_tables[word_id].generate()
-            topic = self.__topic_indexes[word_id][idx]
-        else:
-            topic = self.__beta_alias.generate()
-        return topic
-
-
-class GibbsSampler(Sampler):
-    def __init__(self, model):
-        super().__init__()
-        self.__model = model
-
-    def sample_doc(self, doc):
-        if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc):
-            for i in range(doc.size()):
-                new_topic = self.__sample_token(doc, doc.token(i))
-                doc.set_topic(i, new_topic)
-        elif isinstance(doc, SLDADoc):
-            for i in range(doc.size()):
-                new_topic = self.__sample_sentence(doc, doc.sent(i))
-                doc.set_topic(i, new_topic)
-
-    def __sample_token(self, doc, token):
-        old_topic = token.topic
-        num_topics = self.__model.num_topics()
-        accum_prob = np.zeros(num_topics)
-        prob = np.zeros(num_topics)
-        sum_ = 0
-        for i in range(num_topics):
-            dt_alpha = doc.topic_sum(i) + self.__model.alpha()
-            wt_beta = self.__model.word_topic_value(token.id,
-                                                    i) + self.__model.beta()
-            t_sum_beta_sum = self.__model.topic_sum_value(
-                i) + self.__model.beta_sum()
-            if i == old_topic and wt_beta > 1:
-                if dt_alpha > 1:
-                    dt_alpha -= 1
-                wt_beta -= 1
-                t_sum_beta_sum -= 1
-            prob[i] = dt_alpha * wt_beta / t_sum_beta_sum
-            sum_ += prob[i]
-            accum_prob[i] = prob[i] if i == 0 else accum_prob[i - 1] + prob[i]
-
-        dart = rand() * sum_
-        if dart <= accum_prob[0]:
-            return 0
-        for i in range(1, num_topics):
-            if accum_prob[i - 1] < dart <= accum_prob[i]:
-                return i
-        return num_topics - 1
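The loop above evaluates the unnormalized collapsed-Gibbs conditional p(z_i = t | rest), proportional to (n_dt + alpha) * (n_wt + beta) / (n_t + beta * V) with the current assignment's own count subtracted, and then inverts the running cumulative sum to draw a topic. The same draw in isolation, as a minimal NumPy sketch:

```python
import numpy as np

def draw_from_unnormalized(prob):
    """Inverse-CDF draw from an unnormalized discrete distribution,
    mirroring the accum_prob / dart pattern in __sample_token above."""
    accum = np.cumsum(prob)
    dart = np.random.rand() * accum[-1]
    return int(np.searchsorted(accum, dart))  # first index with enough cumulative mass

weights = np.array([0.2, 1.5, 0.3])  # e.g. dt_alpha * wt_beta / t_sum_beta_sum per topic
topic = draw_from_unnormalized(weights)
assert 0 <= topic < len(weights)
```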
-    def __sample_sentence(self, doc, sent):
-        old_topic = sent.topic
-        num_topics = self.__model.num_topics()
-        accum_prob = np.zeros(num_topics)
-        prob = np.zeros(num_topics)
-        sum_ = 0
-        for t in range(num_topics):
-            dt_alpha = doc.topic_sum(t) + self.__model.alpha()
-            t_sum_beta_sum = self.__model.topic_sum_value(
-                t) + self.__model.beta_sum()
-            if t == old_topic:
-                if dt_alpha > 1:
-                    dt_alpha -= 1
-                if t_sum_beta_sum > 1:
-                    t_sum_beta_sum -= 1
-            prob[t] = dt_alpha
-            for i in range(len(sent.tokens)):
-                w = sent.tokens[i]
-                wt_beta = self.__model.word_topic_value(
-                    w, t) + self.__model.beta()
-                if t == old_topic and wt_beta > 1:
-                    wt_beta -= 1
-                # Note: if the sentence is very long, this running product of
-                # many small factors may underflow and lose numerical accuracy.
-                prob[t] *= wt_beta / t_sum_beta_sum
-            sum_ += prob[t]
-            accum_prob[t] = prob[t] if t == 0 else accum_prob[t - 1] + prob[t]
-
-        dart = rand() * sum_
-        if dart <= accum_prob[0]:
-            return 0
-        for t in range(1, num_topics):
-            if accum_prob[t - 1] < dart <= accum_prob[t]:
-                return t
-        return num_topics - 1
diff --git a/hub_module/modules/text/semantic_model/lda_news/tokenizer.py b/hub_module/modules/text/semantic_model/lda_news/tokenizer.py
deleted file mode 100644
index b417eaa9a20738d481f4bc9655a5c859e7ff71d3..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/lda_news/tokenizer.py
+++ /dev/null
@@ -1,126 +0,0 @@
-"""This file defines the tokenizer classes.
-"""
-
-
-class Tokenizer(object):
-    """Base tokenizer class.
-    """
-
-    def __init__(self):
-        pass
-
-    def tokenize(self, text):
-        raise NotImplementedError
-
-
-class SimpleTokenizer(Tokenizer):
-    """Simple FMM (Forward Maximum Matching) word tokenizer. This tokenizer can
-    only be used in the topic model demo, not in real business application
-    scenarios.
-
-    Note: this tokenizer can only recognize the words in the corresponding
-    vocab file.
-    """
-
-    def __init__(self, vocab_path):
-        super().__init__()
-        self.__max_word_len = 0
-        self.__vocab = set()
-        self.__load_vocab(vocab_path)
-
-    def tokenize(self, text):
-        """Tokenize the input string `text` and return the result.
-        """
-        text_len = len(text)
-        result = []
-        i = 0
-        while i < text_len:
-            word = found_word = ""
-            # Deal with English characters.
-            if self.__is_eng_char(text[i]):
-                for j in range(i, text_len + 1):
-                    if j < text_len and self.__is_eng_char(text[j]):
-                        word += self.__tolower(text[j])
-                    else:
-                        # Forward matching by character granularity.
-                        if word in self.__vocab:
-                            result.append(word)
-                        i = j - 1
-                        break
-            else:
-                for j in range(i, min(i + self.__max_word_len, text_len)):
-                    word += text[j]
-                    if word in self.__vocab:
-                        found_word = word
-                if len(found_word) > 0:
-                    result.append(found_word)
-                    i += len(found_word) - 1
-            i += 1
-        return result
-
-    def contains(self, word):
-        """Check whether the word is in the vocabulary.
-        """
-        return word in self.__vocab
-
-    def __load_vocab(self, vocab_path):
-        """Load the word dictionary.
- """ - with open(vocab_path, 'r', encoding='utf-8') as fin: - vocab_size = 0 - for line in fin.readlines(): - fields = line.strip().split('\t') - assert len(fields) >= 2 - word = fields[1] - self.__max_word_len = max(self.__max_word_len, len(word)) - self.__vocab.add(word) - vocab_size += 1 - - def __is_eng_char(self, c): - """Check whether char c is an English character. - """ - return (c >= 'A' and c <= 'Z') or (c >= 'a' and c <= 'z') - - def __tolower(self, c): - """Return the lowercase character of the corresponding character, or return - the original character if there is no corresponding lowercase character. - """ - return c.lower() - - -class LACTokenizer(Tokenizer): - def __init__(self, vocab_path, lac): - super().__init__() - self.__max_word_len = 0 - self.__vocab = set() - self.__lac = lac - self.__load_vocab(vocab_path) - - def __load_vocab(self, vocab_path): - """Load the word dictionary. - """ - with open(vocab_path, 'r', encoding='utf-8') as fin: - vocab_size = 0 - for line in fin.readlines(): - fields = line.strip().split('\t') - assert len(fields) >= 2 - word = fields[1] - self.__max_word_len = max(self.__max_word_len, len(word)) - self.__vocab.add(word) - vocab_size += 1 - - def tokenize(self, text): - results = self.__lac.lexical_analysis( - texts=[text], use_gpu=False, batch_size=1, return_tag=True) - # Change English words to lower case. - # And just preserve the word in vocab. - words = results[0]["word"] - result = [] - for word in words: - word = word.lower() - if word in self.__vocab: - result.append(word) - return result - - def contains(self, word): - """Check whether the word is in the vocabulary. - """ - return word in self.__vocab diff --git a/hub_module/modules/text/semantic_model/lda_news/vocab.py b/hub_module/modules/text/semantic_model/lda_news/vocab.py deleted file mode 100644 index cc8d15e90a32b5ac556a102192c8a4e884b580ef..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/lda_news/vocab.py +++ /dev/null @@ -1,43 +0,0 @@ -from paddlehub.common.logger import logger - -OOV = -1 - - -class WordCount(object): - def __init__(self, word_id, count): - self.word_id = word_id - self.count = count - - -class Vocab(object): - def __init__(self): - self.__term2id = {} - self.__id2term = {} - - def get_id(self, word): - if word not in self.__term2id: - return OOV - return self.__term2id[word] - - def load(self, vocab_file): - self.__term2id = {} - self.__id2term = {} - with open(vocab_file, 'r', encoding='utf-8') as fin: - for line in fin.readlines(): - fields = line.strip().split('\t') - assert len( - fields) == 5, "Vocabulary file [%s] format error!" % ( - vocab_file) - term = fields[1] - id_ = int(fields[2]) - if term in self.__term2id: - logger.error("Duplicate word [%s] in vocab file!" % (term)) - continue - self.__term2id[term] = id_ - self.__id2term[id_] = term - - def size(self): - return len(self.__term2id) - - def vocabulary(self): - return self.__id2term diff --git a/hub_module/modules/text/semantic_model/lda_novel/document.py b/hub_module/modules/text/semantic_model/lda_novel/document.py deleted file mode 100644 index b79acd0794f60fb0a401726aea0b8b51deda90a1..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/lda_novel/document.py +++ /dev/null @@ -1,179 +0,0 @@ -import numpy as np - - -class Topic(object): - """Basic data structure of topic, contains topic id and - corresponding probability. 
- """ - - def __init__(self, tid, prob): - self.tid = tid # topic id - self.prob = prob # topic probability - - -class Token(object): - """Basic storage unit of LDA documents, contains word id - and corresponding topic. - """ - - def __init__(self, topic, id): - self.topic = topic - self.id = id - - -class Sentence(object): - """Basic storage unit of SentenceLDA documents, contains word ids - of the sentence and its corresponding topic id. - """ - - def __init__(self, topic, tokens): - self.topic = topic - self.tokens = tokens - - -class LDADoc(object): - """The storage structure of LDA model's inference result. - """ - - def __init__(self): - self._num_topics = None # Number of topics. - self._num_accum = None # Number of accumulated sample rounds. - self._alpha = None # Document prior parameter. - self._tokens = None # Storage structure of inference results. - self._topic_sum = None # Document's topic sum in one round samples. - self._accum_topic_sum = None # Accumulated results of topic sum. - - def init(self, num_topics): - """Initialize the LDADoc according to num_topics. - """ - self._num_topics = num_topics - self._num_accum = 0 - self._tokens = [] - self._topic_sum = np.zeros(self._num_topics) - self._accum_topic_sum = np.zeros(self._num_topics) - - def add_token(self, token): - """Add new word to current LDADoc. - Arg: - token: Token class object. - """ - assert token.topic >= 0, "Topic %d out of range!" % token.topic - assert token.topic < self._num_topics, "Topic %d out of range!" % token.topic - self._tokens.append(token) - self._topic_sum[token.topic] += 1 - - def token(self, index): - return self._tokens[index] - - def set_topic(self, index, new_topic): - """Set the index word's topic to new_topic, and update the corresponding - topic distribution. - """ - assert new_topic >= 0, "Topic %d out of range!" % new_topic - assert new_topic < self._num_topics, "Topic %d out of range!" % new_topic - old_topic = self._tokens[index].topic - if new_topic == old_topic: - return - self._tokens[index].topic = new_topic - self._topic_sum[old_topic] -= 1 - self._topic_sum[new_topic] += 1 - - def set_alpha(self, alpha): - self._alpha = alpha - - def size(self): - """Return number of words in LDADoc. - """ - return len(self._tokens) - - def topic_sum(self, topic_id): - return self._topic_sum[topic_id] - - def sparse_topic_dist(self, sort=True): - """Return the topic distribution of documents in sparse format. - By default, it is sorted according to the topic probability - under the descending order. - """ - topic_dist = [] - sum_ = np.sum(self._accum_topic_sum) - if sum_ == 0: - return topic_dist - for i in range(0, self._num_topics): - if self._accum_topic_sum[i] == 0: - continue - topic_dist.append(Topic(i, self._accum_topic_sum[i] * 1.0 / sum_)) - if sort: - - def take_elem(topic): - return topic.prob - - topic_dist.sort(key=take_elem, reverse=True) - if topic_dist is None: - topic_dist = [] - - return topic_dist - - def dense_topic_dist(self): - """Return the distribution of document topics in dense format, - taking into account the prior parameter alpha. - """ - dense_dist = np.zeros(self._num_topics) - if self.size() == 0: - return dense_dist - dense_dist = ( - self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / ( - self.size() + self._alpha * self._num_topics) - return dense_dist - - def accumulate_topic_num(self): - self._accum_topic_sum += self._topic_sum - self._num_accum += 1 - - -class SLDADoc(LDADoc): - """Sentence LDA Document, inherited from LDADoc. 
- Add add_sentence interface. - """ - - def __init__(self): - super().__init__() - self.__sentences = None - - def init(self, num_topics): - """Initialize the SLDADoc according to num_topics. - """ - self._num_topics = num_topics - self.__sentences = [] - self._num_accum = 0 - self._topic_sum = np.zeros(self._num_topics) - self._accum_topic_sum = np.zeros(self._num_topics) - - def add_sentence(self, sent): - """Add new sentence to current SLDADoc. - Arg: - sent: Sentence class object. - """ - assert sent.topic >= 0, "Topic %d out of range!" % (sent.topic) - assert sent.topic < self._num_topics, "Topic %d out of range!" % ( - sent.topic) - self.__sentences.append(sent) - self._topic_sum[sent.topic] += 1 - - def set_topic(self, index, new_topic): - assert new_topic >= 0, "Topic %d out of range!" % (new_topic) - assert new_topic < self._num_topics, "Topic %d out of range!" % ( - new_topic) - old_topic = self.__sentences[index].topic - if new_topic == old_topic: - return - self.__sentences[index].topic = new_topic - self._topic_sum[old_topic] -= 1 - self._topic_sum[new_topic] += 1 - - def size(self): - """Return number of sentences in SLDADoc. - """ - return len(self.__sentences) - - def sent(self, index): - return self.__sentences[index] diff --git a/hub_module/modules/text/semantic_model/lda_novel/inference_engine.py b/hub_module/modules/text/semantic_model/lda_novel/inference_engine.py deleted file mode 100644 index d844ad8cb28b2c891d93d9682f0be78788159c80..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/lda_novel/inference_engine.py +++ /dev/null @@ -1,94 +0,0 @@ -import os - -from paddlehub.common.logger import logger - -from lda_novel.config import ModelConfig -from lda_novel.util import load_prototxt, fix_random_seed, rand_k -from lda_novel.model import TopicModel -from lda_novel.sampler import GibbsSampler, MHSampler -from lda_novel.document import LDADoc, SLDADoc, Token, Sentence -from lda_novel.vocab import OOV - - -class SamplerType: - GibbsSampling = 0 - MetropolisHastings = 1 - - -class InferenceEngine(object): - def __init__(self, - model_dir, - conf_file, - type=SamplerType.MetropolisHastings): - # Read model configuration. - config = ModelConfig() - conf_file_path = os.path.join(model_dir, conf_file) - load_prototxt(conf_file_path, config) - self.__model = TopicModel(model_dir, config) - self.__config = config - - # Initialize the sampler according to the configuration. - if type == SamplerType.GibbsSampling: - self.__sampler = GibbsSampler(self.__model) - elif type == SamplerType.MetropolisHastings: - self.__sampler = MHSampler(self.__model) - - def infer(self, input, doc): - """Perform LDA topic inference on input, and store the results in doc. - Args: - input: a list of strings after tokenization. - doc: LDADoc type or SLDADoc type. 
- """ - fix_random_seed() - if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): - doc.init(self.__model.num_topics()) - doc.set_alpha(self.__model.alpha()) - for token in input: - id_ = self.__model.term_id(token) - if id_ != OOV: - init_topic = rand_k(self.__model.num_topics()) - doc.add_token(Token(init_topic, id_)) - self.lda_infer(doc, 20, 50) - elif isinstance(doc, SLDADoc): - doc.init(self.__model.num_topics()) - doc.set_alpha(self.__model.alpha()) - for sent in input: - words = [] - for token in sent: - id_ = self.__model.term_id(token) - if id_ != OOV: - words.append(id_) - init_topic = rand_k(self.__model.num_topics()) - doc.add_sentence(Sentence(init_topic, words)) - self.slda_infer(doc, 20, 50) - else: - logger.error("Wrong Doc Type!") - - def lda_infer(self, doc, burn_in_iter, total_iter): - assert burn_in_iter >= 0 - assert total_iter > 0 - assert total_iter > burn_in_iter - - for iter_ in range(total_iter): - self.__sampler.sample_doc(doc) - if iter_ >= burn_in_iter: - doc.accumulate_topic_num() - - def slda_infer(self, doc, burn_in_iter, total_iter): - assert burn_in_iter >= 0 - assert total_iter > 0 - assert total_iter > burn_in_iter - - for iter_ in range(total_iter): - self.__sampler.sample_doc(doc) - if iter_ >= burn_in_iter: - doc.accumulate_topic_num() - - def model_type(self): - return self.__model.type() - - def get_model(self): - return self.__model - - def get_config(self): - return self.__config diff --git a/hub_module/modules/text/semantic_model/lda_novel/model.py b/hub_module/modules/text/semantic_model/lda_novel/model.py deleted file mode 100644 index 05a3d84c092dce733cab1f8dabb2a2c02e2127bb..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/lda_novel/model.py +++ /dev/null @@ -1,127 +0,0 @@ -import os -from collections import OrderedDict - -import numpy as np -from tqdm import tqdm -from paddlehub.common.logger import logger - -from lda_novel.vocab import Vocab, WordCount - - -class TopicModel(object): - """Storage Structure of Topic model, including vocabulary and word topic count. - """ - - def __init__(self, model_dir, config): - """ - Args: - model_dir: the path of model directory - config: ModelConfig class. - """ - self.__word_topic = None # Model parameter of word topic. - self.__vocab = Vocab() # Vocab data structure of model. - self.__num_topics = config.num_topics # Number of topics. - self.__alpha = config.alpha - self.__alpha_sum = self.__alpha * self.__num_topics - self.__beta = config.beta - self.__beta_sum = None - self.__type = config.type # Model type. - self.__topic_sum = np.zeros( - self.__num_topics, - dtype="int64") # Accum sum of each topic in word topic. - self.__topic_words = [[] for _ in range(self.__num_topics)] - word_topic_path = os.path.join(model_dir, config.word_topic_file) - vocab_path = os.path.join(model_dir, config.vocab_file) - self.load_model(word_topic_path, vocab_path) - - def term_id(self, term): - return self.__vocab.get_id(term) - - def load_model(self, word_topic_path, vocab_path): - - # Loading vocabulary - self.__vocab.load(vocab_path) - - self.__beta_sum = self.__beta * self.__vocab.size() - self.__word_topic = [{} for _ in range(self.__vocab.size())] # 字典列表 - self.__load_word_dict(word_topic_path) - logger.info( - "Model Info: #num_topics=%d #vocab_size=%d alpha=%f beta=%f" % - (self.num_topics(), self.vocab_size(), self.alpha(), self.beta())) - - def word_topic_value(self, word_id, topic_id): - """Return value of specific word under specific topic in the model. 
- """ - word_dict = self.__word_topic[word_id] - if topic_id not in word_dict: - return 0 - return word_dict[topic_id] - - def word_topic(self, term_id): - """Return the topic distribution of a word. - """ - return self.__word_topic[term_id] - - def topic_sum_value(self, topic_id): - return self.__topic_sum[topic_id] - - def topic_sum(self): - return self.__topic_sum - - def num_topics(self): - return self.__num_topics - - def vocab_size(self): - return self.__vocab.size() - - def alpha(self): - return self.__alpha - - def alpha_sum(self): - return self.__alpha_sum - - def beta(self): - return self.__beta - - def beta_sum(self): - return self.__beta_sum - - def type(self): - return self.__type - - def __load_word_dict(self, word_dict_path): - """Load the word topic parameters. - """ - logger.info("Loading word topic.") - with open(word_dict_path, 'r', encoding='utf-8') as f: - for line in tqdm(f.readlines()): - fields = line.strip().split(" ") - assert len(fields) > 0, "Model file format error!" - term_id = int(fields[0]) - assert term_id < self.vocab_size(), "Term id out of range!" - assert term_id >= 0, "Term id out of range!" - for i in range(1, len(fields)): - topic_count = fields[i].split(":") - assert len(topic_count) == 2, "Topic count format error!" - - topic_id = int(topic_count[0]) - assert topic_id >= 0, "Topic out of range!" - assert topic_id < self.__num_topics, "Topic out of range!" - - count = int(topic_count[1]) - assert count >= 0, "Topic count error!" - - self.__word_topic[term_id][topic_id] = count - self.__topic_sum[topic_id] += count - self.__topic_words[topic_id].append( - WordCount(term_id, count)) - new_dict = OrderedDict() - for key in sorted(self.__word_topic[term_id]): - new_dict[key] = self.__word_topic[term_id][key] - self.__word_topic[term_id] = new_dict - - def get_vocab(self): - return self.__vocab.vocabulary() - - def topic_words(self): - return self.__topic_words diff --git a/hub_module/modules/text/semantic_model/lda_novel/module.py b/hub_module/modules/text/semantic_model/lda_novel/module.py deleted file mode 100644 index 8bfdb71380ac20aa87ab8f27996b6502145781e0..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/lda_novel/module.py +++ /dev/null @@ -1,200 +0,0 @@ -import os - -import paddlehub as hub -from paddlehub.module.module import moduleinfo -from paddlehub.common.logger import logger - -from lda_novel.inference_engine import InferenceEngine -from lda_novel.document import LDADoc, SLDADoc -from lda_novel.semantic_matching import SemanticMatching, WordAndDis -from lda_novel.tokenizer import LACTokenizer, SimpleTokenizer -from lda_novel.config import ModelType -from lda_novel.vocab import Vocab, WordCount - - -@moduleinfo( - name="lda_novel", - version="1.0.2", - summary= - "This is a PaddleHub Module for LDA topic model in novel dataset, where we can calculate doc distance, calculate the similarity between query and document, etc.", - author="DesmonDay", - author_email="", - type="nlp/semantic_model") -class TopicModel(hub.Module): - def _initialize(self): - """ - Initialize with the necessary elements. 
- """ - self.model_dir = os.path.join(self.directory, 'novel') - self.conf_file = 'lda.conf' - self.__engine = InferenceEngine(self.model_dir, self.conf_file) - self.vocab_path = os.path.join(self.model_dir, 'vocab_info.txt') - lac = hub.Module(name="lac") - # self.__tokenizer = SimpleTokenizer(self.vocab_path) - self.__tokenizer = LACTokenizer(self.vocab_path, lac) - - self.vocabulary = self.__engine.get_model().get_vocab() - self.config = self.__engine.get_config() - self.topic_words = self.__engine.get_model().topic_words() - self.topic_sum_table = self.__engine.get_model().topic_sum() - - def take_elem(word_count): - return word_count.count - - for i in range(self.config.num_topics): - self.topic_words[i].sort(key=take_elem, reverse=True) - - logger.info("Finish initialization.") - - def cal_doc_distance(self, doc_text1, doc_text2): - """ - This interface calculates the distance between documents. - - Args: - doc_text1(str): the input document text 1. - doc_text2(str): the input document text 2. - - Returns: - jsd(float): Jensen-Shannon Divergence distance of two documents. - hd(float): Hellinger Distance of two documents. - """ - doc1_tokens = self.__tokenizer.tokenize(doc_text1) - doc2_tokens = self.__tokenizer.tokenize(doc_text2) - - # Document topic inference. - doc1, doc2 = LDADoc(), LDADoc() - self.__engine.infer(doc1_tokens, doc1) - self.__engine.infer(doc2_tokens, doc2) - - # To calculate jsd, we need dense document topic distribution. - dense_dict1 = doc1.dense_topic_dist() - dense_dict2 = doc2.dense_topic_dist() - # Calculate the distance between distributions. - # The smaller the distance, the higher the document semantic similarity. - sm = SemanticMatching() - jsd = sm.jensen_shannon_divergence(dense_dict1, dense_dict2) - hd = sm.hellinger_distance(dense_dict1, dense_dict2) - - return jsd, hd - - def cal_doc_keywords_similarity(self, document, top_k=10): - """ - This interface can be used to find topk keywords of document. - - Args: - document(str): the input document text. - top_k(int): top k keywords of this document. - - Returns: - results(list): contains top_k keywords and their corresponding - similarity compared to document. - """ - d_tokens = self.__tokenizer.tokenize(document) - - # Do topic inference on documents to obtain topic distribution. - doc = LDADoc() - self.__engine.infer(d_tokens, doc) - doc_topic_dist = doc.sparse_topic_dist() - - items = [] - words = set() - for word in d_tokens: - if word in words: - continue - words.add(word) - wd = WordAndDis() - wd.word = word - sm = SemanticMatching() - wd.distance = sm.likelihood_based_similarity( - terms=[word], - doc_topic_dist=doc_topic_dist, - model=self.__engine.get_model()) - items.append(wd) - - def take_elem(word_dis): - return word_dis.distance - - items.sort(key=take_elem, reverse=True) - - results = [] - size = len(items) - for i in range(top_k): - if i >= size: - break - results.append({ - "word": items[i].word, - "similarity": items[i].distance - }) - - return results - - def cal_query_doc_similarity(self, query, document): - """ - This interface calculates the similarity between query and document. - - Args: - query(str): the input query text. - document(str): the input document text. - - Returns: - lda_sim(float): likelihood based similarity between query and document - based on LDA. 
- """ - q_tokens = self.__tokenizer.tokenize(query) - d_tokens = self.__tokenizer.tokenize(document) - - doc = LDADoc() - self.__engine.infer(d_tokens, doc) - doc_topic_dist = doc.sparse_topic_dist() - - sm = SemanticMatching() - lda_sim = sm.likelihood_based_similarity(q_tokens, doc_topic_dist, - self.__engine.get_model()) - - return lda_sim - - def infer_doc_topic_distribution(self, document): - """ - This interface infers the topic distribution of document. - - Args: - document(str): the input document text. - - Returns: - results(list): returns the topic distribution of document. - """ - tokens = self.__tokenizer.tokenize(document) - if tokens == []: - return [] - results = [] - doc = LDADoc() - self.__engine.infer(tokens, doc) - topics = doc.sparse_topic_dist() - for topic in topics: - results.append({"topic id": topic.tid, "distribution": topic.prob}) - return results - - def show_topic_keywords(self, topic_id, k=10): - """ - This interface returns the k keywords under specific topic. - - Args: - topic_id(int): topic information we want to know. - k(int): top k keywords. - - Returns: - results(dict): contains specific topic's keywords and corresponding - probability. - """ - EPS = 1e-8 - results = {} - if 0 <= topic_id < self.config.num_topics: - k = min(k, len(self.topic_words[topic_id])) - for i in range(k): - prob = self.topic_words[topic_id][i].count / \ - (self.topic_sum_table[topic_id] + EPS) - results[self.vocabulary[self.topic_words[topic_id] - [i].word_id]] = prob - return results - else: - logger.error("%d is out of range!" % topic_id) diff --git a/hub_module/modules/text/semantic_model/lda_novel/sampler.py b/hub_module/modules/text/semantic_model/lda_novel/sampler.py deleted file mode 100644 index 533c2ac15814b37a7f90a190b47439d86fa67ee1..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/lda_novel/sampler.py +++ /dev/null @@ -1,311 +0,0 @@ -import os - -import numpy as np -from tqdm import tqdm -from paddlehub.common.logger import logger - -from lda_novel.document import LDADoc, SLDADoc, Token, Sentence -from lda_novel.vose_alias import VoseAlias -from lda_novel.util import rand, rand_k - - -class Sampler(object): - def __init__(self): - pass - - def sample_doc(self, doc): - """Sample LDA or SLDA topics for documents. - """ - raise NotImplementedError - - -class MHSampler(Sampler): - def __init__(self, model): - super().__init__() - self.__model = model - self.__topic_indexes = None - self.__alias_tables = None - self.__prob_sum = None - self.__beta_alias = VoseAlias() - self.__beta_prior_sum = None - self.__mh_steps = 2 - self.__construct_alias_table() - - def __construct_alias_table(self): - """Construct alias table for all words. - """ - logger.info("Construct alias table for alias sampling method.") - vocab_size = self.__model.vocab_size() - self.__topic_indexes = [[] for _ in range(vocab_size)] - self.__alias_tables = [VoseAlias() for _ in range(vocab_size)] - self.__prob_sum = np.zeros(vocab_size) - - # Construct each word's alias table (prior is not included). 
- for i in tqdm(range(vocab_size)): - dist = [] - prob_sum = 0 - for key in self.__model.word_topic(i): - topic_id = key - word_topic_count = self.__model.word_topic(i)[key] - topic_sum = self.__model.topic_sum_value(topic_id) - - self.__topic_indexes[i].append(topic_id) - q = word_topic_count / (topic_sum + self.__model.beta_sum()) - dist.append(q) - prob_sum += q - self.__prob_sum[i] = prob_sum - if len(dist) > 0: - dist = np.array(dist, dtype=np.float) - self.__alias_tables[i].initialize(dist) - - # Build prior parameter beta's alias table. - beta_dist = self.__model.beta() / ( - self.__model.topic_sum() + self.__model.beta_sum()) - self.__beta_prior_sum = np.sum(beta_dist) - self.__beta_alias.initialize(beta_dist) - - def sample_doc(self, doc): - if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): - for i in range(doc.size()): - new_topic = self.__sample_token(doc, doc.token(i)) - doc.set_topic(i, new_topic) - elif isinstance(doc, SLDADoc): - for i in range(doc.size()): - new_topic = self.__sample_sentence(doc, doc.sent(i)) - doc.set_topic(i, new_topic) - - def __sample_token(self, doc, token): - new_topic = token.topic - for i in range(self.__mh_steps): - doc_proposed_topic = self.__doc_proposal(doc, token) - new_topic = self.__word_proposal(doc, token, doc_proposed_topic) - return new_topic - - def __sample_sentence(self, doc, sent): - new_topic = sent.topic - for i in range(self.__mh_steps): - doc_proposed_topic = self.__doc_proposal(doc, sent) - new_topic = self.__word_proposal(doc, sent, doc_proposed_topic) - return new_topic - - def __doc_proposal(self, doc, token): - if isinstance(doc, LDADoc) and isinstance(token, Token): - old_topic = token.topic - dart = rand() * (doc.size() + self.__model.alpha_sum()) - if dart < doc.size(): - token_index = int(dart) - new_topic = doc.token(token_index).topic - else: - new_topic = rand_k(self.__model.num_topics()) - - if new_topic != old_topic: - proposal_old = self.__doc_proposal_distribution(doc, old_topic) - proposal_new = self.__doc_proposal_distribution(doc, new_topic) - proportion_old = self.__proportional_function( - doc, token, old_topic) - proportion_new = self.__proportional_function( - doc, token, new_topic) - transition_prob = float((proportion_new * proposal_old) / - (proportion_old * proposal_new)) - rejection = rand() - mask = -(rejection < transition_prob) - return (new_topic & mask) | (old_topic & ~mask) - - return new_topic - - elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): - sent = token - old_topic = sent.topic - dart = rand() * (doc.size() + self.__model.alpha_sum()) - if dart < doc.size(): - token_index = int(dart) - new_topic = doc.sent(token_index).topic - else: - new_topic = rand_k(self.__model.num_topics()) - - if new_topic != old_topic: - proportion_old = self.__proportional_function( - doc, sent, old_topic) - proportion_new = self.__proportional_function( - doc, sent, new_topic) - proposal_old = self.__doc_proposal_distribution(doc, old_topic) - proposal_new = self.__doc_proposal_distribution(doc, new_topic) - transition_prob = float((proportion_new * proposal_old) / - (proportion_old * proposal_new)) - rejection = rand() - mask = -(rejection < transition_prob) - return (new_topic & mask) | (old_topic & ~mask) - - return new_topic - - def __word_proposal(self, doc, token, old_topic): - if isinstance(doc, LDADoc) and isinstance(token, Token): - new_topic = self.__propose(token.id) - if new_topic != old_topic: - proposal_old = self.__word_proposal_distribution( - token.id, old_topic) - 
proposal_new = self.__word_proposal_distribution( - token.id, new_topic) - proportion_old = self.__proportional_function( - doc, token, old_topic) - proportion_new = self.__proportional_function( - doc, token, new_topic) - transition_prob = float((proportion_new * proposal_old) / - (proportion_old * proposal_new)) - rejection = rand() - mask = -(rejection < transition_prob) - return (new_topic & mask) | (old_topic & ~mask) - return new_topic - - elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): - sent = token - new_topic = old_topic - for word_id in sent.tokens: - new_topic = self.__propose(word_id) - if new_topic != old_topic: - proportion_old = self.__proportional_function( - doc, sent, old_topic) - proportion_new = self.__proportional_function( - doc, sent, new_topic) - proposal_old = self.__word_proposal_distribution( - word_id, old_topic) - proposal_new = self.__word_proposal_distribution( - word_id, new_topic) - transition_prob = float((proportion_new * proposal_old) / - (proportion_old * proposal_new)) - rejection = rand() - mask = -(rejection < transition_prob) - new_topic = (new_topic & mask) | (old_topic & ~mask) - return new_topic - - def __proportional_function(self, doc, token, new_topic): - if isinstance(doc, LDADoc) and isinstance(token, Token): - old_topic = token.topic - dt_alpha = doc.topic_sum(new_topic) + self.__model.alpha() - wt_beta = self.__model.word_topic_value( - token.id, new_topic) + self.__model.beta() - t_sum_beta_sum = self.__model.topic_sum_value( - new_topic) + self.__model.beta_sum() - if new_topic == old_topic and wt_beta > 1: - if dt_alpha > 1: - dt_alpha -= 1 - wt_beta -= 1 - t_sum_beta_sum -= 1 - return dt_alpha * wt_beta / t_sum_beta_sum - - elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): - sent = token - old_topic = sent.topic - result = doc.topic_sum(new_topic) + self.__model.alpha() - if new_topic == old_topic: - result -= 1 - for word_id in sent.tokens: - wt_beta = self.__model.word_topic_value( - word_id, new_topic) + self.__model.beta() - t_sum_beta_sum = self.__model.topic_sum_value( - new_topic) + self.__model.beta_sum() - if new_topic == old_topic and wt_beta > 1: - wt_beta -= 1 - t_sum_beta_sum -= 1 - result *= wt_beta / t_sum_beta_sum - return result - else: - logger.error("Wrong input argument type!") - - def __word_proposal_distribution(self, word_id, topic): - wt_beta = self.__model.word_topic_value(word_id, - topic) + self.__model.beta() - t_sum_beta_sum = self.__model.topic_sum_value( - topic) + self.__model.beta_sum() - return wt_beta / t_sum_beta_sum - - def __doc_proposal_distribution(self, doc, topic): - return doc.topic_sum(topic) + self.__model.alpha() - - def __propose(self, word_id): - dart = rand() * (self.__prob_sum[word_id] + self.__beta_prior_sum) - if dart < self.__prob_sum[word_id]: - idx = self.__alias_tables[word_id].generate() - topic = self.__topic_indexes[word_id][idx] - else: - topic = self.__beta_alias.generate() - return topic - - -class GibbsSampler(Sampler): - def __init__(self, model): - super().__init__() - self.__model = model - - def sample_doc(self, doc): - if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): - for i in range(doc.size()): - new_topic = self.__sample_token(doc, doc.token(i)) - doc.set_topic(i, new_topic) - elif isinstance(doc, SLDADoc): - for i in range(doc.size()): - new_topic = self.__sample_sentence(doc, doc.sent(i)) - doc.set_topic(i, new_topic) - - def __sample_token(self, doc, token): - old_topic = token.topic - num_topics = 
self.__model.num_topics()
-        accum_prob = np.zeros(num_topics)
-        prob = np.zeros(num_topics)
-        sum_ = 0
-        for i in range(num_topics):
-            dt_alpha = doc.topic_sum(i) + self.__model.alpha()
-            wt_beta = self.__model.word_topic_value(token.id,
-                                                    i) + self.__model.beta()
-            t_sum_beta_sum = self.__model.topic_sum(i) + self.__model.beta_sum()
-            if i == old_topic and wt_beta > 1:
-                if dt_alpha > 1:
-                    dt_alpha -= 1
-                wt_beta -= 1
-                t_sum_beta_sum -= 1
-            prob[i] = dt_alpha * wt_beta / t_sum_beta_sum
-            sum_ += prob[i]
-            accum_prob[i] = prob[i] if i == 0 else accum_prob[i - 1] + prob[i]
-
-        dart = rand() * sum_
-        if dart <= accum_prob[0]:
-            return 0
-        for i in range(1, num_topics):
-            if accum_prob[i - 1] < dart <= accum_prob[i]:
-                return i
-        return num_topics - 1
-
-    def __sample_sentence(self, doc, sent):
-        old_topic = sent.topic
-        num_topics = self.__model.num_topics()
-        accum_prob = np.zeros(num_topics)
-        prob = np.zeros(num_topics)
-        sum_ = 0
-        for t in range(num_topics):
-            dt_alpha = doc.topic_sum(t) + self.__model.alpha()
-            t_sum_beta_sum = self.__model.topic_sum(t) + self.__model.beta_sum()
-            if t == old_topic:
-                if dt_alpha > 1:
-                    dt_alpha -= 1
-                if t_sum_beta_sum > 1:
-                    t_sum_beta_sum -= 1
-            prob[t] = dt_alpha
-            for i in range(len(sent.tokens)):
-                w = sent.tokens[i]
-                wt_beta = self.__model.word_topic_value(
-                    w, t) + self.__model.beta()
-                if t == old_topic and wt_beta > 1:
-                    wt_beta -= 1
-                # Note: for long sentences the product of many small factors can
-                # underflow, so numerical precision degrades as tokens accumulate.
-                prob[t] *= wt_beta / t_sum_beta_sum
-            sum_ += prob[t]
-            accum_prob[t] = prob[t] if t == 0 else accum_prob[t - 1] + prob[t]
-
-        dart = rand() * sum_
-        if dart <= accum_prob[0]:
-            return 0
-        for t in range(1, num_topics):
-            if accum_prob[t - 1] < dart <= accum_prob[t]:
-                return t
-        return num_topics - 1
diff --git a/hub_module/modules/text/semantic_model/lda_novel/tokenizer.py b/hub_module/modules/text/semantic_model/lda_novel/tokenizer.py
deleted file mode 100644
index 10167882dd791f2f658853352e6b3773677f5b59..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/lda_novel/tokenizer.py
+++ /dev/null
@@ -1,128 +0,0 @@
-import os
-
-import numpy as np
-from paddlehub.common.logger import logger
-
-
-class Tokenizer(object):
-    """Base tokenizer class.
-    """
-
-    def __init__(self):
-        pass
-
-    def tokenize(self, text):
-        raise NotImplementedError
-
-
-class SimpleTokenizer(Tokenizer):
-    """Simple version FMM (Forward Maximum Matching) word tokenizer. This tokenizer can only
-    be used in topic model demo, but not in real business application scenarios.
-
-    Notes: This tokenizer can only recognize the words in the corresponding vocab file.
-    """
-
-    def __init__(self, vocab_path):
-        super().__init__()
-        self.__max_word_len = 0
-        self.__vocab = set()
-        self.__load_vocab(vocab_path)
-
-    def tokenize(self, text):
-        """Tokenize the input string `text`, and return the tokenized result.
-        """
-        text_len = len(text)
-        result = []
-        i = 0
-        while i < text_len:
-            word = found_word = ""
-            # Deal with English characters.
-            if self.__is_eng_char(text[i]):
-                for j in range(i, text_len + 1):
-                    if j < text_len and self.__is_eng_char(text[j]):
-                        word += self.__tolower(text[j])
-                    else:
-                        # Forward matching by character granularity.
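-                        # An English run is emitted only on an exact vocab hit;
-                        # out-of-vocabulary English text simply falls through
-                        # and is skipped character by character.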
- if word in self.__vocab: - result.append(word) - i = j - 1 - break - else: - for j in range(i, min(i + self.__max_word_len, text_len)): - word += text[j] - if word in self.__vocab: - found_word = word - if len(found_word) > 0: - result.append(found_word) - i += len(found_word) - 1 - i += 1 - return result - - def contains(self, word): - """Check whether the word is in the vocabulary. - """ - return word in self.__vocab - - def __load_vocab(self, vocab_path): - """Load the word dictionary. - """ - with open(vocab_path, 'r', encoding='utf-8') as fin: - vocab_size = 0 - for line in fin.readlines(): - fields = line.strip().split('\t') - assert len(fields) >= 2 - word = fields[1] - self.__max_word_len = max(self.__max_word_len, len(word)) - self.__vocab.add(word) - vocab_size += 1 - - def __is_eng_char(self, c): - """Check whether char c is an English character. - """ - return (c >= 'A' and c <= 'Z') or (c >= 'a' and c <= 'z') - - def __tolower(self, c): - """Return the lowercase character of the corresponding character, or return - the original character if there is no corresponding lowercase character. - """ - return c.lower() - - -class LACTokenizer(Tokenizer): - def __init__(self, vocab_path, lac): - super().__init__() - self.__max_word_len = 0 - self.__vocab = set() - self.__lac = lac - self.__load_vocab(vocab_path) - - def __load_vocab(self, vocab_path): - """Load the word dictionary. - """ - with open(vocab_path, 'r', encoding='utf-8') as fin: - vocab_size = 0 - for line in fin.readlines(): - fields = line.strip().split('\t') - assert len(fields) >= 2 - word = fields[1] - self.__max_word_len = max(self.__max_word_len, len(word)) - self.__vocab.add(word) - vocab_size += 1 - - def tokenize(self, text): - results = self.__lac.lexical_analysis( - texts=[text], use_gpu=False, batch_size=1, return_tag=True) - # Change English words to lower case. - # And just preserve the word in vocab. - words = results[0]["word"] - result = [] - for word in words: - word = word.lower() - if word in self.__vocab: - result.append(word) - return result - - def contains(self, word): - """Check whether the word is in the vocabulary. - """ - return word in self.__vocab diff --git a/hub_module/modules/text/semantic_model/lda_novel/vocab.py b/hub_module/modules/text/semantic_model/lda_novel/vocab.py deleted file mode 100644 index cc8d15e90a32b5ac556a102192c8a4e884b580ef..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/lda_novel/vocab.py +++ /dev/null @@ -1,43 +0,0 @@ -from paddlehub.common.logger import logger - -OOV = -1 - - -class WordCount(object): - def __init__(self, word_id, count): - self.word_id = word_id - self.count = count - - -class Vocab(object): - def __init__(self): - self.__term2id = {} - self.__id2term = {} - - def get_id(self, word): - if word not in self.__term2id: - return OOV - return self.__term2id[word] - - def load(self, vocab_file): - self.__term2id = {} - self.__id2term = {} - with open(vocab_file, 'r', encoding='utf-8') as fin: - for line in fin.readlines(): - fields = line.strip().split('\t') - assert len( - fields) == 5, "Vocabulary file [%s] format error!" % ( - vocab_file) - term = fields[1] - id_ = int(fields[2]) - if term in self.__term2id: - logger.error("Duplicate word [%s] in vocab file!" 
% (term)) - continue - self.__term2id[term] = id_ - self.__id2term[id_] = term - - def size(self): - return len(self.__term2id) - - def vocabulary(self): - return self.__id2term diff --git a/hub_module/modules/text/semantic_model/lda_webpage/document.py b/hub_module/modules/text/semantic_model/lda_webpage/document.py deleted file mode 100644 index b79acd0794f60fb0a401726aea0b8b51deda90a1..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/lda_webpage/document.py +++ /dev/null @@ -1,179 +0,0 @@ -import numpy as np - - -class Topic(object): - """Basic data structure of topic, contains topic id and - corresponding probability. - """ - - def __init__(self, tid, prob): - self.tid = tid # topic id - self.prob = prob # topic probability - - -class Token(object): - """Basic storage unit of LDA documents, contains word id - and corresponding topic. - """ - - def __init__(self, topic, id): - self.topic = topic - self.id = id - - -class Sentence(object): - """Basic storage unit of SentenceLDA documents, contains word ids - of the sentence and its corresponding topic id. - """ - - def __init__(self, topic, tokens): - self.topic = topic - self.tokens = tokens - - -class LDADoc(object): - """The storage structure of LDA model's inference result. - """ - - def __init__(self): - self._num_topics = None # Number of topics. - self._num_accum = None # Number of accumulated sample rounds. - self._alpha = None # Document prior parameter. - self._tokens = None # Storage structure of inference results. - self._topic_sum = None # Document's topic sum in one round samples. - self._accum_topic_sum = None # Accumulated results of topic sum. - - def init(self, num_topics): - """Initialize the LDADoc according to num_topics. - """ - self._num_topics = num_topics - self._num_accum = 0 - self._tokens = [] - self._topic_sum = np.zeros(self._num_topics) - self._accum_topic_sum = np.zeros(self._num_topics) - - def add_token(self, token): - """Add new word to current LDADoc. - Arg: - token: Token class object. - """ - assert token.topic >= 0, "Topic %d out of range!" % token.topic - assert token.topic < self._num_topics, "Topic %d out of range!" % token.topic - self._tokens.append(token) - self._topic_sum[token.topic] += 1 - - def token(self, index): - return self._tokens[index] - - def set_topic(self, index, new_topic): - """Set the index word's topic to new_topic, and update the corresponding - topic distribution. - """ - assert new_topic >= 0, "Topic %d out of range!" % new_topic - assert new_topic < self._num_topics, "Topic %d out of range!" % new_topic - old_topic = self._tokens[index].topic - if new_topic == old_topic: - return - self._tokens[index].topic = new_topic - self._topic_sum[old_topic] -= 1 - self._topic_sum[new_topic] += 1 - - def set_alpha(self, alpha): - self._alpha = alpha - - def size(self): - """Return number of words in LDADoc. - """ - return len(self._tokens) - - def topic_sum(self, topic_id): - return self._topic_sum[topic_id] - - def sparse_topic_dist(self, sort=True): - """Return the topic distribution of documents in sparse format. - By default, it is sorted according to the topic probability - under the descending order. 
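-        Each entry is a Topic(tid, prob) pair with
-        prob = accum_topic_sum[tid] / sum(accum_topic_sum).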
- """ - topic_dist = [] - sum_ = np.sum(self._accum_topic_sum) - if sum_ == 0: - return topic_dist - for i in range(0, self._num_topics): - if self._accum_topic_sum[i] == 0: - continue - topic_dist.append(Topic(i, self._accum_topic_sum[i] * 1.0 / sum_)) - if sort: - - def take_elem(topic): - return topic.prob - - topic_dist.sort(key=take_elem, reverse=True) - if topic_dist is None: - topic_dist = [] - - return topic_dist - - def dense_topic_dist(self): - """Return the distribution of document topics in dense format, - taking into account the prior parameter alpha. - """ - dense_dist = np.zeros(self._num_topics) - if self.size() == 0: - return dense_dist - dense_dist = ( - self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / ( - self.size() + self._alpha * self._num_topics) - return dense_dist - - def accumulate_topic_num(self): - self._accum_topic_sum += self._topic_sum - self._num_accum += 1 - - -class SLDADoc(LDADoc): - """Sentence LDA Document, inherited from LDADoc. - Add add_sentence interface. - """ - - def __init__(self): - super().__init__() - self.__sentences = None - - def init(self, num_topics): - """Initialize the SLDADoc according to num_topics. - """ - self._num_topics = num_topics - self.__sentences = [] - self._num_accum = 0 - self._topic_sum = np.zeros(self._num_topics) - self._accum_topic_sum = np.zeros(self._num_topics) - - def add_sentence(self, sent): - """Add new sentence to current SLDADoc. - Arg: - sent: Sentence class object. - """ - assert sent.topic >= 0, "Topic %d out of range!" % (sent.topic) - assert sent.topic < self._num_topics, "Topic %d out of range!" % ( - sent.topic) - self.__sentences.append(sent) - self._topic_sum[sent.topic] += 1 - - def set_topic(self, index, new_topic): - assert new_topic >= 0, "Topic %d out of range!" % (new_topic) - assert new_topic < self._num_topics, "Topic %d out of range!" % ( - new_topic) - old_topic = self.__sentences[index].topic - if new_topic == old_topic: - return - self.__sentences[index].topic = new_topic - self._topic_sum[old_topic] -= 1 - self._topic_sum[new_topic] += 1 - - def size(self): - """Return number of sentences in SLDADoc. - """ - return len(self.__sentences) - - def sent(self, index): - return self.__sentences[index] diff --git a/hub_module/modules/text/semantic_model/lda_webpage/inference_engine.py b/hub_module/modules/text/semantic_model/lda_webpage/inference_engine.py deleted file mode 100644 index 99834d85bf0ec98318e31fe69db75f7a3ace4ca6..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/lda_webpage/inference_engine.py +++ /dev/null @@ -1,94 +0,0 @@ -import os - -from paddlehub.common.logger import logger - -from lda_webpage.config import ModelConfig -from lda_webpage.util import load_prototxt, fix_random_seed, rand_k -from lda_webpage.model import TopicModel -from lda_webpage.sampler import GibbsSampler, MHSampler -from lda_webpage.document import LDADoc, SLDADoc, Token, Sentence -from lda_webpage.vocab import OOV - - -class SamplerType: - GibbsSampling = 0 - MetropolisHastings = 1 - - -class InferenceEngine(object): - def __init__(self, - model_dir, - conf_file, - type=SamplerType.MetropolisHastings): - # Read model configuration. - config = ModelConfig() - conf_file_path = os.path.join(model_dir, conf_file) - load_prototxt(conf_file_path, config) - self.__model = TopicModel(model_dir, config) - self.__config = config - - # Initialize the sampler according to the configuration. 
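-        # Gibbs sampling draws every topic from its exact conditional at
-        # O(num_topics) per token; Metropolis-Hastings (the default) instead
-        # alternates alias-table doc and word proposals at O(1) per draw.
-        # A minimal usage sketch (the model_dir/conf_file names are hypothetical):
-        #   engine = InferenceEngine("lda_model_dir", "lda.conf")
-        #   doc = LDADoc()
-        #   engine.infer(["tokenized", "words"], doc)
-        #   dist = doc.sparse_topic_dist()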
- if type == SamplerType.GibbsSampling: - self.__sampler = GibbsSampler(self.__model) - elif type == SamplerType.MetropolisHastings: - self.__sampler = MHSampler(self.__model) - - def infer(self, input, doc): - """Perform LDA topic inference on input, and store the results in doc. - Args: - input: a list of strings after tokenization. - doc: LDADoc type or SLDADoc type. - """ - fix_random_seed() - if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): - doc.init(self.__model.num_topics()) - doc.set_alpha(self.__model.alpha()) - for token in input: - id_ = self.__model.term_id(token) - if id_ != OOV: - init_topic = rand_k(self.__model.num_topics()) - doc.add_token(Token(init_topic, id_)) - self.lda_infer(doc, 20, 50) - elif isinstance(doc, SLDADoc): - doc.init(self.__model.num_topics()) - doc.set_alpha(self.__model.alpha()) - for sent in input: - words = [] - for token in sent: - id_ = self.__model.term_id(token) - if id_ != OOV: - words.append(id_) - init_topic = rand_k(self.__model.num_topics()) - doc.add_sentence(Sentence(init_topic, words)) - self.slda_infer(doc, 20, 50) - else: - logger.error("Wrong Doc Type!") - - def lda_infer(self, doc, burn_in_iter, total_iter): - assert burn_in_iter >= 0 - assert total_iter > 0 - assert total_iter > burn_in_iter - - for iter_ in range(total_iter): - self.__sampler.sample_doc(doc) - if iter_ >= burn_in_iter: - doc.accumulate_topic_num() - - def slda_infer(self, doc, burn_in_iter, total_iter): - assert burn_in_iter >= 0 - assert total_iter > 0 - assert total_iter > burn_in_iter - - for iter_ in range(total_iter): - self.__sampler.sample_doc(doc) - if iter_ >= burn_in_iter: - doc.accumulate_topic_num() - - def model_type(self): - return self.__model.type() - - def get_model(self): - return self.__model - - def get_config(self): - return self.__config diff --git a/hub_module/modules/text/semantic_model/lda_webpage/model.py b/hub_module/modules/text/semantic_model/lda_webpage/model.py deleted file mode 100644 index c0d464fa3a15e2e99f538f2910f63c6126c62605..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/lda_webpage/model.py +++ /dev/null @@ -1,127 +0,0 @@ -import os -from collections import OrderedDict - -import numpy as np -from tqdm import tqdm -from paddlehub.common.logger import logger - -from lda_webpage.vocab import Vocab, WordCount - - -class TopicModel(object): - """Storage Structure of Topic model, including vocabulary and word topic count. - """ - - def __init__(self, model_dir, config): - """ - Args: - model_dir: the path of model directory - config: ModelConfig class. - """ - self.__word_topic = None # Model parameter of word topic. - self.__vocab = Vocab() # Vocab data structure of model. - self.__num_topics = config.num_topics # Number of topics. - self.__alpha = config.alpha - self.__alpha_sum = self.__alpha * self.__num_topics - self.__beta = config.beta - self.__beta_sum = None - self.__type = config.type # Model type. - self.__topic_sum = np.zeros( - self.__num_topics, - dtype="int64") # Accum sum of each topic in word topic. 
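-        # Per-topic lists of WordCount(word_id, count) entries, filled while
-        # parsing the word-topic file; the module layer later sorts them by
-        # count for show_topic_keywords.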
-        self.__topic_words = [[] for _ in range(self.__num_topics)]
-        word_topic_path = os.path.join(model_dir, config.word_topic_file)
-        vocab_path = os.path.join(model_dir, config.vocab_file)
-        self.load_model(word_topic_path, vocab_path)
-
-    def term_id(self, term):
-        return self.__vocab.get_id(term)
-
-    def load_model(self, word_topic_path, vocab_path):
-
-        # Loading vocabulary
-        self.__vocab.load(vocab_path)
-
-        self.__beta_sum = self.__beta * self.__vocab.size()
-        self.__word_topic = [{} for _ in range(self.__vocab.size())]  # list of dicts
-        self.__load_word_dict(word_topic_path)
-        logger.info(
-            "Model Info: #num_topics=%d #vocab_size=%d alpha=%f beta=%f" %
-            (self.num_topics(), self.vocab_size(), self.alpha(), self.beta()))
-
-    def word_topic_value(self, word_id, topic_id):
-        """Return value of specific word under specific topic in the model.
-        """
-        word_dict = self.__word_topic[word_id]
-        if topic_id not in word_dict:
-            return 0
-        return word_dict[topic_id]
-
-    def word_topic(self, term_id):
-        """Return the topic distribution of a word.
-        """
-        return self.__word_topic[term_id]
-
-    def topic_sum_value(self, topic_id):
-        return self.__topic_sum[topic_id]
-
-    def topic_sum(self):
-        return self.__topic_sum
-
-    def num_topics(self):
-        return self.__num_topics
-
-    def vocab_size(self):
-        return self.__vocab.size()
-
-    def alpha(self):
-        return self.__alpha
-
-    def alpha_sum(self):
-        return self.__alpha_sum
-
-    def beta(self):
-        return self.__beta
-
-    def beta_sum(self):
-        return self.__beta_sum
-
-    def type(self):
-        return self.__type
-
-    def __load_word_dict(self, word_dict_path):
-        """Load the word topic parameters.
-        """
-        logger.info("Loading word topic.")
-        with open(word_dict_path, 'r', encoding='utf-8') as f:
-            for line in tqdm(f.readlines()):
-                fields = line.strip().split(" ")
-                assert len(fields) > 0, "Model file format error!"
-                term_id = int(fields[0])
-                assert term_id < self.vocab_size(), "Term id out of range!"
-                assert term_id >= 0, "Term id out of range!"
-                for i in range(1, len(fields)):
-                    topic_count = fields[i].split(":")
-                    assert len(topic_count) == 2, "Topic count format error!"
-
-                    topic_id = int(topic_count[0])
-                    assert topic_id >= 0, "Topic out of range!"
-                    assert topic_id < self.__num_topics, "Topic out of range!"
-
-                    count = int(topic_count[1])
-                    assert count >= 0, "Topic count error!"
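-                    # Each line reads "<term_id> <topic_id>:<count> ...", so
-                    # every parsed pair updates the word's sparse topic dict,
-                    # the per-topic totals and the per-topic keyword lists.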
- - self.__word_topic[term_id][topic_id] = count - self.__topic_sum[topic_id] += count - self.__topic_words[topic_id].append( - WordCount(term_id, count)) - new_dict = OrderedDict() - for key in sorted(self.__word_topic[term_id]): - new_dict[key] = self.__word_topic[term_id][key] - self.__word_topic[term_id] = new_dict - - def get_vocab(self): - return self.__vocab.vocabulary() - - def topic_words(self): - return self.__topic_words diff --git a/hub_module/modules/text/semantic_model/lda_webpage/module.py b/hub_module/modules/text/semantic_model/lda_webpage/module.py deleted file mode 100644 index 34eeea1c352b8da69272cad84be989973ea988f8..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/lda_webpage/module.py +++ /dev/null @@ -1,199 +0,0 @@ -import os - -import paddlehub as hub -from paddlehub.module.module import moduleinfo -from paddlehub.common.logger import logger - -from lda_webpage.inference_engine import InferenceEngine -from lda_webpage.document import LDADoc -from lda_webpage.semantic_matching import SemanticMatching, WordAndDis -from lda_webpage.tokenizer import LACTokenizer, SimpleTokenizer -from lda_webpage.config import ModelType -from lda_webpage.vocab import Vocab, WordCount - - -@moduleinfo( - name="lda_webpage", - version="1.0.2", - summary= - "This is a PaddleHub Module for LDA topic model in webpage dataset, where we can calculate doc distance, calculate the similarity between query and document, etc.", - author="DesmonDay", - author_email="", - type="nlp/semantic_model") -class TopicModel(hub.Module): - def _initialize(self): - """ - Initialize with the necessary elements. - """ - self.model_dir = os.path.join(self.directory, 'webpage') - self.conf_file = 'lda.conf' - self.__engine = InferenceEngine(self.model_dir, self.conf_file) - self.vocab_path = os.path.join(self.model_dir, 'vocab_info.txt') - lac = hub.Module(name="lac") - # self.__tokenizer = SimpleTokenizer(self.vocab_path) - self.__tokenizer = LACTokenizer(self.vocab_path, lac) - - self.vocabulary = self.__engine.get_model().get_vocab() - self.config = self.__engine.get_config() - self.topic_words = self.__engine.get_model().topic_words() - self.topic_sum_table = self.__engine.get_model().topic_sum() - - def take_elem(word_count): - return word_count.count - - for i in range(self.config.num_topics): - self.topic_words[i].sort(key=take_elem, reverse=True) - - logger.info("Finish initialization.") - - def cal_doc_distance(self, doc_text1, doc_text2): - """ - This interface calculates the distance between documents. - - Args: - doc_text1(str): the input document text 1. - doc_text2(str): the input document text 2. - - Returns: - jsd(float): Jensen-Shannon Divergence distance of two documents. - hd(float): Hellinger Distance of two documents. - """ - doc1_tokens = self.__tokenizer.tokenize(doc_text1) - doc2_tokens = self.__tokenizer.tokenize(doc_text2) - - # Document topic inference. - doc1, doc2 = LDADoc(), LDADoc() - self.__engine.infer(doc1_tokens, doc1) - self.__engine.infer(doc2_tokens, doc2) - - # To calculate jsd, we need dense document topic distribution. - dense_dict1 = doc1.dense_topic_dist() - dense_dict2 = doc2.dense_topic_dist() - # Calculate the distance between distributions. - # The smaller the distance, the higher the document semantic similarity. 
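-        # For reference: JSD(P, Q) = 0.5 * KL(P || M) + 0.5 * KL(Q || M) with
-        # M = (P + Q) / 2, and the Hellinger distance is proportional to the
-        # L2 norm of sqrt(P) - sqrt(Q); both approach 0 as the two topic
-        # distributions converge.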
- sm = SemanticMatching() - jsd = sm.jensen_shannon_divergence(dense_dict1, dense_dict2) - hd = sm.hellinger_distance(dense_dict1, dense_dict2) - - return jsd, hd - - def cal_doc_keywords_similarity(self, document, top_k=10): - """ - This interface can be used to find topk keywords of document. - - Args: - document(str): the input document text. - top_k(int): top k keywords of this document. - - Returns: - results(list): contains top_k keywords and their - corresponding similarity compared to document. - """ - d_tokens = self.__tokenizer.tokenize(document) - - # Do topic inference on documents to obtain topic distribution. - doc = LDADoc() - self.__engine.infer(d_tokens, doc) - doc_topic_dist = doc.sparse_topic_dist() - - items = [] - words = set() - for word in d_tokens: - if word in words: - continue - words.add(word) - wd = WordAndDis() - wd.word = word - sm = SemanticMatching() - wd.distance = sm.likelihood_based_similarity( - terms=[word], - doc_topic_dist=doc_topic_dist, - model=self.__engine.get_model()) - items.append(wd) - - def take_elem(word_dis): - return word_dis.distance - - items.sort(key=take_elem, reverse=True) - - results = [] - size = len(items) - for i in range(top_k): - if i >= size: - break - results.append({ - "word": items[i].word, - "similarity": items[i].distance - }) - - return results - - def cal_query_doc_similarity(self, query, document): - """ - This interface calculates the similarity between query and document. - - Args: - query(str): the input query text. - document(str): the input document text. - - Returns: - lda_sim(float): likelihood based similarity between query and document based on LDA. - """ - q_tokens = self.__tokenizer.tokenize(query) - d_tokens = self.__tokenizer.tokenize(document) - - doc = LDADoc() - self.__engine.infer(d_tokens, doc) - doc_topic_dist = doc.sparse_topic_dist() - - sm = SemanticMatching() - lda_sim = sm.likelihood_based_similarity(q_tokens, doc_topic_dist, - self.__engine.get_model()) - - return lda_sim - - def infer_doc_topic_distribution(self, document): - """ - This interface infers the topic distribution of document. - - Args: - document(str): the input document text. - - Returns: - results(list): returns the topic distribution of document. - """ - tokens = self.__tokenizer.tokenize(document) - if tokens == []: - return [] - results = [] - doc = LDADoc() - self.__engine.infer(tokens, doc) - topics = doc.sparse_topic_dist() - for topic in topics: - results.append({"topic id": topic.tid, "distribution": topic.prob}) - return results - - def show_topic_keywords(self, topic_id, k=10): - """ - This interface returns the first k keywords under specific topic. - - Args: - topic_id(int): topic information we want to know. - k(int): top k keywords. - - Returns: - results(dict): contains specific topic's keywords and - corresponding probability. - """ - EPS = 1e-8 - results = {} - if 0 <= topic_id < self.config.num_topics: - k = min(k, len(self.topic_words[topic_id])) - for i in range(k): - prob = self.topic_words[topic_id][i].count / \ - (self.topic_sum_table[topic_id] + EPS) - results[self.vocabulary[self.topic_words[topic_id] - [i].word_id]] = prob - return results - else: - logger.error("%d is out of range!" 
% topic_id) diff --git a/hub_module/modules/text/semantic_model/lda_webpage/sampler.py b/hub_module/modules/text/semantic_model/lda_webpage/sampler.py deleted file mode 100644 index 337c17be20baa8bd3f42b50dff5053d699ae2618..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/lda_webpage/sampler.py +++ /dev/null @@ -1,311 +0,0 @@ -import os - -import numpy as np -from tqdm import tqdm -from paddlehub.common.logger import logger - -from lda_webpage.document import LDADoc, SLDADoc, Token, Sentence -from lda_webpage.vose_alias import VoseAlias -from lda_webpage.util import rand, rand_k - - -class Sampler(object): - def __init__(self): - pass - - def sample_doc(self, doc): - """Sample LDA or SLDA topics for documents. - """ - raise NotImplementedError - - -class MHSampler(Sampler): - def __init__(self, model): - super().__init__() - self.__model = model - self.__topic_indexes = None - self.__alias_tables = None - self.__prob_sum = None - self.__beta_alias = VoseAlias() - self.__beta_prior_sum = None - self.__mh_steps = 2 - self.__construct_alias_table() - - def __construct_alias_table(self): - """Construct alias table for all words. - """ - logger.info("Construct alias table for alias sampling method.") - vocab_size = self.__model.vocab_size() - self.__topic_indexes = [[] for _ in range(vocab_size)] - self.__alias_tables = [VoseAlias() for _ in range(vocab_size)] - self.__prob_sum = np.zeros(vocab_size) - - # Construct each word's alias table (prior is not included). - for i in tqdm(range(vocab_size)): - dist = [] - prob_sum = 0 - for key in self.__model.word_topic(i): - topic_id = key - word_topic_count = self.__model.word_topic(i)[key] - topic_sum = self.__model.topic_sum_value(topic_id) - - self.__topic_indexes[i].append(topic_id) - q = word_topic_count / (topic_sum + self.__model.beta_sum()) - dist.append(q) - prob_sum += q - self.__prob_sum[i] = prob_sum - if len(dist) > 0: - dist = np.array(dist, dtype=np.float) - self.__alias_tables[i].initialize(dist) - - # Build prior parameter beta's alias table. 
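-        # Dense prior table: beta / (topic_sum(t) + beta_sum) for every topic,
-        # the mass the sparse per-word tables above leave out; __propose picks
-        # between the two tables in proportion to prob_sum[word_id] versus
-        # beta_prior_sum.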
- beta_dist = self.__model.beta() / ( - self.__model.topic_sum() + self.__model.beta_sum()) - self.__beta_prior_sum = np.sum(beta_dist) - self.__beta_alias.initialize(beta_dist) - - def sample_doc(self, doc): - if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): - for i in range(doc.size()): - new_topic = self.__sample_token(doc, doc.token(i)) - doc.set_topic(i, new_topic) - elif isinstance(doc, SLDADoc): - for i in range(doc.size()): - new_topic = self.__sample_sentence(doc, doc.sent(i)) - doc.set_topic(i, new_topic) - - def __sample_token(self, doc, token): - new_topic = token.topic - for i in range(self.__mh_steps): - doc_proposed_topic = self.__doc_proposal(doc, token) - new_topic = self.__word_proposal(doc, token, doc_proposed_topic) - return new_topic - - def __sample_sentence(self, doc, sent): - new_topic = sent.topic - for i in range(self.__mh_steps): - doc_proposed_topic = self.__doc_proposal(doc, sent) - new_topic = self.__word_proposal(doc, sent, doc_proposed_topic) - return new_topic - - def __doc_proposal(self, doc, token): - if isinstance(doc, LDADoc) and isinstance(token, Token): - old_topic = token.topic - dart = rand() * (doc.size() + self.__model.alpha_sum()) - if dart < doc.size(): - token_index = int(dart) - new_topic = doc.token(token_index).topic - else: - new_topic = rand_k(self.__model.num_topics()) - - if new_topic != old_topic: - proposal_old = self.__doc_proposal_distribution(doc, old_topic) - proposal_new = self.__doc_proposal_distribution(doc, new_topic) - proportion_old = self.__proportional_function( - doc, token, old_topic) - proportion_new = self.__proportional_function( - doc, token, new_topic) - transition_prob = float((proportion_new * proposal_old) / - (proportion_old * proposal_new)) - rejection = rand() - mask = -(rejection < transition_prob) - return (new_topic & mask) | (old_topic & ~mask) - - return new_topic - - elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): - sent = token - old_topic = sent.topic - dart = rand() * (doc.size() + self.__model.alpha_sum()) - if dart < doc.size(): - token_index = int(dart) - new_topic = doc.sent(token_index).topic - else: - new_topic = rand_k(self.__model.num_topics()) - - if new_topic != old_topic: - proportion_old = self.__proportional_function( - doc, sent, old_topic) - proportion_new = self.__proportional_function( - doc, sent, new_topic) - proposal_old = self.__doc_proposal_distribution(doc, old_topic) - proposal_new = self.__doc_proposal_distribution(doc, new_topic) - transition_prob = float((proportion_new * proposal_old) / - (proportion_old * proposal_new)) - rejection = rand() - mask = -(rejection < transition_prob) - return (new_topic & mask) | (old_topic & ~mask) - - return new_topic - - def __word_proposal(self, doc, token, old_topic): - if isinstance(doc, LDADoc) and isinstance(token, Token): - new_topic = self.__propose(token.id) - if new_topic != old_topic: - proposal_old = self.__word_proposal_distribution( - token.id, old_topic) - proposal_new = self.__word_proposal_distribution( - token.id, new_topic) - proportion_old = self.__proportional_function( - doc, token, old_topic) - proportion_new = self.__proportional_function( - doc, token, new_topic) - transition_prob = float((proportion_new * proposal_old) / - (proportion_old * proposal_new)) - rejection = rand() - mask = -(rejection < transition_prob) - return (new_topic & mask) | (old_topic & ~mask) - return new_topic - - elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): - sent = token - new_topic = old_topic - 
for word_id in sent.tokens: - new_topic = self.__propose(word_id) - if new_topic != old_topic: - proportion_old = self.__proportional_function( - doc, sent, old_topic) - proportion_new = self.__proportional_function( - doc, sent, new_topic) - proposal_old = self.__word_proposal_distribution( - word_id, old_topic) - proposal_new = self.__word_proposal_distribution( - word_id, new_topic) - transition_prob = float((proportion_new * proposal_old) / - (proportion_old * proposal_new)) - rejection = rand() - mask = -(rejection < transition_prob) - new_topic = (new_topic & mask) | (old_topic & ~mask) - return new_topic - - def __proportional_function(self, doc, token, new_topic): - if isinstance(doc, LDADoc) and isinstance(token, Token): - old_topic = token.topic - dt_alpha = doc.topic_sum(new_topic) + self.__model.alpha() - wt_beta = self.__model.word_topic_value( - token.id, new_topic) + self.__model.beta() - t_sum_beta_sum = self.__model.topic_sum_value( - new_topic) + self.__model.beta_sum() - if new_topic == old_topic and wt_beta > 1: - if dt_alpha > 1: - dt_alpha -= 1 - wt_beta -= 1 - t_sum_beta_sum -= 1 - return dt_alpha * wt_beta / t_sum_beta_sum - - elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): - sent = token - old_topic = sent.topic - result = doc.topic_sum(new_topic) + self.__model.alpha() - if new_topic == old_topic: - result -= 1 - for word_id in sent.tokens: - wt_beta = self.__model.word_topic_value( - word_id, new_topic) + self.__model.beta() - t_sum_beta_sum = self.__model.topic_sum_value( - new_topic) + self.__model.beta_sum() - if new_topic == old_topic and wt_beta > 1: - wt_beta -= 1 - t_sum_beta_sum -= 1 - result *= wt_beta / t_sum_beta_sum - return result - else: - logger.error("Wrong input argument type!") - - def __word_proposal_distribution(self, word_id, topic): - wt_beta = self.__model.word_topic_value(word_id, - topic) + self.__model.beta() - t_sum_beta_sum = self.__model.topic_sum_value( - topic) + self.__model.beta_sum() - return wt_beta / t_sum_beta_sum - - def __doc_proposal_distribution(self, doc, topic): - return doc.topic_sum(topic) + self.__model.alpha() - - def __propose(self, word_id): - dart = rand() * (self.__prob_sum[word_id] + self.__beta_prior_sum) - if dart < self.__prob_sum[word_id]: - idx = self.__alias_tables[word_id].generate() - topic = self.__topic_indexes[word_id][idx] - else: - topic = self.__beta_alias.generate() - return topic - - -class GibbsSampler(Sampler): - def __init__(self, model): - super().__init__() - self.__model = model - - def sample_doc(self, doc): - if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): - for i in range(doc.size()): - new_topic = self.__sample_token(doc, doc.token(i)) - doc.set_topic(i, new_topic) - elif isinstance(doc, SLDADoc): - for i in range(doc.size()): - new_topic = self.__sample_sentence(doc, doc.sent(i)) - doc.set_topic(i, new_topic) - - def __sample_token(self, doc, token): - old_topic = token.topic - num_topics = self.__model.num_topics() - accum_prob = np.zeros(num_topics) - prob = np.zeros(num_topics) - sum_ = 0 - for i in range(num_topics): - dt_alpha = doc.topic_sum(i) + self.__model.alpha() - wt_beta = self.__model.word_topic_value(token.id, - i) + self.__model.beta() - t_sum_beta_sum = self.__model.topic_sum(i) + self.__model.beta_sum() - if i == old_topic and wt_beta > 1: - if dt_alpha > 1: - dt_alpha -= 1 - wt_beta -= 1 - t_sum_beta_sum -= 1 - prob[i] = dt_alpha * wt_beta / t_sum_beta_sum - sum_ += prob[i] - accum_prob[i] = prob[i] if i == 0 else accum_prob[i - 1] + 
prob[i]
-
-        dart = rand() * sum_
-        if dart <= accum_prob[0]:
-            return 0
-        for i in range(1, num_topics):
-            if accum_prob[i - 1] < dart <= accum_prob[i]:
-                return i
-        return num_topics - 1
-
-    def __sample_sentence(self, doc, sent):
-        old_topic = sent.topic
-        num_topics = self.__model.num_topics()
-        accum_prob = np.zeros(num_topics)
-        prob = np.zeros(num_topics)
-        sum_ = 0
-        for t in range(num_topics):
-            dt_alpha = doc.topic_sum(t) + self.__model.alpha()
-            t_sum_beta_sum = self.__model.topic_sum(t) + self.__model.beta_sum()
-            if t == old_topic:
-                if dt_alpha > 1:
-                    dt_alpha -= 1
-                if t_sum_beta_sum > 1:
-                    t_sum_beta_sum -= 1
-            prob[t] = dt_alpha
-            for i in range(len(sent.tokens)):
-                w = sent.tokens[i]
-                wt_beta = self.__model.word_topic_value(
-                    w, t) + self.__model.beta()
-                if t == old_topic and wt_beta > 1:
-                    wt_beta -= 1
-                # Note: for long sentences the product of many small factors can
-                # underflow, so numerical precision degrades as tokens accumulate.
-                prob[t] *= wt_beta / t_sum_beta_sum
-            sum_ += prob[t]
-            accum_prob[t] = prob[t] if t == 0 else accum_prob[t - 1] + prob[t]
-
-        dart = rand() * sum_
-        if dart <= accum_prob[0]:
-            return 0
-        for t in range(1, num_topics):
-            if accum_prob[t - 1] < dart <= accum_prob[t]:
-                return t
-        return num_topics - 1
diff --git a/hub_module/modules/text/semantic_model/lda_webpage/tokenizer.py b/hub_module/modules/text/semantic_model/lda_webpage/tokenizer.py
deleted file mode 100644
index 10167882dd791f2f658853352e6b3773677f5b59..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/lda_webpage/tokenizer.py
+++ /dev/null
@@ -1,128 +0,0 @@
-import os
-
-import numpy as np
-from paddlehub.common.logger import logger
-
-
-class Tokenizer(object):
-    """Base tokenizer class.
-    """
-
-    def __init__(self):
-        pass
-
-    def tokenize(self, text):
-        raise NotImplementedError
-
-
-class SimpleTokenizer(Tokenizer):
-    """Simple version FMM (Forward Maximum Matching) word tokenizer. This tokenizer can only
-    be used in topic model demo, but not in real business application scenarios.
-
-    Notes: This tokenizer can only recognize the words in the corresponding vocab file.
-    """
-
-    def __init__(self, vocab_path):
-        super().__init__()
-        self.__max_word_len = 0
-        self.__vocab = set()
-        self.__load_vocab(vocab_path)
-
-    def tokenize(self, text):
-        """Tokenize the input string `text`, and return the tokenized result.
-        """
-        text_len = len(text)
-        result = []
-        i = 0
-        while i < text_len:
-            word = found_word = ""
-            # Deal with English characters.
-            if self.__is_eng_char(text[i]):
-                for j in range(i, text_len + 1):
-                    if j < text_len and self.__is_eng_char(text[j]):
-                        word += self.__tolower(text[j])
-                    else:
-                        # Forward matching by character granularity.
-                        if word in self.__vocab:
-                            result.append(word)
-                            i = j - 1
-                            break
-            else:
-                for j in range(i, min(i + self.__max_word_len, text_len)):
-                    word += text[j]
-                    if word in self.__vocab:
-                        found_word = word
-                if len(found_word) > 0:
-                    result.append(found_word)
-                    i += len(found_word) - 1
-            i += 1
-        return result
-
-    def contains(self, word):
-        """Check whether the word is in the vocabulary.
-        """
-        return word in self.__vocab
-
-    def __load_vocab(self, vocab_path):
-        """Load the word dictionary.
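-        Each line holds at least two tab-separated fields; the word itself is
-        the second one, and the longest word length is tracked for the FMM
-        window.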
- """ - with open(vocab_path, 'r', encoding='utf-8') as fin: - vocab_size = 0 - for line in fin.readlines(): - fields = line.strip().split('\t') - assert len(fields) >= 2 - word = fields[1] - self.__max_word_len = max(self.__max_word_len, len(word)) - self.__vocab.add(word) - vocab_size += 1 - - def __is_eng_char(self, c): - """Check whether char c is an English character. - """ - return (c >= 'A' and c <= 'Z') or (c >= 'a' and c <= 'z') - - def __tolower(self, c): - """Return the lowercase character of the corresponding character, or return - the original character if there is no corresponding lowercase character. - """ - return c.lower() - - -class LACTokenizer(Tokenizer): - def __init__(self, vocab_path, lac): - super().__init__() - self.__max_word_len = 0 - self.__vocab = set() - self.__lac = lac - self.__load_vocab(vocab_path) - - def __load_vocab(self, vocab_path): - """Load the word dictionary. - """ - with open(vocab_path, 'r', encoding='utf-8') as fin: - vocab_size = 0 - for line in fin.readlines(): - fields = line.strip().split('\t') - assert len(fields) >= 2 - word = fields[1] - self.__max_word_len = max(self.__max_word_len, len(word)) - self.__vocab.add(word) - vocab_size += 1 - - def tokenize(self, text): - results = self.__lac.lexical_analysis( - texts=[text], use_gpu=False, batch_size=1, return_tag=True) - # Change English words to lower case. - # And just preserve the word in vocab. - words = results[0]["word"] - result = [] - for word in words: - word = word.lower() - if word in self.__vocab: - result.append(word) - return result - - def contains(self, word): - """Check whether the word is in the vocabulary. - """ - return word in self.__vocab diff --git a/hub_module/modules/text/semantic_model/lda_webpage/vocab.py b/hub_module/modules/text/semantic_model/lda_webpage/vocab.py deleted file mode 100644 index cc8d15e90a32b5ac556a102192c8a4e884b580ef..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/lda_webpage/vocab.py +++ /dev/null @@ -1,43 +0,0 @@ -from paddlehub.common.logger import logger - -OOV = -1 - - -class WordCount(object): - def __init__(self, word_id, count): - self.word_id = word_id - self.count = count - - -class Vocab(object): - def __init__(self): - self.__term2id = {} - self.__id2term = {} - - def get_id(self, word): - if word not in self.__term2id: - return OOV - return self.__term2id[word] - - def load(self, vocab_file): - self.__term2id = {} - self.__id2term = {} - with open(vocab_file, 'r', encoding='utf-8') as fin: - for line in fin.readlines(): - fields = line.strip().split('\t') - assert len( - fields) == 5, "Vocabulary file [%s] format error!" % ( - vocab_file) - term = fields[1] - id_ = int(fields[2]) - if term in self.__term2id: - logger.error("Duplicate word [%s] in vocab file!" % (term)) - continue - self.__term2id[term] = id_ - self.__id2term[id_] = term - - def size(self): - return len(self.__term2id) - - def vocabulary(self): - return self.__id2term diff --git a/hub_module/modules/text/semantic_model/rbt3/model/bert.py b/hub_module/modules/text/semantic_model/rbt3/model/bert.py deleted file mode 100644 index 250bf79c8f3e0ae3e1919c253c8bc8a677e2a432..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/rbt3/model/bert.py +++ /dev/null @@ -1,228 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""BERT model."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import six
-import json
-
-import paddle.fluid as fluid
-
-from rbt3.model.transformer_encoder import encoder, pre_process_layer
-
-
-class BertConfig(object):
-    def __init__(self, config_path):
-        self._config_dict = self._parse(config_path)
-
-    def _parse(self, config_path):
-        try:
-            with open(config_path) as json_file:
-                config_dict = json.load(json_file)
-        except Exception:
-            raise IOError(
-                "Error in parsing bert model config file '%s'" % config_path)
-        else:
-            return config_dict
-
-    def __getitem__(self, key):
-        return self._config_dict[key]
-
-    def print_config(self):
-        for arg, value in sorted(six.iteritems(self._config_dict)):
-            print('%s: %s' % (arg, value))
-        print('------------------------------------------------')
-
-
-class BertModel(object):
-    def __init__(self,
-                 src_ids,
-                 position_ids,
-                 sentence_ids,
-                 input_mask,
-                 config,
-                 weight_sharing=True,
-                 use_fp16=False):
-
-        self._emb_size = config['hidden_size']
-        self._n_layer = config['num_hidden_layers']
-        self._n_head = config['num_attention_heads']
-        self._voc_size = config['vocab_size']
-        self._max_position_seq_len = config['max_position_embeddings']
-        self._sent_types = config['type_vocab_size']
-        self._hidden_act = config['hidden_act']
-        self._prepostprocess_dropout = config['hidden_dropout_prob']
-        self._attention_dropout = config['attention_probs_dropout_prob']
-        self._weight_sharing = weight_sharing
-
-        self._word_emb_name = "word_embedding"
-        self._pos_emb_name = "pos_embedding"
-        self._sent_emb_name = "sent_embedding"
-        self._dtype = "float16" if use_fp16 else "float32"
-
-        # Initialize all weights by truncated normal initializer, and all biases
-        # will be initialized by constant zero by default.
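-        # config['initializer_range'] becomes the stddev of the truncated
-        # normal below (0.02 in commonly released BERT/RoBERTa configs; the
-        # exact value depends on the shipped bert_config.json).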
- self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config['initializer_range']) - - self._build_model(src_ids, position_ids, sentence_ids, input_mask) - - def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): - # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( - input=src_ids, - size=[self._voc_size, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._word_emb_name, initializer=self._param_initializer), - is_sparse=False) - position_emb_out = fluid.layers.embedding( - input=position_ids, - size=[self._max_position_seq_len, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._pos_emb_name, initializer=self._param_initializer)) - - sent_emb_out = fluid.layers.embedding( - sentence_ids, - size=[self._sent_types, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._sent_emb_name, initializer=self._param_initializer)) - - emb_out = emb_out + position_emb_out - emb_out = emb_out + sent_emb_out - - emb_out = pre_process_layer( - emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') - - if self._dtype == "float16": - input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) - - self_attn_mask = fluid.layers.matmul( - x=input_mask, y=input_mask, transpose_y=True) - self_attn_mask = fluid.layers.scale( - x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = fluid.layers.stack( - x=[self_attn_mask] * self._n_head, axis=1) - n_head_self_attn_mask.stop_gradient = True - - self._enc_out = encoder( - enc_input=emb_out, - attn_bias=n_head_self_attn_mask, - n_layer=self._n_layer, - n_head=self._n_head, - d_key=self._emb_size // self._n_head, - d_value=self._emb_size // self._n_head, - d_model=self._emb_size, - d_inner_hid=self._emb_size * 4, - prepostprocess_dropout=self._prepostprocess_dropout, - attention_dropout=self._attention_dropout, - relu_dropout=0, - hidden_act=self._hidden_act, - preprocess_cmd="", - postprocess_cmd="dan", - param_initializer=self._param_initializer, - name='encoder') - - def get_sequence_output(self): - return self._enc_out - - def get_pooled_output(self): - """Get the first feature of each sequence for classification""" - - next_sent_feat = fluid.layers.slice( - input=self._enc_out, axes=[1], starts=[0], ends=[1]) - next_sent_feat = fluid.layers.fc( - input=next_sent_feat, - size=self._emb_size, - act="tanh", - param_attr=fluid.ParamAttr( - name="pooled_fc.w_0", initializer=self._param_initializer), - bias_attr="pooled_fc.b_0") - return next_sent_feat - - def get_pretraining_output(self, mask_label, mask_pos, labels): - """Get the loss & accuracy for pretraining""" - - mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') - - # extract the first token feature in each sentence - next_sent_feat = self.get_pooled_output() - reshaped_emb_out = fluid.layers.reshape( - x=self._enc_out, shape=[-1, self._emb_size]) - # extract masked tokens' feature - mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) - - # transform: fc - mask_trans_feat = fluid.layers.fc( - input=mask_feat, - size=self._emb_size, - act=self._hidden_act, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_fc.w_0', - initializer=self._param_initializer), - bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) - # transform: layer norm - mask_trans_feat = pre_process_layer( - mask_trans_feat, 'n', name='mask_lm_trans') - - mask_lm_out_bias_attr = fluid.ParamAttr( - 
name="mask_lm_out_fc.b_0",
-            initializer=fluid.initializer.Constant(value=0.0))
-        if self._weight_sharing:
-            fc_out = fluid.layers.matmul(
-                x=mask_trans_feat,
-                y=fluid.default_main_program().global_block().var(
-                    self._word_emb_name),
-                transpose_y=True)
-            fc_out += fluid.layers.create_parameter(
-                shape=[self._voc_size],
-                dtype=self._dtype,
-                attr=mask_lm_out_bias_attr,
-                is_bias=True)
-
-        else:
-            fc_out = fluid.layers.fc(
-                input=mask_trans_feat,
-                size=self._voc_size,
-                param_attr=fluid.ParamAttr(
-                    name="mask_lm_out_fc.w_0",
-                    initializer=self._param_initializer),
-                bias_attr=mask_lm_out_bias_attr)
-
-        mask_lm_loss = fluid.layers.softmax_with_cross_entropy(
-            logits=fc_out, label=mask_label)
-        mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss)
-
-        next_sent_fc_out = fluid.layers.fc(
-            input=next_sent_feat,
-            size=2,
-            param_attr=fluid.ParamAttr(
-                name="next_sent_fc.w_0", initializer=self._param_initializer),
-            bias_attr="next_sent_fc.b_0")
-
-        next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy(
-            logits=next_sent_fc_out, label=labels, return_softmax=True)
-
-        next_sent_acc = fluid.layers.accuracy(
-            input=next_sent_softmax, label=labels)
-
-        mean_next_sent_loss = fluid.layers.mean(next_sent_loss)
-
-        loss = mean_next_sent_loss + mean_mask_lm_loss
-        return next_sent_acc, mean_mask_lm_loss, loss
diff --git a/hub_module/modules/text/semantic_model/rbt3/model/transformer_encoder.py b/hub_module/modules/text/semantic_model/rbt3/model/transformer_encoder.py
deleted file mode 100644
index 9a5913efc1fb6d7fc0ac5f6df37fb70424d259d9..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/rbt3/model/transformer_encoder.py
+++ /dev/null
@@ -1,340 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Transformer encoder."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from functools import partial
-
-import paddle.fluid as fluid
-import paddle.fluid.layers as layers
-
-
-def multi_head_attention(queries,
-                         keys,
-                         values,
-                         attn_bias,
-                         d_key,
-                         d_value,
-                         d_model,
-                         n_head=1,
-                         dropout_rate=0.,
-                         cache=None,
-                         param_initializer=None,
-                         name='multi_head_att'):
-    """
-    Multi-Head Attention. Note that attn_bias is added to the logit before
-    computing softmax activation to mask certain selected positions so that
-    they will not be considered in attention weights.
-    """
-    keys = queries if keys is None else keys
-    values = keys if values is None else values
-
-    if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
-        raise ValueError(
-            "Inputs: queries, keys and values should all be 3-D tensors.")
-
-    def __compute_qkv(queries, keys, values, n_head, d_key, d_value):
-        """
-        Add linear projection to queries, keys, and values.
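-        Each projection is a position-wise fc emitting n_head * d_key (or
-        n_head * d_value for v) features, split into per-head tensors later
-        by __split_heads.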
-        """
-        q = layers.fc(
-            input=queries,
-            size=d_key * n_head,
-            num_flatten_dims=2,
-            param_attr=fluid.ParamAttr(
-                name=name + '_query_fc.w_0', initializer=param_initializer),
-            bias_attr=name + '_query_fc.b_0')
-        k = layers.fc(
-            input=keys,
-            size=d_key * n_head,
-            num_flatten_dims=2,
-            param_attr=fluid.ParamAttr(
-                name=name + '_key_fc.w_0', initializer=param_initializer),
-            bias_attr=name + '_key_fc.b_0')
-        v = layers.fc(
-            input=values,
-            size=d_value * n_head,
-            num_flatten_dims=2,
-            param_attr=fluid.ParamAttr(
-                name=name + '_value_fc.w_0', initializer=param_initializer),
-            bias_attr=name + '_value_fc.b_0')
-        return q, k, v
-
-    def __split_heads(x, n_head):
-        """
-        Reshape the last dimension of input tensor x so that it becomes two
-        dimensions and then transpose. Specifically, input a tensor with shape
-        [bs, max_sequence_length, n_head * hidden_dim] then output a tensor
-        with shape [bs, n_head, max_sequence_length, hidden_dim].
-        """
-        hidden_size = x.shape[-1]
-        # The value 0 in shape attr means copying the corresponding dimension
-        # size of the input as the output dimension size.
-        reshaped = layers.reshape(
-            x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True)
-
-        # permute the dimensions into:
-        # [batch_size, n_head, max_sequence_len, hidden_size_per_head]
-        return layers.transpose(x=reshaped, perm=[0, 2, 1, 3])
-
-    def __combine_heads(x):
-        """
-        Transpose and then reshape the last two dimensions of input tensor x
-        so that it becomes one dimension, which is reverse to __split_heads.
-        """
-        if len(x.shape) == 3: return x
-        if len(x.shape) != 4:
-            raise ValueError("Input(x) should be a 4-D Tensor.")
-
-        trans_x = layers.transpose(x, perm=[0, 2, 1, 3])
-        # The value 0 in shape attr means copying the corresponding dimension
-        # size of the input as the output dimension size.
-        return layers.reshape(
-            x=trans_x,
-            shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]],
-            inplace=True)
-
-    def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate):
-        """
-        Scaled Dot-Product Attention
-        """
-        scaled_q = layers.scale(x=q, scale=d_key**-0.5)
-        product = layers.matmul(x=scaled_q, y=k, transpose_y=True)
-        if attn_bias:
-            product += attn_bias
-        weights = layers.softmax(product)
-        if dropout_rate:
-            weights = layers.dropout(
-                weights,
-                dropout_prob=dropout_rate,
-                dropout_implementation="upscale_in_train",
-                is_test=False)
-        out = layers.matmul(weights, v)
-        return out
-
-    q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value)
-
-    if cache is not None:  # use cache and concat time steps
-        # Since the inplace reshape in __split_heads changes the shape of k and
-        # v, which is the cache input for next time step, reshape the cache
-        # input from the previous time step first.
-        k = cache["k"] = layers.concat(
-            [layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1)
-        v = cache["v"] = layers.concat(
-            [layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1)
-
-    q = __split_heads(q, n_head)
-    k = __split_heads(k, n_head)
-    v = __split_heads(v, n_head)
-
-    ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key,
-                                                  dropout_rate)
-
-    out = __combine_heads(ctx_multiheads)
-
-    # Project back to the model size.
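-    # After __combine_heads the context tensor is
-    # [batch, seq_len, n_head * d_value]; the fc below maps it back to
-    # d_model so it matches the width expected by the residual connection.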
-    proj_out = layers.fc(
-        input=out,
-        size=d_model,
-        num_flatten_dims=2,
-        param_attr=fluid.ParamAttr(
-            name=name + '_output_fc.w_0', initializer=param_initializer),
-        bias_attr=name + '_output_fc.b_0')
-    return proj_out
-
-
-def positionwise_feed_forward(x,
-                              d_inner_hid,
-                              d_hid,
-                              dropout_rate,
-                              hidden_act,
-                              param_initializer=None,
-                              name='ffn'):
-    """
-    Position-wise Feed-Forward Networks.
-    This module consists of two linear transformations with a ReLU activation
-    in between, which is applied to each position separately and identically.
-    """
-    hidden = layers.fc(
-        input=x,
-        size=d_inner_hid,
-        num_flatten_dims=2,
-        act=hidden_act,
-        param_attr=fluid.ParamAttr(
-            name=name + '_fc_0.w_0', initializer=param_initializer),
-        bias_attr=name + '_fc_0.b_0')
-    if dropout_rate:
-        hidden = layers.dropout(
-            hidden,
-            dropout_prob=dropout_rate,
-            dropout_implementation="upscale_in_train",
-            is_test=False)
-    out = layers.fc(
-        input=hidden,
-        size=d_hid,
-        num_flatten_dims=2,
-        param_attr=fluid.ParamAttr(
-            name=name + '_fc_1.w_0', initializer=param_initializer),
-        bias_attr=name + '_fc_1.b_0')
-    return out
-
-
-def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.,
-                           name=''):
-    """
-    Add residual connection, layer normalization and dropout to the out tensor
-    optionally according to the value of process_cmd.
-    This will be used before or after multi-head attention and position-wise
-    feed-forward networks.
-    """
-    for cmd in process_cmd:
-        if cmd == "a":  # add residual connection
-            out = out + prev_out if prev_out else out
-        elif cmd == "n":  # add layer normalization
-            out_dtype = out.dtype
-            if out_dtype == fluid.core.VarDesc.VarType.FP16:
-                out = layers.cast(x=out, dtype="float32")
-            out = layers.layer_norm(
-                out,
-                begin_norm_axis=len(out.shape) - 1,
-                param_attr=fluid.ParamAttr(
-                    name=name + '_layer_norm_scale',
-                    initializer=fluid.initializer.Constant(1.)),
-                bias_attr=fluid.ParamAttr(
-                    name=name + '_layer_norm_bias',
-                    initializer=fluid.initializer.Constant(0.)))
-            if out_dtype == fluid.core.VarDesc.VarType.FP16:
-                out = layers.cast(x=out, dtype="float16")
-        elif cmd == "d":  # add dropout
-            if dropout_rate:
-                out = layers.dropout(
-                    out,
-                    dropout_prob=dropout_rate,
-                    dropout_implementation="upscale_in_train",
-                    is_test=False)
-    return out
-
-
-pre_process_layer = partial(pre_post_process_layer, None)
-post_process_layer = pre_post_process_layer
-
-
-def encoder_layer(enc_input,
-                  attn_bias,
-                  n_head,
-                  d_key,
-                  d_value,
-                  d_model,
-                  d_inner_hid,
-                  prepostprocess_dropout,
-                  attention_dropout,
-                  relu_dropout,
-                  hidden_act,
-                  preprocess_cmd="n",
-                  postprocess_cmd="da",
-                  param_initializer=None,
-                  name=''):
-    """The encoder layers that can be stacked to form a deep encoder.
-    This module consists of a multi-head (self) attention followed by
-    position-wise feed-forward networks; both components are wrapped with
-    post_process_layer to add residual connection, layer normalization
-    and dropout.
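-    The preprocess_cmd / postprocess_cmd strings are interpreted one character
-    at a time by pre_post_process_layer: "a" adds the residual connection,
-    "n" applies layer normalization and "d" applies dropout, in the given
-    order.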
- """ - attn_output = multi_head_attention( - pre_process_layer( - enc_input, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_att'), - None, - None, - attn_bias, - d_key, - d_value, - d_model, - n_head, - attention_dropout, - param_initializer=param_initializer, - name=name + '_multi_head_att') - attn_output = post_process_layer( - enc_input, - attn_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_att') - ffd_output = positionwise_feed_forward( - pre_process_layer( - attn_output, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_ffn'), - d_inner_hid, - d_model, - relu_dropout, - hidden_act, - param_initializer=param_initializer, - name=name + '_ffn') - return post_process_layer( - attn_output, - ffd_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_ffn') - - -def encoder(enc_input, - attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """ - The encoder is composed of a stack of identical layers returned by calling - encoder_layer. - """ - for i in range(n_layer): - enc_output = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name + '_layer_' + str(i)) - enc_input = enc_output - enc_output = pre_process_layer( - enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") - - return enc_output diff --git a/hub_module/modules/text/semantic_model/rbt3/module.py b/hub_module/modules/text/semantic_model/rbt3/module.py deleted file mode 100644 index 03f0c2276a384d970acbf11059efadc1e3a03d5d..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/rbt3/module.py +++ /dev/null @@ -1,75 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-from paddlehub import TransformerModule
-from paddlehub.module.module import moduleinfo
-
-from rbt3.model.bert import BertConfig, BertModel
-
-
-@moduleinfo(
-    name="rbt3",
-    version="1.0.0",
-    summary="rbt3, 3-layer, 768-hidden, 12-heads, 38M parameters",
-    author="ymcui",
-    author_email="ymcui@ir.hit.edu.cn",
-    type="nlp/semantic_model",
-)
-class BertWwm(TransformerModule):
-    def _initialize(self):
-        self.MAX_SEQ_LEN = 512
-        self.params_path = os.path.join(self.directory, "assets", "params")
-        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
-
-        bert_config_path = os.path.join(self.directory, "assets",
-                                        "bert_config_rbt3.json")
-        self.bert_config = BertConfig(bert_config_path)
-
-    def net(self, input_ids, position_ids, segment_ids, input_mask):
-        """
-        Create the neural network.
-
-        Args:
-            input_ids (tensor): the word ids.
-            position_ids (tensor): the position ids.
-            segment_ids (tensor): the segment ids.
-            input_mask (tensor): the padding mask.
-
-        Returns:
-            pooled_output (tensor): sentence-level output for classification task.
-            sequence_output (tensor): token-level output for sequence task.
-        """
-        bert = BertModel(
-            src_ids=input_ids,
-            position_ids=position_ids,
-            sentence_ids=segment_ids,
-            input_mask=input_mask,
-            config=self.bert_config,
-            use_fp16=False)
-        pooled_output = bert.get_pooled_output()
-        sequence_output = bert.get_sequence_output()
-        return pooled_output, sequence_output
-
-
-if __name__ == '__main__':
-    test_module = BertWwm()
diff --git a/hub_module/modules/text/semantic_model/rbtl3/model/bert.py b/hub_module/modules/text/semantic_model/rbtl3/model/bert.py
deleted file mode 100644
index e2925a95861db6d231a79577910e41c20e48f3fe..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/rbtl3/model/bert.py
+++ /dev/null
@@ -1,228 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
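-# BertConfig below simply loads a JSON file; the keys consumed by
-# BertModel.__init__ are collected here for reference. The values are
-# illustrative placeholders, not the released rbtl3 configuration:
-#
-#     {
-#         "hidden_size": 1024,
-#         "num_hidden_layers": 3,
-#         "num_attention_heads": 16,
-#         "vocab_size": 21128,
-#         "max_position_embeddings": 512,
-#         "type_vocab_size": 2,
-#         "hidden_act": "gelu",
-#         "hidden_dropout_prob": 0.1,
-#         "attention_probs_dropout_prob": 0.1,
-#         "initializer_range": 0.02
-#     }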
-"""BERT model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import json - -import paddle.fluid as fluid - -from rbtl3.model.transformer_encoder import encoder, pre_process_layer - - -class BertConfig(object): - def __init__(self, config_path): - self._config_dict = self._parse(config_path) - - def _parse(self, config_path): - try: - with open(config_path) as json_file: - config_dict = json.load(json_file) - except Exception: - raise IOError( - "Error in parsing bert model config file '%s'" % config_path) - else: - return config_dict - - def __getitem__(self, key): - return self._config_dict[key] - - def print_config(self): - for arg, value in sorted(six.iteritems(self._config_dict)): - print('%s: %s' % (arg, value)) - print('------------------------------------------------') - - -class BertModel(object): - def __init__(self, - src_ids, - position_ids, - sentence_ids, - input_mask, - config, - weight_sharing=True, - use_fp16=False): - - self._emb_size = config['hidden_size'] - self._n_layer = config['num_hidden_layers'] - self._n_head = config['num_attention_heads'] - self._voc_size = config['vocab_size'] - self._max_position_seq_len = config['max_position_embeddings'] - self._sent_types = config['type_vocab_size'] - self._hidden_act = config['hidden_act'] - self._prepostprocess_dropout = config['hidden_dropout_prob'] - self._attention_dropout = config['attention_probs_dropout_prob'] - self._weight_sharing = weight_sharing - - self._word_emb_name = "word_embedding" - self._pos_emb_name = "pos_embedding" - self._sent_emb_name = "sent_embedding" - self._dtype = "float16" if use_fp16 else "float32" - - # Initialize all weigths by truncated normal initializer, and all biases - # will be initialized by constant zero by default. 
- self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config['initializer_range']) - - self._build_model(src_ids, position_ids, sentence_ids, input_mask) - - def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): - # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( - input=src_ids, - size=[self._voc_size, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._word_emb_name, initializer=self._param_initializer), - is_sparse=False) - position_emb_out = fluid.layers.embedding( - input=position_ids, - size=[self._max_position_seq_len, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._pos_emb_name, initializer=self._param_initializer)) - - sent_emb_out = fluid.layers.embedding( - sentence_ids, - size=[self._sent_types, self._emb_size], - dtype=self._dtype, - param_attr=fluid.ParamAttr( - name=self._sent_emb_name, initializer=self._param_initializer)) - - emb_out = emb_out + position_emb_out - emb_out = emb_out + sent_emb_out - - emb_out = pre_process_layer( - emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') - - if self._dtype == "float16": - input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) - - self_attn_mask = fluid.layers.matmul( - x=input_mask, y=input_mask, transpose_y=True) - self_attn_mask = fluid.layers.scale( - x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = fluid.layers.stack( - x=[self_attn_mask] * self._n_head, axis=1) - n_head_self_attn_mask.stop_gradient = True - - self._enc_out = encoder( - enc_input=emb_out, - attn_bias=n_head_self_attn_mask, - n_layer=self._n_layer, - n_head=self._n_head, - d_key=self._emb_size // self._n_head, - d_value=self._emb_size // self._n_head, - d_model=self._emb_size, - d_inner_hid=self._emb_size * 4, - prepostprocess_dropout=self._prepostprocess_dropout, - attention_dropout=self._attention_dropout, - relu_dropout=0, - hidden_act=self._hidden_act, - preprocess_cmd="", - postprocess_cmd="dan", - param_initializer=self._param_initializer, - name='encoder') - - def get_sequence_output(self): - return self._enc_out - - def get_pooled_output(self): - """Get the first feature of each sequence for classification""" - - next_sent_feat = fluid.layers.slice( - input=self._enc_out, axes=[1], starts=[0], ends=[1]) - next_sent_feat = fluid.layers.fc( - input=next_sent_feat, - size=self._emb_size, - act="tanh", - param_attr=fluid.ParamAttr( - name="pooled_fc.w_0", initializer=self._param_initializer), - bias_attr="pooled_fc.b_0") - return next_sent_feat - - def get_pretraining_output(self, mask_label, mask_pos, labels): - """Get the loss & accuracy for pretraining""" - - mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') - - # extract the first token feature in each sentence - next_sent_feat = self.get_pooled_output() - reshaped_emb_out = fluid.layers.reshape( - x=self._enc_out, shape=[-1, self._emb_size]) - # extract masked tokens' feature - mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) - - # transform: fc - mask_trans_feat = fluid.layers.fc( - input=mask_feat, - size=self._emb_size, - act=self._hidden_act, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_fc.w_0', - initializer=self._param_initializer), - bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) - # transform: layer norm - mask_trans_feat = pre_process_layer( - mask_trans_feat, 'n', name='mask_lm_trans') - - mask_lm_out_bias_attr = fluid.ParamAttr( - 
name="mask_lm_out_fc.b_0", - initializer=fluid.initializer.Constant(value=0.0)) - if self._weight_sharing: - fc_out = fluid.layers.matmul( - x=mask_trans_feat, - y=fluid.default_main_program().global_block().var( - self._word_emb_name), - transpose_y=True) - fc_out += fluid.layers.create_parameter( - shape=[self._voc_size], - dtype=self._dtype, - attr=mask_lm_out_bias_attr, - is_bias=True) - - else: - fc_out = fluid.layers.fc( - input=mask_trans_feat, - size=self._voc_size, - param_attr=fluid.ParamAttr( - name="mask_lm_out_fc.w_0", - initializer=self._param_initializer), - bias_attr=mask_lm_out_bias_attr) - - mask_lm_loss = fluid.layers.softmax_with_cross_entropy( - logits=fc_out, label=mask_label) - mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) - - next_sent_fc_out = fluid.layers.fc( - input=next_sent_feat, - size=2, - param_attr=fluid.ParamAttr( - name="next_sent_fc.w_0", initializer=self._param_initializer), - bias_attr="next_sent_fc.b_0") - - next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy( - logits=next_sent_fc_out, label=labels, return_softmax=True) - - next_sent_acc = fluid.layers.accuracy( - input=next_sent_softmax, label=labels) - - mean_next_sent_loss = fluid.layers.mean(next_sent_loss) - - loss = mean_next_sent_loss + mean_mask_lm_loss - return next_sent_acc, mean_mask_lm_loss, loss diff --git a/hub_module/modules/text/semantic_model/rbtl3/model/transformer_encoder.py b/hub_module/modules/text/semantic_model/rbtl3/model/transformer_encoder.py deleted file mode 100644 index 9a5913efc1fb6d7fc0ac5f6df37fb70424d259d9..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/rbtl3/model/transformer_encoder.py +++ /dev/null @@ -1,340 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Transformer encoder.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from functools import partial - -import paddle.fluid as fluid -import paddle.fluid.layers as layers - - -def multi_head_attention(queries, - keys, - values, - attn_bias, - d_key, - d_value, - d_model, - n_head=1, - dropout_rate=0., - cache=None, - param_initializer=None, - name='multi_head_att'): - """ - Multi-Head Attention. Note that attn_bias is added to the logit before - computing softmax activiation to mask certain selected positions so that - they will not considered in attention weights. - """ - keys = queries if keys is None else keys - values = keys if values is None else values - - if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): - raise ValueError( - "Inputs: quries, keys and values should all be 3-D tensors.") - - def __compute_qkv(queries, keys, values, n_head, d_key, d_value): - """ - Add linear projection to queries, keys, and values. 
- """ - q = layers.fc( - input=queries, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_query_fc.w_0', initializer=param_initializer), - bias_attr=name + '_query_fc.b_0') - k = layers.fc( - input=keys, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_key_fc.w_0', initializer=param_initializer), - bias_attr=name + '_key_fc.b_0') - v = layers.fc( - input=values, - size=d_value * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_value_fc.w_0', initializer=param_initializer), - bias_attr=name + '_value_fc.b_0') - return q, k, v - - def __split_heads(x, n_head): - """ - Reshape the last dimension of inpunt tensor x so that it becomes two - dimensions and then transpose. Specifically, input a tensor with shape - [bs, max_sequence_length, n_head * hidden_dim] then output a tensor - with shape [bs, n_head, max_sequence_length, hidden_dim]. - """ - hidden_size = x.shape[-1] - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - reshaped = layers.reshape( - x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) - - # permuate the dimensions into: - # [batch_size, n_head, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) - - def __combine_heads(x): - """ - Transpose and then reshape the last two dimensions of inpunt tensor x - so that it becomes one dimension, which is reverse to __split_heads. - """ - if len(x.shape) == 3: return x - if len(x.shape) != 4: - raise ValueError("Input(x) should be a 4-D Tensor.") - - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - return layers.reshape( - x=trans_x, - shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], - inplace=True) - - def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): - """ - Scaled Dot-Product Attention - """ - scaled_q = layers.scale(x=q, scale=d_key**-0.5) - product = layers.matmul(x=scaled_q, y=k, transpose_y=True) - if attn_bias: - product += attn_bias - weights = layers.softmax(product) - if dropout_rate: - weights = layers.dropout( - weights, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.matmul(weights, v) - return out - - q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) - - if cache is not None: # use cache and concat time steps - # Since the inplace reshape in __split_heads changes the shape of k and - # v, which is the cache input for next time step, reshape the cache - # input from the previous time step first. - k = cache["k"] = layers.concat( - [layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) - v = cache["v"] = layers.concat( - [layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) - - q = __split_heads(q, n_head) - k = __split_heads(k, n_head) - v = __split_heads(v, n_head) - - ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, - dropout_rate) - - out = __combine_heads(ctx_multiheads) - - # Project back to the model size. 
-    proj_out = layers.fc(
-        input=out,
-        size=d_model,
-        num_flatten_dims=2,
-        param_attr=fluid.ParamAttr(
-            name=name + '_output_fc.w_0', initializer=param_initializer),
-        bias_attr=name + '_output_fc.b_0')
-    return proj_out
-
-
-def positionwise_feed_forward(x,
-                              d_inner_hid,
-                              d_hid,
-                              dropout_rate,
-                              hidden_act,
-                              param_initializer=None,
-                              name='ffn'):
-    """
-    Position-wise Feed-Forward Networks.
-    This module consists of two linear transformations with a ReLU activation
-    in between, which is applied to each position separately and identically.
-    """
-    hidden = layers.fc(
-        input=x,
-        size=d_inner_hid,
-        num_flatten_dims=2,
-        act=hidden_act,
-        param_attr=fluid.ParamAttr(
-            name=name + '_fc_0.w_0', initializer=param_initializer),
-        bias_attr=name + '_fc_0.b_0')
-    if dropout_rate:
-        hidden = layers.dropout(
-            hidden,
-            dropout_prob=dropout_rate,
-            dropout_implementation="upscale_in_train",
-            is_test=False)
-    out = layers.fc(
-        input=hidden,
-        size=d_hid,
-        num_flatten_dims=2,
-        param_attr=fluid.ParamAttr(
-            name=name + '_fc_1.w_0', initializer=param_initializer),
-        bias_attr=name + '_fc_1.b_0')
-    return out
-
-
-def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.,
-                           name=''):
-    """
-    Add residual connection, layer normalization and dropout to the out tensor
-    optionally according to the value of process_cmd.
-    This will be used before or after multi-head attention and position-wise
-    feed-forward networks.
-    """
-    for cmd in process_cmd:
-        if cmd == "a":  # add residual connection
-            out = out + prev_out if prev_out else out
-        elif cmd == "n":  # add layer normalization
-            out_dtype = out.dtype
-            if out_dtype == fluid.core.VarDesc.VarType.FP16:
-                out = layers.cast(x=out, dtype="float32")
-            out = layers.layer_norm(
-                out,
-                begin_norm_axis=len(out.shape) - 1,
-                param_attr=fluid.ParamAttr(
-                    name=name + '_layer_norm_scale',
-                    initializer=fluid.initializer.Constant(1.)),
-                bias_attr=fluid.ParamAttr(
-                    name=name + '_layer_norm_bias',
-                    initializer=fluid.initializer.Constant(0.)))
-            if out_dtype == fluid.core.VarDesc.VarType.FP16:
-                out = layers.cast(x=out, dtype="float16")
-        elif cmd == "d":  # add dropout
-            if dropout_rate:
-                out = layers.dropout(
-                    out,
-                    dropout_prob=dropout_rate,
-                    dropout_implementation="upscale_in_train",
-                    is_test=False)
-    return out
-
-
-pre_process_layer = partial(pre_post_process_layer, None)
-post_process_layer = pre_post_process_layer
-
-
-def encoder_layer(enc_input,
-                  attn_bias,
-                  n_head,
-                  d_key,
-                  d_value,
-                  d_model,
-                  d_inner_hid,
-                  prepostprocess_dropout,
-                  attention_dropout,
-                  relu_dropout,
-                  hidden_act,
-                  preprocess_cmd="n",
-                  postprocess_cmd="da",
-                  param_initializer=None,
-                  name=''):
-    """The encoder layers that can be stacked to form a deep encoder.
-    This module consists of a multi-head (self) attention followed by
-    position-wise feed-forward networks; both components are wrapped with
-    post_process_layer to add residual connection, layer normalization
-    and dropout.
- """ - attn_output = multi_head_attention( - pre_process_layer( - enc_input, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_att'), - None, - None, - attn_bias, - d_key, - d_value, - d_model, - n_head, - attention_dropout, - param_initializer=param_initializer, - name=name + '_multi_head_att') - attn_output = post_process_layer( - enc_input, - attn_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_att') - ffd_output = positionwise_feed_forward( - pre_process_layer( - attn_output, - preprocess_cmd, - prepostprocess_dropout, - name=name + '_pre_ffn'), - d_inner_hid, - d_model, - relu_dropout, - hidden_act, - param_initializer=param_initializer, - name=name + '_ffn') - return post_process_layer( - attn_output, - ffd_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_ffn') - - -def encoder(enc_input, - attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name=''): - """ - The encoder is composed of a stack of identical layers returned by calling - encoder_layer. - """ - for i in range(n_layer): - enc_output = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name + '_layer_' + str(i)) - enc_input = enc_output - enc_output = pre_process_layer( - enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") - - return enc_output diff --git a/hub_module/modules/text/semantic_model/rbtl3/module.py b/hub_module/modules/text/semantic_model/rbtl3/module.py deleted file mode 100644 index d21cec527fa95cf91cc82bfc9ff074da8aecb80d..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/rbtl3/module.py +++ /dev/null @@ -1,75 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-from paddlehub import TransformerModule
-from paddlehub.module.module import moduleinfo
-
-from rbtl3.model.bert import BertConfig, BertModel
-
-
-@moduleinfo(
-    name="rbtl3",
-    version="1.0.0",
-    summary="rbtl3, 3-layer, 1024-hidden, 16-heads, 61M parameters",
-    author="ymcui",
-    author_email="ymcui@ir.hit.edu.cn",
-    type="nlp/semantic_model",
-)
-class BertWwm(TransformerModule):
-    def _initialize(self):
-        self.MAX_SEQ_LEN = 512
-        self.params_path = os.path.join(self.directory, "assets", "params")
-        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
-
-        bert_config_path = os.path.join(self.directory, "assets",
-                                        "bert_config_rbtl3.json")
-        self.bert_config = BertConfig(bert_config_path)
-
-    def net(self, input_ids, position_ids, segment_ids, input_mask):
-        """
-        Create the neural network.
-
-        Args:
-            input_ids (tensor): the word ids.
-            position_ids (tensor): the position ids.
-            segment_ids (tensor): the segment ids.
-            input_mask (tensor): the padding mask.
-
-        Returns:
-            pooled_output (tensor): sentence-level output for classification task.
-            sequence_output (tensor): token-level output for sequence task.
-        """
-        bert = BertModel(
-            src_ids=input_ids,
-            position_ids=position_ids,
-            sentence_ids=segment_ids,
-            input_mask=input_mask,
-            config=self.bert_config,
-            use_fp16=False)
-        pooled_output = bert.get_pooled_output()
-        sequence_output = bert.get_sequence_output()
-        return pooled_output, sequence_output
-
-
-if __name__ == '__main__':
-    test_module = BertWwm()
diff --git a/hub_module/modules/text/semantic_model/simnet_bow/module.py b/hub_module/modules/text/semantic_model/simnet_bow/module.py
deleted file mode 100644
index 3bb16adfe1ad2f1d0092f410b2163afd705486a2..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/simnet_bow/module.py
+++ /dev/null
@@ -1,460 +0,0 @@
-# -*- coding:utf-8 -*-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import argparse
-import ast
-import json
-import math
-import os
-import six
-
-import numpy as np
-import paddle.fluid as fluid
-from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
-import paddlehub as hub
-from paddlehub.common.paddle_helper import add_vars_prefix, get_variable_info
-from paddlehub.common.utils import sys_stdin_encoding
-from paddlehub.io.parser import txt_parser
-from paddlehub.module.module import serving
-from paddlehub.module.module import moduleinfo
-from paddlehub.module.module import runnable
-
-from simnet_bow.processor import load_vocab, preprocess, postprocess
-
-
-class DataFormatError(Exception):
-    def __init__(self, *args):
-        self.args = args
-
-
-@moduleinfo(
-    name="simnet_bow",
-    version="1.2.0",
-    summary=
-    "Baidu's open-source similarity network model based on bow_pairwise.",
-    author="baidu-nlp",
-    author_email="",
-    type="nlp/sentiment_analysis")
-class SimnetBow(hub.Module):
-    def _initialize(self):
-        """
-        initialize with the necessary elements
-        """
-        self.pretrained_model_path = os.path.join(self.directory, "assets",
-                                                  "infer_model")
-        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
-        self.vocab = load_vocab(self.vocab_path)
-        self.param_file = os.path.join(self.directory, "assets", "params.txt")
-
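-        # The LAC word-segmentation module is loaded lazily on first use; see
-        # the word_seg_module property below.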
self._word_seg_module = None
-
-        self._set_config()
-
-    @property
-    def word_seg_module(self):
-        """
-        lac module
-        """
-        if not self._word_seg_module:
-            self._word_seg_module = hub.Module(name="lac")
-        return self._word_seg_module
-
-    def _set_config(self):
-        """
-        predictor config setting
-        """
-        cpu_config = AnalysisConfig(self.pretrained_model_path)
-        cpu_config.disable_glog_info()
-        cpu_config.disable_gpu()
-        cpu_config.switch_ir_optim(False)
-        self.cpu_predictor = create_paddle_predictor(cpu_config)
-
-        try:
-            _places = os.environ["CUDA_VISIBLE_DEVICES"]
-            int(_places[0])
-            use_gpu = True
-        except:
-            use_gpu = False
-        if use_gpu:
-            gpu_config = AnalysisConfig(self.pretrained_model_path)
-            gpu_config.disable_glog_info()
-            gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0)
-            self.gpu_predictor = create_paddle_predictor(gpu_config)
-
-    def context(self, trainable=False, max_seq_len=128, num_slots=1):
-        """
-        Get the input, output and program of the pretrained simnet_bow.
-
-        Args:
-            trainable(bool): whether to fine-tune the pretrained parameters of simnet_bow or not.
-            max_seq_len (int): It will limit the total sequence returned so that it has a maximum length.
-            num_slots(int): the number of data inputs fed to the model, selected from the following options:
-
-                - 1(default): one data input is fed to the model, e.g. the module is used for a sentence classification task.
-                - 2: two data inputs are fed to the model, e.g. the module is used for a text matching task (point-wise).
-                - 3: three data inputs are fed to the model, e.g. the module is used for a text matching task (pair-wise).
-
-        Returns:
-            inputs(dict): the input variables of simnet_bow (words)
-            outputs(dict): the output variables of input words (word embeddings) and the sequence length of the first input_text
-            main_program(Program): the main_program of simnet_bow with pretrained parameters
-        """
-        assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots
-        main_program = fluid.Program()
-        startup_program = fluid.Program()
-        with fluid.program_guard(main_program, startup_program):
-            text_1 = fluid.layers.data(
-                name="text",
-                shape=[-1, max_seq_len, 1],
-                dtype="int64",
-                lod_level=0)
-            seq_len = fluid.layers.data(
-                name="seq_len", shape=[1], dtype='int64', lod_level=0)
-            seq_len_used = fluid.layers.squeeze(seq_len, axes=[1])
-
-            # Add embedding layer.
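-            # All text slots share a single [dict_dim, 128] embedding table
-            # named "emb"; the last vocabulary id (dict_dim - 1) is reserved
-            # as the padding index.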
-            w_param_attrs = fluid.ParamAttr(
-                name="emb",
-                initializer=fluid.initializer.TruncatedNormal(scale=0.02),
-                trainable=trainable)
-            dict_dim = 500002
-            emb_1 = fluid.layers.embedding(
-                input=text_1,
-                size=[dict_dim, 128],
-                is_sparse=True,
-                padding_idx=dict_dim - 1,
-                dtype='float32',
-                param_attr=w_param_attrs)
-            emb_1_name = emb_1.name
-            data_list = [text_1]
-            emb_name_list = [emb_1_name]
-
-            if num_slots > 1:
-                text_2 = fluid.data(
-                    name='text_2',
-                    shape=[-1, max_seq_len],
-                    dtype='int64',
-                    lod_level=0)
-                emb_2 = fluid.embedding(
-                    input=text_2,
-                    size=[dict_dim, 128],
-                    is_sparse=True,
-                    padding_idx=dict_dim - 1,
-                    dtype='float32',
-                    param_attr=w_param_attrs)
-                emb_2_name = emb_2.name
-                data_list.append(text_2)
-                emb_name_list.append(emb_2_name)
-
-            if num_slots > 2:
-                text_3 = fluid.data(
-                    name='text_3',
-                    shape=[-1, max_seq_len],
-                    dtype='int64',
-                    lod_level=0)
-                emb_3 = fluid.embedding(
-                    input=text_3,
-                    size=[dict_dim, 128],
-                    is_sparse=True,
-                    padding_idx=dict_dim - 1,
-                    dtype='float32',
-                    param_attr=w_param_attrs)
-                emb_3_name = emb_3.name
-                data_list.append(text_3)
-                emb_name_list.append(emb_3_name)
-
-            variable_names = filter(
-                lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"],
-                list(main_program.global_block().vars.keys()))
-            prefix_name = "@HUB_{}@".format(self.name)
-            add_vars_prefix(
-                program=main_program, prefix=prefix_name, vars=variable_names)
-
-            for param in main_program.global_block().iter_parameters():
-                param.trainable = trainable
-
-        place = fluid.CPUPlace()
-        exe = fluid.Executor(place)
-
-        # Load the simnet_bow pretrained model.
-        def if_exist(var):
-            return os.path.exists(
-                os.path.join(self.pretrained_model_path, var.name))
-
-        fluid.io.load_vars(
-            exe, self.pretrained_model_path, predicate=if_exist)
-
-        inputs = {'seq_len': seq_len}
-        outputs = {}
-        for index, data in enumerate(data_list):
-            if index == 0:
-                inputs['text'] = data
-                outputs['emb'] = main_program.global_block().vars[
-                    prefix_name + emb_name_list[0]]
-            else:
-                inputs['text_%s' % (index + 1)] = data
-                outputs['emb_%s' % (index + 1)] = main_program.global_block(
-                ).vars[prefix_name + emb_name_list[index]]
-        return inputs, outputs, main_program
-
-    def texts2tensor(self, texts):
-        """
-        Transform the texts (list) to PaddleTensor
-        Args:
-            texts(list): texts
-        Returns:
-            tensor(PaddleTensor): tensor with texts data
-        """
-        lod = [0]
-        data = []
-        for i, text in enumerate(texts):
-            data += text['processed']
-            lod.append(len(text['processed']) + lod[i])
-        tensor = PaddleTensor(np.array(data).astype('int64'))
-        tensor.name = "words"
-        tensor.lod = [lod]
-        tensor.shape = [lod[-1], 1]
-        return tensor
-
-    def to_unicode(self, texts):
-        """
-        Convert each element's type(str) of texts(list) to unicode in python2.7
-        Args:
-            texts(list): each element's type is str in python2.7
-        Returns:
-            texts(list): each element's type is unicode in python2.7
-        """
-
-        if six.PY2:
-            unicode_texts = []
-            for text in texts:
-                if isinstance(text, six.string_types):
-                    unicode_texts.append(
-                        text.decode(sys_stdin_encoding()).decode("utf8"))
-                else:
-                    unicode_texts.append(text)
-            texts = unicode_texts
-        return texts
-
-    def check_data(self, texts=[], data={}):
-        """
-        Check the input data.
-        Args:
-            texts(list): the input texts to be predicted; if texts is used instead of data,
-                the first element is text_1(list) and the second element is text_2(list),
-                such as [['这道题很难'], ['这道题不简单']].
-            data(dict): keys must be 'text_1' and 'text_2'; each value is the texts(list) to be predicted
-        Returns:
-            results(dict): the validated data to be predicted
-        """
-        predicted_data = {'text_1': [], 'text_2': []}
-        if texts != [] and isinstance(texts, list) and len(texts) == 2 and (len(
-                texts[0]) == len(
-                    texts[1])) and texts[0] and texts[1] and data == {}:
-
-            predicted_data['text_1'] = texts[0]
-            predicted_data['text_2'] = texts[1]
-
-        elif texts == [] and isinstance(data, dict) and isinstance(
-                data.get('text_1', None), list) and isinstance(
-                    data.get('text_2', None),
-                    list) and (len(data['text_1']) == len(
-                        data['text_2'])) and data['text_1'] and data['text_2']:
-
-            predicted_data = data
-
-        else:
-            raise ValueError(
-                "The input data is inconsistent with expectations.")
-
-        return predicted_data
-
-    @serving
-    def similarity(self, texts=[], data={}, use_gpu=False, batch_size=1):
-        """
-        Get the similarity prediction results with the texts as input.
-        Args:
-            texts(list): the input texts to be predicted; if texts is used instead of data,
-                the first element is text_1(list) and the second element is text_2(list),
-                such as [['这道题很难'], ['这道题不简单']].
-            data(dict): keys must be 'text_1' and 'text_2'; each value is the texts(list) to be predicted
-            use_gpu(bool): whether to use gpu for prediction or not
-            batch_size(int): the program deals once with one batch
-        Returns:
-            results(list): the similarity results
-        """
-        if use_gpu:
-            try:
-                _places = os.environ["CUDA_VISIBLE_DEVICES"]
-                int(_places[0])
-            except:
-                raise RuntimeError(
-                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
-                )
-
-        data = self.check_data(texts, data)
-
-        start_idx = 0
-        iteration = int(math.ceil(len(data['text_1']) / batch_size))
-        results = []
-        for i in range(iteration):
-            batch_data = {'text_1': [], 'text_2': []}
-            batch_data['text_1'] = data['text_1'][start_idx:(
-                start_idx + batch_size)]
-            batch_data['text_2'] = data['text_2'][start_idx:(
-                start_idx + batch_size)]
-            start_idx = start_idx + batch_size
-            processed_results = preprocess(self.word_seg_module, self.vocab,
-                                           batch_data, use_gpu, batch_size)
-
-            tensor_words_1 = self.texts2tensor(processed_results["text_1"])
-            tensor_words_2 = self.texts2tensor(processed_results["text_2"])
-
-            if use_gpu:
-                batch_out = self.gpu_predictor.run(
-                    [tensor_words_1, tensor_words_2])
-            else:
-                batch_out = self.cpu_predictor.run(
-                    [tensor_words_1, tensor_words_2])
-            batch_result = postprocess(batch_out[1], processed_results)
-            results += batch_result
-        return results
-
-    @runnable
-    def run_cmd(self, argvs):
-        """
-        Run as a command
-        """
-        self.parser = argparse.ArgumentParser(
-            description="Run the simnet_bow module.",
-            prog='hub run simnet_bow',
-            usage='%(prog)s',
-            add_help=True)
-
-        self.arg_input_group = self.parser.add_argument_group(
-            title="Input options", description="Input data. Required")
Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - - self.add_module_config_arg() - self.add_module_input_arg() - - args = self.parser.parse_args(argvs) - - try: - input_data = self.check_input_data(args) - except DataFormatError and RuntimeError: - self.parser.print_help() - return None - - results = self.similarity( - data=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size) - - return results - - def add_module_config_arg(self): - """ - Add the command config options - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU for prediction") - - self.arg_config_group.add_argument( - '--batch_size', - type=int, - default=1, - help="batch size for prediction") - - def add_module_input_arg(self): - """ - Add the command input options - """ - self.arg_input_group.add_argument( - '--input_file', - type=str, - default=None, - help="file contain input data") - self.arg_input_group.add_argument( - '--text_1', type=str, default=None, help="text to predict") - self.arg_input_group.add_argument( - '--text_2', type=str, default=None, help="text to predict") - - def check_input_data(self, args): - input_data = {} - if args.input_file: - if not os.path.exists(args.input_file): - print("File %s is not exist." % args.input_file) - raise RuntimeError - else: - input_data = txt_parser.parse(args.input_file, use_strip=True) - elif args.text_1 and args.text_2: - if args.text_1.strip() != '' and args.text_2.strip() != '': - if six.PY2: - input_data = { - "text_1": [ - args.text_1.strip().decode( - sys_stdin_encoding()).decode("utf8") - ], - "text_2": [ - args.text_2.strip().decode( - sys_stdin_encoding()).decode("utf8") - ] - } - else: - input_data = { - "text_1": [args.text_1], - "text_2": [args.text_2] - } - else: - print( - "ERROR: The input data is inconsistent with expectations.") - - if input_data == {}: - print("ERROR: The input data is inconsistent with expectations.") - raise DataFormatError - - return input_data - - def get_vocab_path(self): - """ - Get the path to the vocabulary whih was used to pretrain - Returns: - self.vocab_path(str): the path to vocabulary - """ - return self.vocab_path - - -if __name__ == "__main__": - - simnet_bow = SimnetBow() - inputs, outputs, program = simnet_bow.context(num_slots=3) - print(inputs) - print(outputs) - - # Data to be predicted - test_text_1 = ["这道题太难了", "这道题太难了", "这道题太难了"] - test_text_2 = ["这道题是上一年的考题", "这道题不简单", "这道题很有意思"] - - inputs = {"text_1": test_text_1, "text_2": test_text_2} - results = simnet_bow.similarity(data=inputs, batch_size=2) - print(results) - max_score = -1 - result_text = "" - for result in results: - if result['similarity'] > max_score: - max_score = result['similarity'] - result_text = result['text_2'] - - print("The most matching with the %s is %s" % (test_text_1[0], result_text)) diff --git a/hub_module/modules/text/semantic_model/simnet_bow/processor.py b/hub_module/modules/text/semantic_model/simnet_bow/processor.py deleted file mode 100644 index 54b0e11f3fcdcf5881fd6697d82d6d091d8ebe54..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/simnet_bow/processor.py +++ /dev/null @@ -1,65 +0,0 @@ -# -*- coding: utf-8 -*- -import io - - -def load_vocab(file_path): - """ - load the given vocabulary - """ - vocab = {} - with io.open(file_path, 'r', encoding='utf8') as f: - wid = 0 - for line in f: - 
line = line.rstrip()
-            parts = line.split('\t')
-            vocab[parts[0]] = int(parts[1])
-    vocab["<unk>"] = len(vocab)
-    return vocab
-
-
-text_a_key = "text_1"
-text_b_key = "text_2"
-
-
-def preprocess(lac, word_dict, data_dict, use_gpu=False, batch_size=1):
-    """
-    Convert the word str to word id and pad the text
-    """
-    result = {text_a_key: [], text_b_key: []}
-    processed_a = lac.lexical_analysis(
-        data={'text': data_dict[text_a_key]},
-        use_gpu=use_gpu,
-        batch_size=batch_size)
-    processed_b = lac.lexical_analysis(
-        data={'text': data_dict[text_b_key]}, use_gpu=use_gpu)
-    unk_id = word_dict['<unk>']
-    for index, (text_a, text_b) in enumerate(zip(processed_a, processed_b)):
-        result_i = {'processed': []}
-        result_i['origin'] = data_dict[text_a_key][index]
-        for word in text_a['word']:
-            _index = word_dict.get(word, unk_id)
-            result_i['processed'].append(_index)
-        result[text_a_key].append(result_i)
-
-        result_i = {'processed': []}
-        result_i['origin'] = data_dict[text_b_key][index]
-        for word in text_b['word']:
-            _index = word_dict.get(word, unk_id)
-            result_i['processed'].append(_index)
-        result[text_b_key].append(result_i)
-    return result
-
-
-def postprocess(predict_out, data_info):
-    """
-    Convert the model's output tensor to similarity results
-    """
-    result = []
-    pred = predict_out.as_ndarray()
-    for index in range(len(data_info[text_a_key])):
-        result_i = {}
-        result_i[text_a_key] = data_info[text_a_key][index]['origin']
-        result_i[text_b_key] = data_info[text_b_key][index]['origin']
-        result_i['similarity'] = float('%.4f' % pred[index][0])
-        result.append(result_i)
-    return result
diff --git a/hub_module/modules/text/semantic_model/slda_news/document.py b/hub_module/modules/text/semantic_model/slda_news/document.py
deleted file mode 100644
index b79acd0794f60fb0a401726aea0b8b51deda90a1..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/slda_news/document.py
+++ /dev/null
@@ -1,179 +0,0 @@
-import numpy as np
-
-
-class Topic(object):
-    """Basic data structure of topic, contains topic id and
-    corresponding probability.
-    """
-
-    def __init__(self, tid, prob):
-        self.tid = tid  # topic id
-        self.prob = prob  # topic probability
-
-
-class Token(object):
-    """Basic storage unit of LDA documents, contains word id
-    and corresponding topic.
-    """
-
-    def __init__(self, topic, id):
-        self.topic = topic
-        self.id = id
-
-
-class Sentence(object):
-    """Basic storage unit of SentenceLDA documents, contains word ids
-    of the sentence and its corresponding topic id.
-    """
-
-    def __init__(self, topic, tokens):
-        self.topic = topic
-        self.tokens = tokens
-
-
-class LDADoc(object):
-    """The storage structure of LDA model's inference result.
-    """
-
-    def __init__(self):
-        self._num_topics = None  # Number of topics.
-        self._num_accum = None  # Number of accumulated sample rounds.
-        self._alpha = None  # Document prior parameter.
-        self._tokens = None  # Storage structure of inference results.
-        self._topic_sum = None  # Document's topic sum in one round of samples.
-        self._accum_topic_sum = None  # Accumulated results of topic sum.
-
-    def init(self, num_topics):
-        """Initialize the LDADoc according to num_topics.
-        """
-        self._num_topics = num_topics
-        self._num_accum = 0
-        self._tokens = []
-        self._topic_sum = np.zeros(self._num_topics)
-        self._accum_topic_sum = np.zeros(self._num_topics)
-
-    def add_token(self, token):
-        """Add new word to current LDADoc.
-        Arg:
-            token: Token class object.
-        """
-        assert token.topic >= 0, "Topic %d out of range!"
% token.topic - assert token.topic < self._num_topics, "Topic %d out of range!" % token.topic - self._tokens.append(token) - self._topic_sum[token.topic] += 1 - - def token(self, index): - return self._tokens[index] - - def set_topic(self, index, new_topic): - """Set the index word's topic to new_topic, and update the corresponding - topic distribution. - """ - assert new_topic >= 0, "Topic %d out of range!" % new_topic - assert new_topic < self._num_topics, "Topic %d out of range!" % new_topic - old_topic = self._tokens[index].topic - if new_topic == old_topic: - return - self._tokens[index].topic = new_topic - self._topic_sum[old_topic] -= 1 - self._topic_sum[new_topic] += 1 - - def set_alpha(self, alpha): - self._alpha = alpha - - def size(self): - """Return number of words in LDADoc. - """ - return len(self._tokens) - - def topic_sum(self, topic_id): - return self._topic_sum[topic_id] - - def sparse_topic_dist(self, sort=True): - """Return the topic distribution of documents in sparse format. - By default, it is sorted according to the topic probability - under the descending order. - """ - topic_dist = [] - sum_ = np.sum(self._accum_topic_sum) - if sum_ == 0: - return topic_dist - for i in range(0, self._num_topics): - if self._accum_topic_sum[i] == 0: - continue - topic_dist.append(Topic(i, self._accum_topic_sum[i] * 1.0 / sum_)) - if sort: - - def take_elem(topic): - return topic.prob - - topic_dist.sort(key=take_elem, reverse=True) - if topic_dist is None: - topic_dist = [] - - return topic_dist - - def dense_topic_dist(self): - """Return the distribution of document topics in dense format, - taking into account the prior parameter alpha. - """ - dense_dist = np.zeros(self._num_topics) - if self.size() == 0: - return dense_dist - dense_dist = ( - self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / ( - self.size() + self._alpha * self._num_topics) - return dense_dist - - def accumulate_topic_num(self): - self._accum_topic_sum += self._topic_sum - self._num_accum += 1 - - -class SLDADoc(LDADoc): - """Sentence LDA Document, inherited from LDADoc. - Add add_sentence interface. - """ - - def __init__(self): - super().__init__() - self.__sentences = None - - def init(self, num_topics): - """Initialize the SLDADoc according to num_topics. - """ - self._num_topics = num_topics - self.__sentences = [] - self._num_accum = 0 - self._topic_sum = np.zeros(self._num_topics) - self._accum_topic_sum = np.zeros(self._num_topics) - - def add_sentence(self, sent): - """Add new sentence to current SLDADoc. - Arg: - sent: Sentence class object. - """ - assert sent.topic >= 0, "Topic %d out of range!" % (sent.topic) - assert sent.topic < self._num_topics, "Topic %d out of range!" % ( - sent.topic) - self.__sentences.append(sent) - self._topic_sum[sent.topic] += 1 - - def set_topic(self, index, new_topic): - assert new_topic >= 0, "Topic %d out of range!" % (new_topic) - assert new_topic < self._num_topics, "Topic %d out of range!" % ( - new_topic) - old_topic = self.__sentences[index].topic - if new_topic == old_topic: - return - self.__sentences[index].topic = new_topic - self._topic_sum[old_topic] -= 1 - self._topic_sum[new_topic] += 1 - - def size(self): - """Return number of sentences in SLDADoc. 
- """ - return len(self.__sentences) - - def sent(self, index): - return self.__sentences[index] diff --git a/hub_module/modules/text/semantic_model/slda_news/inference_engine.py b/hub_module/modules/text/semantic_model/slda_news/inference_engine.py deleted file mode 100644 index ad12a12507a41582e9546c9ddabedec33496af67..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/slda_news/inference_engine.py +++ /dev/null @@ -1,94 +0,0 @@ -import os - -from paddlehub.common.logger import logger - -from slda_news.config import ModelConfig -from slda_news.util import load_prototxt, fix_random_seed, rand_k -from slda_news.model import TopicModel -from slda_news.sampler import GibbsSampler, MHSampler -from slda_news.document import LDADoc, SLDADoc, Token, Sentence -from slda_news.vocab import OOV - - -class SamplerType: - GibbsSampling = 0 - MetropolisHastings = 1 - - -class InferenceEngine(object): - def __init__(self, - model_dir, - conf_file, - type=SamplerType.MetropolisHastings): - # Read model configuration. - config = ModelConfig() - conf_file_path = os.path.join(model_dir, conf_file) - load_prototxt(conf_file_path, config) - self.__model = TopicModel(model_dir, config) - self.__config = config - - # Initialize the sampler according to the configuration. - if type == SamplerType.GibbsSampling: - self.__sampler = GibbsSampler(self.__model) - elif type == SamplerType.MetropolisHastings: - self.__sampler = MHSampler(self.__model) - - def infer(self, input, doc): - """Perform LDA topic inference on input, and store the results in doc. - Args: - input: a list of strings after tokenization. - doc: LDADoc type or SLDADoc type. - """ - fix_random_seed() - if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): - doc.init(self.__model.num_topics()) - doc.set_alpha(self.__model.alpha()) - for token in input: - id_ = self.__model.term_id(token) - if id_ != OOV: - init_topic = rand_k(self.__model.num_topics()) - doc.add_token(Token(init_topic, id_)) - self.lda_infer(doc, 20, 50) - elif isinstance(doc, SLDADoc): - doc.init(self.__model.num_topics()) - doc.set_alpha(self.__model.alpha()) - for sent in input: - words = [] - for token in sent: - id_ = self.__model.term_id(token) - if id_ != OOV: - words.append(id_) - init_topic = rand_k(self.__model.num_topics()) - doc.add_sentence(Sentence(init_topic, words)) - self.slda_infer(doc, 20, 50) - else: - logger.error("Wrong Doc Type!") - - def lda_infer(self, doc, burn_in_iter, total_iter): - assert burn_in_iter >= 0 - assert total_iter > 0 - assert total_iter > burn_in_iter - - for iter_ in range(total_iter): - self.__sampler.sample_doc(doc) - if iter_ >= burn_in_iter: - doc.accumulate_topic_num() - - def slda_infer(self, doc, burn_in_iter, total_iter): - assert burn_in_iter >= 0 - assert total_iter > 0 - assert total_iter > burn_in_iter - - for iter_ in range(total_iter): - self.__sampler.sample_doc(doc) - if iter_ >= burn_in_iter: - doc.accumulate_topic_num() - - def model_type(self): - return self.__model.type() - - def get_model(self): - return self.__model - - def get_config(self): - return self.__config diff --git a/hub_module/modules/text/semantic_model/slda_news/model.py b/hub_module/modules/text/semantic_model/slda_news/model.py deleted file mode 100644 index c8f4647d084d85be9f94330e319112693e9e593e..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/slda_news/model.py +++ /dev/null @@ -1,127 +0,0 @@ -import os -from collections import OrderedDict - -import numpy as np -from tqdm 
import tqdm
-from paddlehub.common.logger import logger
-
-from slda_news.vocab import Vocab, WordCount
-
-
-class TopicModel(object):
-    """Storage structure of the topic model, including the vocabulary and the
-    word topic counts.
-    """
-
-    def __init__(self, model_dir, config):
-        """
-        Args:
-            model_dir: the path of model directory
-            config: ModelConfig class.
-        """
-        self.__word_topic = None  # Model parameter of word topic.
-        self.__vocab = Vocab()  # Vocab data structure of model.
-        self.__num_topics = config.num_topics  # Number of topics.
-        self.__alpha = config.alpha
-        self.__alpha_sum = self.__alpha * self.__num_topics
-        self.__beta = config.beta
-        self.__beta_sum = None
-        self.__type = config.type  # Model type.
-        self.__topic_sum = np.zeros(
-            self.__num_topics,
-            dtype="int64")  # Accumulated sum of each topic in word topic.
-        self.__topic_words = [[] for _ in range(self.__num_topics)]
-        word_topic_path = os.path.join(model_dir, config.word_topic_file)
-        vocab_path = os.path.join(model_dir, config.vocab_file)
-        self.load_model(word_topic_path, vocab_path)
-
-    def term_id(self, term):
-        return self.__vocab.get_id(term)
-
-    def load_model(self, word_topic_path, vocab_path):
-
-        # Loading vocabulary
-        self.__vocab.load(vocab_path)
-
-        self.__beta_sum = self.__beta * self.__vocab.size()
-        self.__word_topic = [{} for _ in range(self.__vocab.size())]  # list of dicts, one per term id
-        self.__load_word_dict(word_topic_path)
-        logger.info(
-            "Model Info: #num_topics=%d #vocab_size=%d alpha=%f beta=%f" %
-            (self.num_topics(), self.vocab_size(), self.alpha(), self.beta()))
-
-    def word_topic_value(self, word_id, topic_id):
-        """Return the count of a specific word under a specific topic in the model.
-        """
-        word_dict = self.__word_topic[word_id]
-        if topic_id not in word_dict:
-            return 0
-        return word_dict[topic_id]
-
-    def word_topic(self, term_id):
-        """Return the topic distribution of a word.
-        """
-        return self.__word_topic[term_id]
-
-    def topic_sum_value(self, topic_id):
-        return self.__topic_sum[topic_id]
-
-    def topic_sum(self):
-        return self.__topic_sum
-
-    def num_topics(self):
-        return self.__num_topics
-
-    def vocab_size(self):
-        return self.__vocab.size()
-
-    def alpha(self):
-        return self.__alpha
-
-    def alpha_sum(self):
-        return self.__alpha_sum
-
-    def beta(self):
-        return self.__beta
-
-    def beta_sum(self):
-        return self.__beta_sum
-
-    def type(self):
-        return self.__type
-
-    def __load_word_dict(self, word_dict_path):
-        """Load the word topic parameters.
-        """
-        logger.info("Loading word topic.")
-        with open(word_dict_path, 'r', encoding='utf-8') as f:
-            for line in tqdm(f.readlines()):
-                fields = line.strip().split(" ")
-                assert len(fields) > 0, "Model file format error!"
-                term_id = int(fields[0])
-                assert term_id < self.vocab_size(), "Term id out of range!"
-                assert term_id >= 0, "Term id out of range!"
-                for i in range(1, len(fields)):
-                    topic_count = fields[i].split(":")
-                    assert len(topic_count) == 2, "Topic count format error!"
-
-                    topic_id = int(topic_count[0])
-                    assert topic_id >= 0, "Topic out of range!"
-                    assert topic_id < self.__num_topics, "Topic out of range!"
-
-                    count = int(topic_count[1])
-                    assert count >= 0, "Topic count error!"
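-                    # Each field after the term id encodes one (topic, count)
-                    # pair, so a model line looks like
-                    # "term_id topic:count topic:count ..."
-                    # (e.g. "42 3:17 9:5", illustrative values only).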
- - self.__word_topic[term_id][topic_id] = count - self.__topic_sum[topic_id] += count - self.__topic_words[topic_id].append( - WordCount(term_id, count)) - new_dict = OrderedDict() - for key in sorted(self.__word_topic[term_id]): - new_dict[key] = self.__word_topic[term_id][key] - self.__word_topic[term_id] = new_dict - - def get_vocab(self): - return self.__vocab.vocabulary() - - def topic_words(self): - return self.__topic_words diff --git a/hub_module/modules/text/semantic_model/slda_news/module.py b/hub_module/modules/text/semantic_model/slda_news/module.py deleted file mode 100644 index 60d14ad271862139fd20a32736cc6b9d7222b3cf..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/slda_news/module.py +++ /dev/null @@ -1,101 +0,0 @@ -import os - -import paddlehub as hub -from paddlehub.module.module import moduleinfo -from paddlehub.common.logger import logger - -from slda_news.inference_engine import InferenceEngine -from slda_news.document import SLDADoc -from slda_news.semantic_matching import SemanticMatching, WordAndDis -from slda_news.tokenizer import LACTokenizer, SimpleTokenizer -from slda_news.config import ModelType -from slda_news.vocab import Vocab, WordCount - - -@moduleinfo( - name="slda_news", - version="1.0.0", - summary= - "This is a PaddleHub Module for SLDA topic model in news dataset, where we can infer the topic distribution of document.", - author="DesmonDay", - author_email="", - type="nlp/semantic_model") -class TopicModel(hub.Module): - def _initialize(self): - """Initialize with the necessary elements. - """ - self.model_dir = os.path.join(self.directory, 'news') - self.conf_file = 'slda.conf' - self.__engine = InferenceEngine(self.model_dir, self.conf_file) - self.vocab_path = os.path.join(self.model_dir, 'vocab_info.txt') - lac = hub.Module(name="lac") - # self.__tokenizer = SimpleTokenizer(self.vocab_path) - self.__tokenizer = LACTokenizer(self.vocab_path, lac) - - self.vocabulary = self.__engine.get_model().get_vocab() - self.config = self.__engine.get_config() - self.topic_words = self.__engine.get_model().topic_words() - self.topic_sum_table = self.__engine.get_model().topic_sum() - - def take_elem(word_count): - return word_count.count - - for i in range(self.config.num_topics): - self.topic_words[i].sort(key=take_elem, reverse=True) - - logger.info("Finish Initialization.") - - def infer_doc_topic_distribution(self, document): - """ - This interface infers the topic distribution of document. - - Args: - document(str): the input document text. - - Returns: - results(list): returns the topic distribution of document. - """ - tokens = self.__tokenizer.tokenize(document) - if tokens == []: - return [] - results = [] - sentences = [] - sent = [] - for i in range(len(tokens)): - sent.append(tokens[i]) - if len(sent) % 5 == 0: - sentences.append(sent) - sent = [] - if len(sent) > 0: - sentences.append(sent) - doc = SLDADoc() - self.__engine.infer(sentences, doc) - topics = doc.sparse_topic_dist() - for topic in topics: - results.append({"topic id": topic.tid, "distribution": topic.prob}) - return results - - def show_topic_keywords(self, topic_id, k=10): - """ - This interface returns the k keywords under specific topic. - - Args: - topic_id(int): topic information we want to know. - k(int): top k keywords. - - Returns: - results(dict): contains specific topic's keywords and corresponding - probability. 
-        """
-        EPS = 1e-8
-        results = {}
-        if 0 <= topic_id < self.config.num_topics:
-            k = min(k, len(self.topic_words[topic_id]))
-            for i in range(k):
-                prob = self.topic_words[topic_id][i].count / \
-                       (self.topic_sum_table[topic_id] + EPS)
-                results[self.vocabulary[self.topic_words[topic_id]
-                                        [i].word_id]] = prob
-            return results
-        else:
-            logger.error("%d is out of range!" % topic_id)
diff --git a/hub_module/modules/text/semantic_model/slda_news/sampler.py b/hub_module/modules/text/semantic_model/slda_news/sampler.py
deleted file mode 100644
index ba23fa2f1eb5be240d7a6937f583d4e0d8e5874f..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/slda_news/sampler.py
+++ /dev/null
@@ -1,311 +0,0 @@
-import os
-
-import numpy as np
-from tqdm import tqdm
-from paddlehub.common.logger import logger
-
-from slda_news.document import LDADoc, SLDADoc, Token, Sentence
-from slda_news.vose_alias import VoseAlias
-from slda_news.util import rand, rand_k
-
-
-class Sampler(object):
-    def __init__(self):
-        pass
-
-    def sample_doc(self, doc):
-        """Sample LDA or SLDA topics for documents.
-        """
-        raise NotImplementedError
-
-
-class MHSampler(Sampler):
-    def __init__(self, model):
-        super().__init__()
-        self.__model = model
-        self.__topic_indexes = None
-        self.__alias_tables = None
-        self.__prob_sum = None
-        self.__beta_alias = VoseAlias()
-        self.__beta_prior_sum = None
-        self.__mh_steps = 2
-        self.__construct_alias_table()
-
-    def __construct_alias_table(self):
-        """Construct alias table for all words.
-        """
-        logger.info("Construct alias table for alias sampling method.")
-        vocab_size = self.__model.vocab_size()
-        self.__topic_indexes = [[] for _ in range(vocab_size)]
-        self.__alias_tables = [VoseAlias() for _ in range(vocab_size)]
-        self.__prob_sum = np.zeros(vocab_size)
-
-        # Construct each word's alias table (prior is not included).
-        for i in tqdm(range(vocab_size)):
-            dist = []
-            prob_sum = 0
-            for key in self.__model.word_topic(i):
-                topic_id = key
-                word_topic_count = self.__model.word_topic(i)[key]
-                topic_sum = self.__model.topic_sum_value(topic_id)
-
-                self.__topic_indexes[i].append(topic_id)
-                q = word_topic_count / (topic_sum + self.__model.beta_sum())
-                dist.append(q)
-                prob_sum += q
-            self.__prob_sum[i] = prob_sum
-            if len(dist) > 0:
-                dist = np.array(dist, dtype=np.float64)
-                self.__alias_tables[i].initialize(dist)
-
-        # Build prior parameter beta's alias table.
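-        # What follows mirrors the per-word weights above: every topic k gets
-        # the prior mass beta / (topic_sum(k) + beta_sum), and a single
-        # VoseAlias over that dense vector yields O(1) draws from the prior,
-        # keeping each Metropolis-Hastings proposal constant time.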
- beta_dist = self.__model.beta() / ( - self.__model.topic_sum() + self.__model.beta_sum()) - self.__beta_prior_sum = np.sum(beta_dist) - self.__beta_alias.initialize(beta_dist) - - def sample_doc(self, doc): - if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): - for i in range(doc.size()): - new_topic = self.__sample_token(doc, doc.token(i)) - doc.set_topic(i, new_topic) - elif isinstance(doc, SLDADoc): - for i in range(doc.size()): - new_topic = self.__sample_sentence(doc, doc.sent(i)) - doc.set_topic(i, new_topic) - - def __sample_token(self, doc, token): - new_topic = token.topic - for i in range(self.__mh_steps): - doc_proposed_topic = self.__doc_proposal(doc, token) - new_topic = self.__word_proposal(doc, token, doc_proposed_topic) - return new_topic - - def __sample_sentence(self, doc, sent): - new_topic = sent.topic - for i in range(self.__mh_steps): - doc_proposed_topic = self.__doc_proposal(doc, sent) - new_topic = self.__word_proposal(doc, sent, doc_proposed_topic) - return new_topic - - def __doc_proposal(self, doc, token): - if isinstance(doc, LDADoc) and isinstance(token, Token): - old_topic = token.topic - dart = rand() * (doc.size() + self.__model.alpha_sum()) - if dart < doc.size(): - token_index = int(dart) - new_topic = doc.token(token_index).topic - else: - new_topic = rand_k(self.__model.num_topics()) - - if new_topic != old_topic: - proposal_old = self.__doc_proposal_distribution(doc, old_topic) - proposal_new = self.__doc_proposal_distribution(doc, new_topic) - proportion_old = self.__proportional_function( - doc, token, old_topic) - proportion_new = self.__proportional_function( - doc, token, new_topic) - transition_prob = float((proportion_new * proposal_old) / - (proportion_old * proposal_new)) - rejection = rand() - mask = -(rejection < transition_prob) - return (new_topic & mask) | (old_topic & ~mask) - - return new_topic - - elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): - sent = token - old_topic = sent.topic - dart = rand() * (doc.size() + self.__model.alpha_sum()) - if dart < doc.size(): - token_index = int(dart) - new_topic = doc.sent(token_index).topic - else: - new_topic = rand_k(self.__model.num_topics()) - - if new_topic != old_topic: - proportion_old = self.__proportional_function( - doc, sent, old_topic) - proportion_new = self.__proportional_function( - doc, sent, new_topic) - proposal_old = self.__doc_proposal_distribution(doc, old_topic) - proposal_new = self.__doc_proposal_distribution(doc, new_topic) - transition_prob = float((proportion_new * proposal_old) / - (proportion_old * proposal_new)) - rejection = rand() - mask = -(rejection < transition_prob) - return (new_topic & mask) | (old_topic & ~mask) - - return new_topic - - def __word_proposal(self, doc, token, old_topic): - if isinstance(doc, LDADoc) and isinstance(token, Token): - new_topic = self.__propose(token.id) - if new_topic != old_topic: - proposal_old = self.__word_proposal_distribution( - token.id, old_topic) - proposal_new = self.__word_proposal_distribution( - token.id, new_topic) - proportion_old = self.__proportional_function( - doc, token, old_topic) - proportion_new = self.__proportional_function( - doc, token, new_topic) - transition_prob = float((proportion_new * proposal_old) / - (proportion_old * proposal_new)) - rejection = rand() - mask = -(rejection < transition_prob) - return (new_topic & mask) | (old_topic & ~mask) - return new_topic - - elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): - sent = token - new_topic = old_topic - 
for word_id in sent.tokens:
-                new_topic = self.__propose(word_id)
-                if new_topic != old_topic:
-                    proportion_old = self.__proportional_function(
-                        doc, sent, old_topic)
-                    proportion_new = self.__proportional_function(
-                        doc, sent, new_topic)
-                    proposal_old = self.__word_proposal_distribution(
-                        word_id, old_topic)
-                    proposal_new = self.__word_proposal_distribution(
-                        word_id, new_topic)
-                    transition_prob = float((proportion_new * proposal_old) /
-                                            (proportion_old * proposal_new))
-                    rejection = rand()
-                    mask = -(rejection < transition_prob)
-                    new_topic = (new_topic & mask) | (old_topic & ~mask)
-            return new_topic
-
-    def __proportional_function(self, doc, token, new_topic):
-        if isinstance(doc, LDADoc) and isinstance(token, Token):
-            old_topic = token.topic
-            dt_alpha = doc.topic_sum(new_topic) + self.__model.alpha()
-            wt_beta = self.__model.word_topic_value(
-                token.id, new_topic) + self.__model.beta()
-            t_sum_beta_sum = self.__model.topic_sum_value(
-                new_topic) + self.__model.beta_sum()
-            if new_topic == old_topic and wt_beta > 1:
-                if dt_alpha > 1:
-                    dt_alpha -= 1
-                wt_beta -= 1
-                t_sum_beta_sum -= 1
-            return dt_alpha * wt_beta / t_sum_beta_sum
-
-        elif isinstance(doc, SLDADoc) and isinstance(token, Sentence):
-            sent = token
-            old_topic = sent.topic
-            result = doc.topic_sum(new_topic) + self.__model.alpha()
-            if new_topic == old_topic:
-                result -= 1
-            for word_id in sent.tokens:
-                wt_beta = self.__model.word_topic_value(
-                    word_id, new_topic) + self.__model.beta()
-                t_sum_beta_sum = self.__model.topic_sum_value(
-                    new_topic) + self.__model.beta_sum()
-                if new_topic == old_topic and wt_beta > 1:
-                    wt_beta -= 1
-                    t_sum_beta_sum -= 1
-                result *= wt_beta / t_sum_beta_sum
-            return result
-        else:
-            logger.error("Wrong input argument type!")
-
-    def __word_proposal_distribution(self, word_id, topic):
-        wt_beta = self.__model.word_topic_value(word_id,
-                                                topic) + self.__model.beta()
-        t_sum_beta_sum = self.__model.topic_sum_value(
-            topic) + self.__model.beta_sum()
-        return wt_beta / t_sum_beta_sum
-
-    def __doc_proposal_distribution(self, doc, topic):
-        return doc.topic_sum(topic) + self.__model.alpha()
-
-    def __propose(self, word_id):
-        dart = rand() * (self.__prob_sum[word_id] + self.__beta_prior_sum)
-        if dart < self.__prob_sum[word_id]:
-            idx = self.__alias_tables[word_id].generate()
-            topic = self.__topic_indexes[word_id][idx]
-        else:
-            topic = self.__beta_alias.generate()
-        return topic
-
-
-class GibbsSampler(Sampler):
-    def __init__(self, model):
-        super().__init__()
-        self.__model = model
-
-    def sample_doc(self, doc):
-        if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc):
-            for i in range(doc.size()):
-                new_topic = self.__sample_token(doc, doc.token(i))
-                doc.set_topic(i, new_topic)
-        elif isinstance(doc, SLDADoc):
-            for i in range(doc.size()):
-                new_topic = self.__sample_sentence(doc, doc.sent(i))
-                doc.set_topic(i, new_topic)
-
-    def __sample_token(self, doc, token):
-        old_topic = token.topic
-        num_topics = self.__model.num_topics()
-        accum_prob = np.zeros(num_topics)
-        prob = np.zeros(num_topics)
-        sum_ = 0
-        for i in range(num_topics):
-            dt_alpha = doc.topic_sum(i) + self.__model.alpha()
-            wt_beta = self.__model.word_topic_value(token.id,
-                                                    i) + self.__model.beta()
-            t_sum_beta_sum = self.__model.topic_sum_value(
-                i) + self.__model.beta_sum()
-            if i == old_topic and wt_beta > 1:
-                if dt_alpha > 1:
-                    dt_alpha -= 1
-                wt_beta -= 1
-                t_sum_beta_sum -= 1
-            prob[i] = dt_alpha * wt_beta / t_sum_beta_sum
-            sum_ += prob[i]
-            accum_prob[i] = prob[i] if i == 0 else accum_prob[i - 1] + prob[i]
-
-        dart = rand() * sum_
-        if dart <= accum_prob[0]:
-            return 0
-        for i in range(1, num_topics):
-            if accum_prob[i - 1] < dart <= accum_prob[i]:
-                return i
-        return num_topics - 1
-
-    def __sample_sentence(self, doc, sent):
-        old_topic = sent.topic
-        num_topics = self.__model.num_topics()
-        accum_prob = np.zeros(num_topics)
-        prob = np.zeros(num_topics)
-        sum_ = 0
-        for t in range(num_topics):
-            dt_alpha = doc.topic_sum(t) + self.__model.alpha()
-            t_sum_beta_sum = self.__model.topic_sum_value(
-                t) + self.__model.beta_sum()
-            if t == old_topic:
-                if dt_alpha > 1:
-                    dt_alpha -= 1
-                if t_sum_beta_sum > 1:
-                    t_sum_beta_sum -= 1
-            prob[t] = dt_alpha
-            for i in range(len(sent.tokens)):
-                w = sent.tokens[i]
-                wt_beta = self.__model.word_topic_value(
-                    w, t) + self.__model.beta()
-                if t == old_topic and wt_beta > 1:
-                    wt_beta -= 1
-                # Note: for a long sentence, repeatedly multiplying many small
-                # factors can underflow, so the accumulated probability loses
-                # accuracy.
-                prob[t] *= wt_beta / t_sum_beta_sum
-            sum_ += prob[t]
-            accum_prob[t] = prob[t] if t == 0 else accum_prob[t - 1] + prob[t]
-
-        dart = rand() * sum_
-        if dart <= accum_prob[0]:
-            return 0
-        for t in range(1, num_topics):
-            if accum_prob[t - 1] < dart <= accum_prob[t]:
-                return t
-        return num_topics - 1
diff --git a/hub_module/modules/text/semantic_model/slda_news/tokenizer.py b/hub_module/modules/text/semantic_model/slda_news/tokenizer.py
deleted file mode 100644
index 10167882dd791f2f658853352e6b3773677f5b59..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/slda_news/tokenizer.py
+++ /dev/null
@@ -1,128 +0,0 @@
-import os
-
-import numpy as np
-from paddlehub.common.logger import logger
-
-
-class Tokenizer(object):
-    """Base tokenizer class.
-    """
-
-    def __init__(self):
-        pass
-
-    def tokenize(self, text):
-        raise NotImplementedError
-
-
-class SimpleTokenizer(Tokenizer):
-    """Simple version FMM (Forward Maximum Matching) word tokenizer. This tokenizer can only
-    be used in the topic model demo, but not in real business application scenarios.
-
-    Notes: This tokenizer can only recognize the words in the corresponding vocab file.
-    """
-
-    def __init__(self, vocab_path):
-        super().__init__()
-        self.__max_word_len = 0
-        self.__vocab = set()
-        self.__load_vocab(vocab_path)
-
-    def tokenize(self, text):
-        """Tokenize the input string `text`, and return the tokenize result.
-        """
-        text_len = len(text)
-        result = []
-        i = 0
-        while i < text_len:
-            word = found_word = ""
-            # Deal with English characters.
-            if self.__is_eng_char(text[i]):
-                for j in range(i, text_len + 1):
-                    if j < text_len and self.__is_eng_char(text[j]):
-                        word += self.__tolower(text[j])
-                    else:
-                        # Forward matching by character granularity.
-                        if word in self.__vocab:
-                            result.append(word)
-                        i = j - 1
-                        break
-            else:
-                for j in range(i, min(i + self.__max_word_len, text_len)):
-                    word += text[j]
-                    if word in self.__vocab:
-                        found_word = word
-                if len(found_word) > 0:
-                    result.append(found_word)
-                    i += len(found_word) - 1
-            i += 1
-        return result
-
-    def contains(self, word):
-        """Check whether the word is in the vocabulary.
-        """
-        return word in self.__vocab
-
-    def __load_vocab(self, vocab_path):
-        """Load the word dictionary.
- """ - with open(vocab_path, 'r', encoding='utf-8') as fin: - vocab_size = 0 - for line in fin.readlines(): - fields = line.strip().split('\t') - assert len(fields) >= 2 - word = fields[1] - self.__max_word_len = max(self.__max_word_len, len(word)) - self.__vocab.add(word) - vocab_size += 1 - - def __is_eng_char(self, c): - """Check whether char c is an English character. - """ - return (c >= 'A' and c <= 'Z') or (c >= 'a' and c <= 'z') - - def __tolower(self, c): - """Return the lowercase character of the corresponding character, or return - the original character if there is no corresponding lowercase character. - """ - return c.lower() - - -class LACTokenizer(Tokenizer): - def __init__(self, vocab_path, lac): - super().__init__() - self.__max_word_len = 0 - self.__vocab = set() - self.__lac = lac - self.__load_vocab(vocab_path) - - def __load_vocab(self, vocab_path): - """Load the word dictionary. - """ - with open(vocab_path, 'r', encoding='utf-8') as fin: - vocab_size = 0 - for line in fin.readlines(): - fields = line.strip().split('\t') - assert len(fields) >= 2 - word = fields[1] - self.__max_word_len = max(self.__max_word_len, len(word)) - self.__vocab.add(word) - vocab_size += 1 - - def tokenize(self, text): - results = self.__lac.lexical_analysis( - texts=[text], use_gpu=False, batch_size=1, return_tag=True) - # Change English words to lower case. - # And just preserve the word in vocab. - words = results[0]["word"] - result = [] - for word in words: - word = word.lower() - if word in self.__vocab: - result.append(word) - return result - - def contains(self, word): - """Check whether the word is in the vocabulary. - """ - return word in self.__vocab diff --git a/hub_module/modules/text/semantic_model/slda_news/vocab.py b/hub_module/modules/text/semantic_model/slda_news/vocab.py deleted file mode 100644 index cc8d15e90a32b5ac556a102192c8a4e884b580ef..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/slda_news/vocab.py +++ /dev/null @@ -1,43 +0,0 @@ -from paddlehub.common.logger import logger - -OOV = -1 - - -class WordCount(object): - def __init__(self, word_id, count): - self.word_id = word_id - self.count = count - - -class Vocab(object): - def __init__(self): - self.__term2id = {} - self.__id2term = {} - - def get_id(self, word): - if word not in self.__term2id: - return OOV - return self.__term2id[word] - - def load(self, vocab_file): - self.__term2id = {} - self.__id2term = {} - with open(vocab_file, 'r', encoding='utf-8') as fin: - for line in fin.readlines(): - fields = line.strip().split('\t') - assert len( - fields) == 5, "Vocabulary file [%s] format error!" % ( - vocab_file) - term = fields[1] - id_ = int(fields[2]) - if term in self.__term2id: - logger.error("Duplicate word [%s] in vocab file!" % (term)) - continue - self.__term2id[term] = id_ - self.__id2term[id_] = term - - def size(self): - return len(self.__term2id) - - def vocabulary(self): - return self.__id2term diff --git a/hub_module/modules/text/semantic_model/slda_novel/document.py b/hub_module/modules/text/semantic_model/slda_novel/document.py deleted file mode 100644 index b79acd0794f60fb0a401726aea0b8b51deda90a1..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/slda_novel/document.py +++ /dev/null @@ -1,179 +0,0 @@ -import numpy as np - - -class Topic(object): - """Basic data structure of topic, contains topic id and - corresponding probability. 
- """ - - def __init__(self, tid, prob): - self.tid = tid # topic id - self.prob = prob # topic probability - - -class Token(object): - """Basic storage unit of LDA documents, contains word id - and corresponding topic. - """ - - def __init__(self, topic, id): - self.topic = topic - self.id = id - - -class Sentence(object): - """Basic storage unit of SentenceLDA documents, contains word ids - of the sentence and its corresponding topic id. - """ - - def __init__(self, topic, tokens): - self.topic = topic - self.tokens = tokens - - -class LDADoc(object): - """The storage structure of LDA model's inference result. - """ - - def __init__(self): - self._num_topics = None # Number of topics. - self._num_accum = None # Number of accumulated sample rounds. - self._alpha = None # Document prior parameter. - self._tokens = None # Storage structure of inference results. - self._topic_sum = None # Document's topic sum in one round samples. - self._accum_topic_sum = None # Accumulated results of topic sum. - - def init(self, num_topics): - """Initialize the LDADoc according to num_topics. - """ - self._num_topics = num_topics - self._num_accum = 0 - self._tokens = [] - self._topic_sum = np.zeros(self._num_topics) - self._accum_topic_sum = np.zeros(self._num_topics) - - def add_token(self, token): - """Add new word to current LDADoc. - Arg: - token: Token class object. - """ - assert token.topic >= 0, "Topic %d out of range!" % token.topic - assert token.topic < self._num_topics, "Topic %d out of range!" % token.topic - self._tokens.append(token) - self._topic_sum[token.topic] += 1 - - def token(self, index): - return self._tokens[index] - - def set_topic(self, index, new_topic): - """Set the index word's topic to new_topic, and update the corresponding - topic distribution. - """ - assert new_topic >= 0, "Topic %d out of range!" % new_topic - assert new_topic < self._num_topics, "Topic %d out of range!" % new_topic - old_topic = self._tokens[index].topic - if new_topic == old_topic: - return - self._tokens[index].topic = new_topic - self._topic_sum[old_topic] -= 1 - self._topic_sum[new_topic] += 1 - - def set_alpha(self, alpha): - self._alpha = alpha - - def size(self): - """Return number of words in LDADoc. - """ - return len(self._tokens) - - def topic_sum(self, topic_id): - return self._topic_sum[topic_id] - - def sparse_topic_dist(self, sort=True): - """Return the topic distribution of documents in sparse format. - By default, it is sorted according to the topic probability - under the descending order. - """ - topic_dist = [] - sum_ = np.sum(self._accum_topic_sum) - if sum_ == 0: - return topic_dist - for i in range(0, self._num_topics): - if self._accum_topic_sum[i] == 0: - continue - topic_dist.append(Topic(i, self._accum_topic_sum[i] * 1.0 / sum_)) - if sort: - - def take_elem(topic): - return topic.prob - - topic_dist.sort(key=take_elem, reverse=True) - if topic_dist is None: - topic_dist = [] - - return topic_dist - - def dense_topic_dist(self): - """Return the distribution of document topics in dense format, - taking into account the prior parameter alpha. - """ - dense_dist = np.zeros(self._num_topics) - if self.size() == 0: - return dense_dist - dense_dist = ( - self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / ( - self.size() + self._alpha * self._num_topics) - return dense_dist - - def accumulate_topic_num(self): - self._accum_topic_sum += self._topic_sum - self._num_accum += 1 - - -class SLDADoc(LDADoc): - """Sentence LDA Document, inherited from LDADoc. 
- Add add_sentence interface. - """ - - def __init__(self): - super().__init__() - self.__sentences = None - - def init(self, num_topics): - """Initialize the SLDADoc according to num_topics. - """ - self._num_topics = num_topics - self.__sentences = [] - self._num_accum = 0 - self._topic_sum = np.zeros(self._num_topics) - self._accum_topic_sum = np.zeros(self._num_topics) - - def add_sentence(self, sent): - """Add new sentence to current SLDADoc. - Arg: - sent: Sentence class object. - """ - assert sent.topic >= 0, "Topic %d out of range!" % (sent.topic) - assert sent.topic < self._num_topics, "Topic %d out of range!" % ( - sent.topic) - self.__sentences.append(sent) - self._topic_sum[sent.topic] += 1 - - def set_topic(self, index, new_topic): - assert new_topic >= 0, "Topic %d out of range!" % (new_topic) - assert new_topic < self._num_topics, "Topic %d out of range!" % ( - new_topic) - old_topic = self.__sentences[index].topic - if new_topic == old_topic: - return - self.__sentences[index].topic = new_topic - self._topic_sum[old_topic] -= 1 - self._topic_sum[new_topic] += 1 - - def size(self): - """Return number of sentences in SLDADoc. - """ - return len(self.__sentences) - - def sent(self, index): - return self.__sentences[index] diff --git a/hub_module/modules/text/semantic_model/slda_novel/inference_engine.py b/hub_module/modules/text/semantic_model/slda_novel/inference_engine.py deleted file mode 100644 index 2faddfebdcc10ed86511fc2bdb1e2d0f9c48572e..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/slda_novel/inference_engine.py +++ /dev/null @@ -1,94 +0,0 @@ -import os - -from paddlehub.common.logger import logger - -from slda_novel.config import ModelConfig -from slda_novel.util import load_prototxt, fix_random_seed, rand_k -from slda_novel.model import TopicModel -from slda_novel.sampler import GibbsSampler, MHSampler -from slda_novel.document import LDADoc, SLDADoc, Token, Sentence -from slda_novel.vocab import OOV - - -class SamplerType: - GibbsSampling = 0 - MetropolisHastings = 1 - - -class InferenceEngine(object): - def __init__(self, - model_dir, - conf_file, - type=SamplerType.MetropolisHastings): - # Read model configuration. - config = ModelConfig() - conf_file_path = os.path.join(model_dir, conf_file) - load_prototxt(conf_file_path, config) - self.__model = TopicModel(model_dir, config) - self.__config = config - - # Initialize the sampler according to the configuration. - if type == SamplerType.GibbsSampling: - self.__sampler = GibbsSampler(self.__model) - elif type == SamplerType.MetropolisHastings: - self.__sampler = MHSampler(self.__model) - - def infer(self, input, doc): - """Perform LDA topic inference on input, and store the results in doc. - Args: - input: a list of strings after tokenization. - doc: LDADoc type or SLDADoc type. 
-        """
-        fix_random_seed()
-        if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc):
-            doc.init(self.__model.num_topics())
-            doc.set_alpha(self.__model.alpha())
-            for token in input:
-                id_ = self.__model.term_id(token)
-                if id_ != OOV:
-                    init_topic = rand_k(self.__model.num_topics())
-                    doc.add_token(Token(init_topic, id_))
-            self.lda_infer(doc, 20, 50)
-        elif isinstance(doc, SLDADoc):
-            doc.init(self.__model.num_topics())
-            doc.set_alpha(self.__model.alpha())
-            for sent in input:
-                words = []
-                for token in sent:
-                    id_ = self.__model.term_id(token)
-                    if id_ != OOV:
-                        words.append(id_)
-                init_topic = rand_k(self.__model.num_topics())
-                doc.add_sentence(Sentence(init_topic, words))
-            self.slda_infer(doc, 20, 50)
-        else:
-            logger.error("Wrong Doc Type!")
-
-    def lda_infer(self, doc, burn_in_iter, total_iter):
-        assert burn_in_iter >= 0
-        assert total_iter > 0
-        assert total_iter > burn_in_iter
-
-        for iter_ in range(total_iter):
-            self.__sampler.sample_doc(doc)
-            if iter_ >= burn_in_iter:
-                doc.accumulate_topic_num()
-
-    def slda_infer(self, doc, burn_in_iter, total_iter):
-        assert burn_in_iter >= 0
-        assert total_iter > 0
-        assert total_iter > burn_in_iter
-
-        for iter_ in range(total_iter):
-            self.__sampler.sample_doc(doc)
-            if iter_ >= burn_in_iter:
-                doc.accumulate_topic_num()
-
-    def model_type(self):
-        return self.__model.type()
-
-    def get_model(self):
-        return self.__model
-
-    def get_config(self):
-        return self.__config
diff --git a/hub_module/modules/text/semantic_model/slda_novel/model.py b/hub_module/modules/text/semantic_model/slda_novel/model.py
deleted file mode 100644
index 9f50ded6d6b3d2fe9d794b7826c412e5e97aa1a6..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/slda_novel/model.py
+++ /dev/null
@@ -1,127 +0,0 @@
-import os
-from collections import OrderedDict
-
-import numpy as np
-from tqdm import tqdm
-from paddlehub.common.logger import logger
-
-from slda_novel.vocab import Vocab, WordCount
-
-
-class TopicModel(object):
-    """Storage Structure of Topic model, including vocabulary and word topic count.
-    """
-
-    def __init__(self, model_dir, config):
-        """
-        Args:
-            model_dir: the path of model directory
-            config: ModelConfig class.
-        """
-        self.__word_topic = None  # Model parameter of word topic.
-        self.__vocab = Vocab()  # Vocab data structure of model.
-        self.__num_topics = config.num_topics  # Number of topics.
-        self.__alpha = config.alpha
-        self.__alpha_sum = self.__alpha * self.__num_topics
-        self.__beta = config.beta
-        self.__beta_sum = None
-        self.__type = config.type  # Model type.
-        self.__topic_sum = np.zeros(
-            self.__num_topics,
-            dtype="int64")  # Accum sum of each topic in word topic.
-        self.__topic_words = [[] for _ in range(self.__num_topics)]
-        word_topic_path = os.path.join(model_dir, config.word_topic_file)
-        vocab_path = os.path.join(model_dir, config.vocab_file)
-        self.load_model(word_topic_path, vocab_path)
-
-    def term_id(self, term):
-        return self.__vocab.get_id(term)
-
-    def load_model(self, word_topic_path, vocab_path):
-
-        # Loading vocabulary
-        self.__vocab.load(vocab_path)
-
-        self.__beta_sum = self.__beta * self.__vocab.size()
-        # One dict of {topic_id: count} per vocabulary term.
-        self.__word_topic = [{} for _ in range(self.__vocab.size())]
-        self.__load_word_dict(word_topic_path)
-        logger.info(
-            "Model Info: #num_topics=%d #vocab_size=%d alpha=%f beta=%f" %
-            (self.num_topics(), self.vocab_size(), self.alpha(), self.beta()))
-
-    def word_topic_value(self, word_id, topic_id):
-        """Return value of specific word under specific topic in the model.
- """ - word_dict = self.__word_topic[word_id] - if topic_id not in word_dict: - return 0 - return word_dict[topic_id] - - def word_topic(self, term_id): - """Return the topic distribution of a word. - """ - return self.__word_topic[term_id] - - def topic_sum_value(self, topic_id): - return self.__topic_sum[topic_id] - - def topic_sum(self): - return self.__topic_sum - - def num_topics(self): - return self.__num_topics - - def vocab_size(self): - return self.__vocab.size() - - def alpha(self): - return self.__alpha - - def alpha_sum(self): - return self.__alpha_sum - - def beta(self): - return self.__beta - - def beta_sum(self): - return self.__beta_sum - - def type(self): - return self.__type - - def __load_word_dict(self, word_dict_path): - """Load the word topic parameters. - """ - logger.info("Loading word topic.") - with open(word_dict_path, 'r', encoding='utf-8') as f: - for line in tqdm(f.readlines()): - fields = line.strip().split(" ") - assert len(fields) > 0, "Model file format error!" - term_id = int(fields[0]) - assert term_id < self.vocab_size(), "Term id out of range!" - assert term_id >= 0, "Term id out of range!" - for i in range(1, len(fields)): - topic_count = fields[i].split(":") - assert len(topic_count) == 2, "Topic count format error!" - - topic_id = int(topic_count[0]) - assert topic_id >= 0, "Topic out of range!" - assert topic_id < self.__num_topics, "Topic out of range!" - - count = int(topic_count[1]) - assert count >= 0, "Topic count error!" - - self.__word_topic[term_id][topic_id] = count - self.__topic_sum[topic_id] += count - self.__topic_words[topic_id].append( - WordCount(term_id, count)) - new_dict = OrderedDict() - for key in sorted(self.__word_topic[term_id]): - new_dict[key] = self.__word_topic[term_id][key] - self.__word_topic[term_id] = new_dict - - def get_vocab(self): - return self.__vocab.vocabulary() - - def topic_words(self): - return self.__topic_words diff --git a/hub_module/modules/text/semantic_model/slda_novel/module.py b/hub_module/modules/text/semantic_model/slda_novel/module.py deleted file mode 100644 index ff49852806d37359e1c83cf03992258f1b344ffb..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/slda_novel/module.py +++ /dev/null @@ -1,103 +0,0 @@ -import os - -import paddlehub as hub -from paddlehub.module.module import moduleinfo -from paddlehub.common.logger import logger - -from slda_novel.inference_engine import InferenceEngine -from slda_novel.document import SLDADoc -from slda_novel.semantic_matching import SemanticMatching, WordAndDis -from slda_novel.tokenizer import LACTokenizer, SimpleTokenizer -from slda_novel.config import ModelType -from slda_novel.vocab import Vocab, WordCount - - -@moduleinfo( - name="slda_novel", - version="1.0.0", - summary= - "This is a PaddleHub Module for SLDA topic model in novel dataset, where we can infer the topic distribution of document.", - author="DesmonDay", - author_email="", - type="nlp/semantic_model") -class TopicModel(hub.Module): - def _initialize(self): - """ - Initialize with the necessary elements. 
- """ - self.model_dir = os.path.join(self.directory, 'novel') - self.conf_file = 'slda.conf' - self.__engine = InferenceEngine(self.model_dir, self.conf_file) - self.vocab_path = os.path.join(self.model_dir, 'vocab_info.txt') - lac = hub.Module(name="lac") - # self.__tokenizer = SimpleTokenizer(self.vocab_path) - self.__tokenizer = LACTokenizer(self.vocab_path, lac) - - self.vocabulary = self.__engine.get_model().get_vocab() - self.config = self.__engine.get_config() - self.topic_words = self.__engine.get_model().topic_words() - self.topic_sum_table = self.__engine.get_model().topic_sum() - - def take_elem(word_count): - return word_count.count - - for i in range(self.config.num_topics): - self.topic_words[i].sort(key=take_elem, reverse=True) - - logger.info("Finish Initialization.") - - def infer_doc_topic_distribution(self, document): - """ - This interface infers the topic distribution of document. - - Args: - document(str): the input document text. - - Returns: - results(list): returns the topic distribution of document. - """ - tokens = self.__tokenizer.tokenize(document) - if tokens == []: - return [] - results = [] - sentences = [] - sent = [] - for i in range(len(tokens)): - sent.append(tokens[i]) - if len(sent) % 5 == 0: - sentences.append(sent) - sent = [] - if len(sent) > 0: - sentences.append(sent) - - doc = SLDADoc() - self.__engine.infer(sentences, doc) - topics = doc.sparse_topic_dist() - for topic in topics: - results.append({"topic id": topic.tid, "distribution": topic.prob}) - return results - - def show_topic_keywords(self, topic_id, k=10): - """ - This interface returns the k keywords under specific topic. - - Args: - topic_id(int): topic information we want to know. - k(int): top k keywords. - - Returns: - results(dict): contains specific topic's keywords and corresponding - probability. - """ - EPS = 1e-8 - results = {} - if 0 <= topic_id < self.config.num_topics: - k = min(k, len(self.topic_words[topic_id])) - for i in range(k): - prob = self.topic_words[topic_id][i].count / \ - (self.topic_sum_table[topic_id] + EPS) - results[self.vocabulary[self.topic_words[topic_id] - [i].word_id]] = prob - return results - else: - logger.error("%d is out of range!" % topic_id) diff --git a/hub_module/modules/text/semantic_model/slda_novel/sampler.py b/hub_module/modules/text/semantic_model/slda_novel/sampler.py deleted file mode 100644 index fe7dc6bfa4bd8ed8ae128362f931efde88ceefbc..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/slda_novel/sampler.py +++ /dev/null @@ -1,311 +0,0 @@ -import os - -import numpy as np -from tqdm import tqdm -from paddlehub.common.logger import logger - -from slda_novel.document import LDADoc, SLDADoc, Token, Sentence -from slda_novel.vose_alias import VoseAlias -from slda_novel.util import rand, rand_k - - -class Sampler(object): - def __init__(self): - pass - - def sample_doc(self, doc): - """Sample LDA or SLDA topics for documents. - """ - raise NotImplementedError - - -class MHSampler(Sampler): - def __init__(self, model): - super().__init__() - self.__model = model - self.__topic_indexes = None - self.__alias_tables = None - self.__prob_sum = None - self.__beta_alias = VoseAlias() - self.__beta_prior_sum = None - self.__mh_steps = 2 - self.__construct_alias_table() - - def __construct_alias_table(self): - """Construct alias table for all words. 
-        """
-        logger.info("Construct alias table for alias sampling method.")
-        vocab_size = self.__model.vocab_size()
-        self.__topic_indexes = [[] for _ in range(vocab_size)]
-        self.__alias_tables = [VoseAlias() for _ in range(vocab_size)]
-        self.__prob_sum = np.zeros(vocab_size)
-
-        # Construct each word's alias table (prior is not included).
-        for i in tqdm(range(vocab_size)):
-            dist = []
-            prob_sum = 0
-            for key in self.__model.word_topic(i):
-                topic_id = key
-                word_topic_count = self.__model.word_topic(i)[key]
-                topic_sum = self.__model.topic_sum_value(topic_id)
-
-                self.__topic_indexes[i].append(topic_id)
-                q = word_topic_count / (topic_sum + self.__model.beta_sum())
-                dist.append(q)
-                prob_sum += q
-            self.__prob_sum[i] = prob_sum
-            if len(dist) > 0:
-                dist = np.array(dist, dtype=np.float64)
-                self.__alias_tables[i].initialize(dist)
-
-        # Build prior parameter beta's alias table.
-        beta_dist = self.__model.beta() / (
-            self.__model.topic_sum() + self.__model.beta_sum())
-        self.__beta_prior_sum = np.sum(beta_dist)
-        self.__beta_alias.initialize(beta_dist)
-
-    def sample_doc(self, doc):
-        if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc):
-            for i in range(doc.size()):
-                new_topic = self.__sample_token(doc, doc.token(i))
-                doc.set_topic(i, new_topic)
-        elif isinstance(doc, SLDADoc):
-            for i in range(doc.size()):
-                new_topic = self.__sample_sentence(doc, doc.sent(i))
-                doc.set_topic(i, new_topic)
-
-    def __sample_token(self, doc, token):
-        new_topic = token.topic
-        for i in range(self.__mh_steps):
-            doc_proposed_topic = self.__doc_proposal(doc, token)
-            new_topic = self.__word_proposal(doc, token, doc_proposed_topic)
-        return new_topic
-
-    def __sample_sentence(self, doc, sent):
-        new_topic = sent.topic
-        for i in range(self.__mh_steps):
-            doc_proposed_topic = self.__doc_proposal(doc, sent)
-            new_topic = self.__word_proposal(doc, sent, doc_proposed_topic)
-        return new_topic
-
-    def __doc_proposal(self, doc, token):
-        if isinstance(doc, LDADoc) and isinstance(token, Token):
-            old_topic = token.topic
-            dart = rand() * (doc.size() + self.__model.alpha_sum())
-            if dart < doc.size():
-                token_index = int(dart)
-                new_topic = doc.token(token_index).topic
-            else:
-                new_topic = rand_k(self.__model.num_topics())
-
-            if new_topic != old_topic:
-                proposal_old = self.__doc_proposal_distribution(doc, old_topic)
-                proposal_new = self.__doc_proposal_distribution(doc, new_topic)
-                proportion_old = self.__proportional_function(
-                    doc, token, old_topic)
-                proportion_new = self.__proportional_function(
-                    doc, token, new_topic)
-                transition_prob = float((proportion_new * proposal_old) /
-                                        (proportion_old * proposal_new))
-                rejection = rand()
-                mask = -(rejection < transition_prob)
-                return (new_topic & mask) | (old_topic & ~mask)
-
-            return new_topic
-
-        elif isinstance(doc, SLDADoc) and isinstance(token, Sentence):
-            sent = token
-            old_topic = sent.topic
-            dart = rand() * (doc.size() + self.__model.alpha_sum())
-            if dart < doc.size():
-                token_index = int(dart)
-                new_topic = doc.sent(token_index).topic
-            else:
-                new_topic = rand_k(self.__model.num_topics())
-
-            if new_topic != old_topic:
-                proportion_old = self.__proportional_function(
-                    doc, sent, old_topic)
-                proportion_new = self.__proportional_function(
-                    doc, sent, new_topic)
-                proposal_old = self.__doc_proposal_distribution(doc, old_topic)
-                proposal_new = self.__doc_proposal_distribution(doc, new_topic)
-                transition_prob = float((proportion_new * proposal_old) /
-                                        (proportion_old * proposal_new))
-                rejection = rand()
-                mask = -(rejection <
transition_prob) - return (new_topic & mask) | (old_topic & ~mask) - - return new_topic - - def __word_proposal(self, doc, token, old_topic): - if isinstance(doc, LDADoc) and isinstance(token, Token): - new_topic = self.__propose(token.id) - if new_topic != old_topic: - proposal_old = self.__word_proposal_distribution( - token.id, old_topic) - proposal_new = self.__word_proposal_distribution( - token.id, new_topic) - proportion_old = self.__proportional_function( - doc, token, old_topic) - proportion_new = self.__proportional_function( - doc, token, new_topic) - transition_prob = float((proportion_new * proposal_old) / - (proportion_old * proposal_new)) - rejection = rand() - mask = -(rejection < transition_prob) - return (new_topic & mask) | (old_topic & ~mask) - return new_topic - - elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): - sent = token - new_topic = old_topic - for word_id in sent.tokens: - new_topic = self.__propose(word_id) - if new_topic != old_topic: - proportion_old = self.__proportional_function( - doc, sent, old_topic) - proportion_new = self.__proportional_function( - doc, sent, new_topic) - proposal_old = self.__word_proposal_distribution( - word_id, old_topic) - proposal_new = self.__word_proposal_distribution( - word_id, new_topic) - transition_prob = float((proportion_new * proposal_old) / - (proportion_old * proposal_new)) - rejection = rand() - mask = -(rejection < transition_prob) - new_topic = (new_topic & mask) | (old_topic & ~mask) - return new_topic - - def __proportional_function(self, doc, token, new_topic): - if isinstance(doc, LDADoc) and isinstance(token, Token): - old_topic = token.topic - dt_alpha = doc.topic_sum(new_topic) + self.__model.alpha() - wt_beta = self.__model.word_topic_value( - token.id, new_topic) + self.__model.beta() - t_sum_beta_sum = self.__model.topic_sum_value( - new_topic) + self.__model.beta_sum() - if new_topic == old_topic and wt_beta > 1: - if dt_alpha > 1: - dt_alpha -= 1 - wt_beta -= 1 - t_sum_beta_sum -= 1 - return dt_alpha * wt_beta / t_sum_beta_sum - - elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): - sent = token - old_topic = sent.topic - result = doc.topic_sum(new_topic) + self.__model.alpha() - if new_topic == old_topic: - result -= 1 - for word_id in sent.tokens: - wt_beta = self.__model.word_topic_value( - word_id, new_topic) + self.__model.beta() - t_sum_beta_sum = self.__model.topic_sum_value( - new_topic) + self.__model.beta_sum() - if new_topic == old_topic and wt_beta > 1: - wt_beta -= 1 - t_sum_beta_sum -= 1 - result *= wt_beta / t_sum_beta_sum - return result - else: - logger.error("Wrong input argument type!") - - def __word_proposal_distribution(self, word_id, topic): - wt_beta = self.__model.word_topic_value(word_id, - topic) + self.__model.beta() - t_sum_beta_sum = self.__model.topic_sum_value( - topic) + self.__model.beta_sum() - return wt_beta / t_sum_beta_sum - - def __doc_proposal_distribution(self, doc, topic): - return doc.topic_sum(topic) + self.__model.alpha() - - def __propose(self, word_id): - dart = rand() * (self.__prob_sum[word_id] + self.__beta_prior_sum) - if dart < self.__prob_sum[word_id]: - idx = self.__alias_tables[word_id].generate() - topic = self.__topic_indexes[word_id][idx] - else: - topic = self.__beta_alias.generate() - return topic - - -class GibbsSampler(Sampler): - def __init__(self, model): - super().__init__() - self.__model = model - - def sample_doc(self, doc): - if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): - for i in 
range(doc.size()):
-                new_topic = self.__sample_token(doc, doc.token(i))
-                doc.set_topic(i, new_topic)
-        elif isinstance(doc, SLDADoc):
-            for i in range(doc.size()):
-                new_topic = self.__sample_sentence(doc, doc.sent(i))
-                doc.set_topic(i, new_topic)
-
-    def __sample_token(self, doc, token):
-        old_topic = token.topic
-        num_topics = self.__model.num_topics()
-        accum_prob = np.zeros(num_topics)
-        prob = np.zeros(num_topics)
-        sum_ = 0
-        for i in range(num_topics):
-            dt_alpha = doc.topic_sum(i) + self.__model.alpha()
-            wt_beta = self.__model.word_topic_value(token.id,
-                                                    i) + self.__model.beta()
-            t_sum_beta_sum = self.__model.topic_sum_value(
-                i) + self.__model.beta_sum()
-            if i == old_topic and wt_beta > 1:
-                if dt_alpha > 1:
-                    dt_alpha -= 1
-                wt_beta -= 1
-                t_sum_beta_sum -= 1
-            prob[i] = dt_alpha * wt_beta / t_sum_beta_sum
-            sum_ += prob[i]
-            accum_prob[i] = prob[i] if i == 0 else accum_prob[i - 1] + prob[i]
-
-        dart = rand() * sum_
-        if dart <= accum_prob[0]:
-            return 0
-        for i in range(1, num_topics):
-            if accum_prob[i - 1] < dart <= accum_prob[i]:
-                return i
-        return num_topics - 1
-
-    def __sample_sentence(self, doc, sent):
-        old_topic = sent.topic
-        num_topics = self.__model.num_topics()
-        accum_prob = np.zeros(num_topics)
-        prob = np.zeros(num_topics)
-        sum_ = 0
-        for t in range(num_topics):
-            dt_alpha = doc.topic_sum(t) + self.__model.alpha()
-            t_sum_beta_sum = self.__model.topic_sum_value(
-                t) + self.__model.beta_sum()
-            if t == old_topic:
-                if dt_alpha > 1:
-                    dt_alpha -= 1
-                if t_sum_beta_sum > 1:
-                    t_sum_beta_sum -= 1
-            prob[t] = dt_alpha
-            for i in range(len(sent.tokens)):
-                w = sent.tokens[i]
-                wt_beta = self.__model.word_topic_value(
-                    w, t) + self.__model.beta()
-                if t == old_topic and wt_beta > 1:
-                    wt_beta -= 1
-                # Note: for a long sentence, repeatedly multiplying many small
-                # factors can underflow, so the accumulated probability loses
-                # accuracy.
-                prob[t] *= wt_beta / t_sum_beta_sum
-            sum_ += prob[t]
-            accum_prob[t] = prob[t] if t == 0 else accum_prob[t - 1] + prob[t]
-
-        dart = rand() * sum_
-        if dart <= accum_prob[0]:
-            return 0
-        for t in range(1, num_topics):
-            if accum_prob[t - 1] < dart <= accum_prob[t]:
-                return t
-        return num_topics - 1
diff --git a/hub_module/modules/text/semantic_model/slda_novel/tokenizer.py b/hub_module/modules/text/semantic_model/slda_novel/tokenizer.py
deleted file mode 100644
index 10167882dd791f2f658853352e6b3773677f5b59..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/slda_novel/tokenizer.py
+++ /dev/null
@@ -1,128 +0,0 @@
-import os
-
-import numpy as np
-from paddlehub.common.logger import logger
-
-
-class Tokenizer(object):
-    """Base tokenizer class.
-    """
-
-    def __init__(self):
-        pass
-
-    def tokenize(self, text):
-        raise NotImplementedError
-
-
-class SimpleTokenizer(Tokenizer):
-    """Simple version FMM (Forward Maximum Matching) word tokenizer. This tokenizer can only
-    be used in the topic model demo, but not in real business application scenarios.
-
-    Notes: This tokenizer can only recognize the words in the corresponding vocab file.
-    """
-
-    def __init__(self, vocab_path):
-        super().__init__()
-        self.__max_word_len = 0
-        self.__vocab = set()
-        self.__load_vocab(vocab_path)
-
-    def tokenize(self, text):
-        """Tokenize the input string `text`, and return the tokenize result.
-        """
-        text_len = len(text)
-        result = []
-        i = 0
-        while i < text_len:
-            word = found_word = ""
-            # Deal with English characters.
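-            # A run of ASCII letters is lower-cased and matched as one token;
-            # everything else falls through to forward maximum matching below,
-            # which keeps the longest vocabulary hit within __max_word_len
-            # characters. Illustrative example: with a vocab holding both
-            # "深度" and "深度学习", the input "深度学习" becomes one token.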
- if self.__is_eng_char(text[i]): - for j in range(i, text_len + 1): - if j < text_len and self.__is_eng_char(text[j]): - word += self.__tolower(text[j]) - else: - # Forward matching by character granularity. - if word in self.__vocab: - result.append(word) - i = j - 1 - break - else: - for j in range(i, min(i + self.__max_word_len, text_len)): - word += text[j] - if word in self.__vocab: - found_word = word - if len(found_word) > 0: - result.append(found_word) - i += len(found_word) - 1 - i += 1 - return result - - def contains(self, word): - """Check whether the word is in the vocabulary. - """ - return word in self.__vocab - - def __load_vocab(self, vocab_path): - """Load the word dictionary. - """ - with open(vocab_path, 'r', encoding='utf-8') as fin: - vocab_size = 0 - for line in fin.readlines(): - fields = line.strip().split('\t') - assert len(fields) >= 2 - word = fields[1] - self.__max_word_len = max(self.__max_word_len, len(word)) - self.__vocab.add(word) - vocab_size += 1 - - def __is_eng_char(self, c): - """Check whether char c is an English character. - """ - return (c >= 'A' and c <= 'Z') or (c >= 'a' and c <= 'z') - - def __tolower(self, c): - """Return the lowercase character of the corresponding character, or return - the original character if there is no corresponding lowercase character. - """ - return c.lower() - - -class LACTokenizer(Tokenizer): - def __init__(self, vocab_path, lac): - super().__init__() - self.__max_word_len = 0 - self.__vocab = set() - self.__lac = lac - self.__load_vocab(vocab_path) - - def __load_vocab(self, vocab_path): - """Load the word dictionary. - """ - with open(vocab_path, 'r', encoding='utf-8') as fin: - vocab_size = 0 - for line in fin.readlines(): - fields = line.strip().split('\t') - assert len(fields) >= 2 - word = fields[1] - self.__max_word_len = max(self.__max_word_len, len(word)) - self.__vocab.add(word) - vocab_size += 1 - - def tokenize(self, text): - results = self.__lac.lexical_analysis( - texts=[text], use_gpu=False, batch_size=1, return_tag=True) - # Change English words to lower case. - # And just preserve the word in vocab. - words = results[0]["word"] - result = [] - for word in words: - word = word.lower() - if word in self.__vocab: - result.append(word) - return result - - def contains(self, word): - """Check whether the word is in the vocabulary. - """ - return word in self.__vocab diff --git a/hub_module/modules/text/semantic_model/slda_novel/vocab.py b/hub_module/modules/text/semantic_model/slda_novel/vocab.py deleted file mode 100644 index cc8d15e90a32b5ac556a102192c8a4e884b580ef..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/slda_novel/vocab.py +++ /dev/null @@ -1,43 +0,0 @@ -from paddlehub.common.logger import logger - -OOV = -1 - - -class WordCount(object): - def __init__(self, word_id, count): - self.word_id = word_id - self.count = count - - -class Vocab(object): - def __init__(self): - self.__term2id = {} - self.__id2term = {} - - def get_id(self, word): - if word not in self.__term2id: - return OOV - return self.__term2id[word] - - def load(self, vocab_file): - self.__term2id = {} - self.__id2term = {} - with open(vocab_file, 'r', encoding='utf-8') as fin: - for line in fin.readlines(): - fields = line.strip().split('\t') - assert len( - fields) == 5, "Vocabulary file [%s] format error!" % ( - vocab_file) - term = fields[1] - id_ = int(fields[2]) - if term in self.__term2id: - logger.error("Duplicate word [%s] in vocab file!" 
% (term)) - continue - self.__term2id[term] = id_ - self.__id2term[id_] = term - - def size(self): - return len(self.__term2id) - - def vocabulary(self): - return self.__id2term diff --git a/hub_module/modules/text/semantic_model/slda_webpage/document.py b/hub_module/modules/text/semantic_model/slda_webpage/document.py deleted file mode 100644 index b79acd0794f60fb0a401726aea0b8b51deda90a1..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/slda_webpage/document.py +++ /dev/null @@ -1,179 +0,0 @@ -import numpy as np - - -class Topic(object): - """Basic data structure of topic, contains topic id and - corresponding probability. - """ - - def __init__(self, tid, prob): - self.tid = tid # topic id - self.prob = prob # topic probability - - -class Token(object): - """Basic storage unit of LDA documents, contains word id - and corresponding topic. - """ - - def __init__(self, topic, id): - self.topic = topic - self.id = id - - -class Sentence(object): - """Basic storage unit of SentenceLDA documents, contains word ids - of the sentence and its corresponding topic id. - """ - - def __init__(self, topic, tokens): - self.topic = topic - self.tokens = tokens - - -class LDADoc(object): - """The storage structure of LDA model's inference result. - """ - - def __init__(self): - self._num_topics = None # Number of topics. - self._num_accum = None # Number of accumulated sample rounds. - self._alpha = None # Document prior parameter. - self._tokens = None # Storage structure of inference results. - self._topic_sum = None # Document's topic sum in one round samples. - self._accum_topic_sum = None # Accumulated results of topic sum. - - def init(self, num_topics): - """Initialize the LDADoc according to num_topics. - """ - self._num_topics = num_topics - self._num_accum = 0 - self._tokens = [] - self._topic_sum = np.zeros(self._num_topics) - self._accum_topic_sum = np.zeros(self._num_topics) - - def add_token(self, token): - """Add new word to current LDADoc. - Arg: - token: Token class object. - """ - assert token.topic >= 0, "Topic %d out of range!" % token.topic - assert token.topic < self._num_topics, "Topic %d out of range!" % token.topic - self._tokens.append(token) - self._topic_sum[token.topic] += 1 - - def token(self, index): - return self._tokens[index] - - def set_topic(self, index, new_topic): - """Set the index word's topic to new_topic, and update the corresponding - topic distribution. - """ - assert new_topic >= 0, "Topic %d out of range!" % new_topic - assert new_topic < self._num_topics, "Topic %d out of range!" % new_topic - old_topic = self._tokens[index].topic - if new_topic == old_topic: - return - self._tokens[index].topic = new_topic - self._topic_sum[old_topic] -= 1 - self._topic_sum[new_topic] += 1 - - def set_alpha(self, alpha): - self._alpha = alpha - - def size(self): - """Return number of words in LDADoc. - """ - return len(self._tokens) - - def topic_sum(self, topic_id): - return self._topic_sum[topic_id] - - def sparse_topic_dist(self, sort=True): - """Return the topic distribution of documents in sparse format. - By default, it is sorted according to the topic probability - under the descending order. 
- """ - topic_dist = [] - sum_ = np.sum(self._accum_topic_sum) - if sum_ == 0: - return topic_dist - for i in range(0, self._num_topics): - if self._accum_topic_sum[i] == 0: - continue - topic_dist.append(Topic(i, self._accum_topic_sum[i] * 1.0 / sum_)) - if sort: - - def take_elem(topic): - return topic.prob - - topic_dist.sort(key=take_elem, reverse=True) - if topic_dist is None: - topic_dist = [] - - return topic_dist - - def dense_topic_dist(self): - """Return the distribution of document topics in dense format, - taking into account the prior parameter alpha. - """ - dense_dist = np.zeros(self._num_topics) - if self.size() == 0: - return dense_dist - dense_dist = ( - self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / ( - self.size() + self._alpha * self._num_topics) - return dense_dist - - def accumulate_topic_num(self): - self._accum_topic_sum += self._topic_sum - self._num_accum += 1 - - -class SLDADoc(LDADoc): - """Sentence LDA Document, inherited from LDADoc. - Add add_sentence interface. - """ - - def __init__(self): - super().__init__() - self.__sentences = None - - def init(self, num_topics): - """Initialize the SLDADoc according to num_topics. - """ - self._num_topics = num_topics - self.__sentences = [] - self._num_accum = 0 - self._topic_sum = np.zeros(self._num_topics) - self._accum_topic_sum = np.zeros(self._num_topics) - - def add_sentence(self, sent): - """Add new sentence to current SLDADoc. - Arg: - sent: Sentence class object. - """ - assert sent.topic >= 0, "Topic %d out of range!" % (sent.topic) - assert sent.topic < self._num_topics, "Topic %d out of range!" % ( - sent.topic) - self.__sentences.append(sent) - self._topic_sum[sent.topic] += 1 - - def set_topic(self, index, new_topic): - assert new_topic >= 0, "Topic %d out of range!" % (new_topic) - assert new_topic < self._num_topics, "Topic %d out of range!" % ( - new_topic) - old_topic = self.__sentences[index].topic - if new_topic == old_topic: - return - self.__sentences[index].topic = new_topic - self._topic_sum[old_topic] -= 1 - self._topic_sum[new_topic] += 1 - - def size(self): - """Return number of sentences in SLDADoc. - """ - return len(self.__sentences) - - def sent(self, index): - return self.__sentences[index] diff --git a/hub_module/modules/text/semantic_model/slda_webpage/inference_engine.py b/hub_module/modules/text/semantic_model/slda_webpage/inference_engine.py deleted file mode 100644 index 5812d02942fbe79bc632532153602dbac768ba73..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/slda_webpage/inference_engine.py +++ /dev/null @@ -1,94 +0,0 @@ -import os - -from paddlehub.common.logger import logger - -from slda_webpage.config import ModelConfig -from slda_webpage.util import load_prototxt, fix_random_seed, rand_k -from slda_webpage.model import TopicModel -from slda_webpage.sampler import GibbsSampler, MHSampler -from slda_webpage.document import LDADoc, SLDADoc, Token, Sentence -from slda_webpage.vocab import OOV - - -class SamplerType: - GibbsSampling = 0 - MetropolisHastings = 1 - - -class InferenceEngine(object): - def __init__(self, - model_dir, - conf_file, - type=SamplerType.MetropolisHastings): - # Read model configuration. - config = ModelConfig() - conf_file_path = os.path.join(model_dir, conf_file) - load_prototxt(conf_file_path, config) - self.__model = TopicModel(model_dir, config) - self.__config = config - - # Initialize the sampler according to the configuration. 
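-        # MetropolisHastings (the default) pays a one-off alias-table build in
-        # MHSampler and then proposes topics in O(1) per step; GibbsSampling
-        # re-scores all num_topics candidates for every token or sentence,
-        # which is simpler but slower when the topic count is large.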
- if type == SamplerType.GibbsSampling: - self.__sampler = GibbsSampler(self.__model) - elif type == SamplerType.MetropolisHastings: - self.__sampler = MHSampler(self.__model) - - def infer(self, input, doc): - """Perform LDA topic inference on input, and store the results in doc. - Args: - input: a list of strings after tokenization. - doc: LDADoc type or SLDADoc type. - """ - fix_random_seed() - if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): - doc.init(self.__model.num_topics()) - doc.set_alpha(self.__model.alpha()) - for token in input: - id_ = self.__model.term_id(token) - if id_ != OOV: - init_topic = rand_k(self.__model.num_topics()) - doc.add_token(Token(init_topic, id_)) - self.lda_infer(doc, 20, 50) - elif isinstance(doc, SLDADoc): - doc.init(self.__model.num_topics()) - doc.set_alpha(self.__model.alpha()) - for sent in input: - words = [] - for token in sent: - id_ = self.__model.term_id(token) - if id_ != OOV: - words.append(id_) - init_topic = rand_k(self.__model.num_topics()) - doc.add_sentence(Sentence(init_topic, words)) - self.slda_infer(doc, 20, 50) - else: - logger.error("Wrong Doc Type!") - - def lda_infer(self, doc, burn_in_iter, total_iter): - assert burn_in_iter >= 0 - assert total_iter > 0 - assert total_iter > burn_in_iter - - for iter_ in range(total_iter): - self.__sampler.sample_doc(doc) - if iter_ >= burn_in_iter: - doc.accumulate_topic_num() - - def slda_infer(self, doc, burn_in_iter, total_iter): - assert burn_in_iter >= 0 - assert total_iter > 0 - assert total_iter > burn_in_iter - - for iter_ in range(total_iter): - self.__sampler.sample_doc(doc) - if iter_ >= burn_in_iter: - doc.accumulate_topic_num() - - def model_type(self): - return self.__model.type() - - def get_model(self): - return self.__model - - def get_config(self): - return self.__config diff --git a/hub_module/modules/text/semantic_model/slda_webpage/model.py b/hub_module/modules/text/semantic_model/slda_webpage/model.py deleted file mode 100644 index 6170f0b527ab2d8a80aa5ab5750feae0e44dc14b..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/slda_webpage/model.py +++ /dev/null @@ -1,127 +0,0 @@ -import os -from collections import OrderedDict - -import numpy as np -from tqdm import tqdm -from paddlehub.common.logger import logger - -from slda_webpage.vocab import Vocab, WordCount - - -class TopicModel(object): - """Storage Structure of Topic model, including vocabulary and word topic count. - """ - - def __init__(self, model_dir, config): - """ - Args: - model_dir: the path of model directory - config: ModelConfig class. - """ - self.__word_topic = None # Model parameter of word topic. - self.__vocab = Vocab() # Vocab data structure of model. - self.__num_topics = config.num_topics # Number of topics. - self.__alpha = config.alpha - self.__alpha_sum = self.__alpha * self.__num_topics - self.__beta = config.beta - self.__beta_sum = None - self.__type = config.type # Model type. - self.__topic_sum = np.zeros( - self.__num_topics, - dtype="int64") # Accum sum of each topic in word topic. 
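-        # __topic_sum[k] is filled in by __load_word_dict below: it accumulates
-        # the count of every word assigned to topic k, and later serves (plus
-        # beta_sum) as the normalizing denominator in the samplers.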
-        self.__topic_words = [[] for _ in range(self.__num_topics)]
-        word_topic_path = os.path.join(model_dir, config.word_topic_file)
-        vocab_path = os.path.join(model_dir, config.vocab_file)
-        self.load_model(word_topic_path, vocab_path)
-
-    def term_id(self, term):
-        return self.__vocab.get_id(term)
-
-    def load_model(self, word_topic_path, vocab_path):
-
-        # Loading vocabulary
-        self.__vocab.load(vocab_path)
-
-        self.__beta_sum = self.__beta * self.__vocab.size()
-        # One dict of {topic_id: count} per vocabulary term.
-        self.__word_topic = [{} for _ in range(self.__vocab.size())]
-        self.__load_word_dict(word_topic_path)
-        logger.info(
-            "Model Info: #num_topics=%d #vocab_size=%d alpha=%f beta=%f" %
-            (self.num_topics(), self.vocab_size(), self.alpha(), self.beta()))
-
-    def word_topic_value(self, word_id, topic_id):
-        """Return value of specific word under specific topic in the model.
-        """
-        word_dict = self.__word_topic[word_id]
-        if topic_id not in word_dict:
-            return 0
-        return word_dict[topic_id]
-
-    def word_topic(self, term_id):
-        """Return the topic distribution of a word.
-        """
-        return self.__word_topic[term_id]
-
-    def topic_sum_value(self, topic_id):
-        return self.__topic_sum[topic_id]
-
-    def topic_sum(self):
-        return self.__topic_sum
-
-    def num_topics(self):
-        return self.__num_topics
-
-    def vocab_size(self):
-        return self.__vocab.size()
-
-    def alpha(self):
-        return self.__alpha
-
-    def alpha_sum(self):
-        return self.__alpha_sum
-
-    def beta(self):
-        return self.__beta
-
-    def beta_sum(self):
-        return self.__beta_sum
-
-    def type(self):
-        return self.__type
-
-    def __load_word_dict(self, word_dict_path):
-        """Load the word topic parameters.
-        """
-        logger.info("Loading word topic.")
-        with open(word_dict_path, 'r', encoding='utf-8') as f:
-            for line in tqdm(f.readlines()):
-                fields = line.strip().split(" ")
-                assert len(fields) > 0, "Model file format error!"
-                term_id = int(fields[0])
-                assert term_id < self.vocab_size(), "Term id out of range!"
-                assert term_id >= 0, "Term id out of range!"
-                for i in range(1, len(fields)):
-                    topic_count = fields[i].split(":")
-                    assert len(topic_count) == 2, "Topic count format error!"
-
-                    topic_id = int(topic_count[0])
-                    assert topic_id >= 0, "Topic out of range!"
-                    assert topic_id < self.__num_topics, "Topic out of range!"
-
-                    count = int(topic_count[1])
-                    assert count >= 0, "Topic count error!"
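-                    # The validated pair is merged below; each term's dict is
-                    # then rebuilt as an OrderedDict sorted by topic id, giving
-                    # the samplers a stable iteration order over word_topic(i).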
- - self.__word_topic[term_id][topic_id] = count - self.__topic_sum[topic_id] += count - self.__topic_words[topic_id].append( - WordCount(term_id, count)) - new_dict = OrderedDict() - for key in sorted(self.__word_topic[term_id]): - new_dict[key] = self.__word_topic[term_id][key] - self.__word_topic[term_id] = new_dict - - def get_vocab(self): - return self.__vocab.vocabulary() - - def topic_words(self): - return self.__topic_words diff --git a/hub_module/modules/text/semantic_model/slda_webpage/module.py b/hub_module/modules/text/semantic_model/slda_webpage/module.py deleted file mode 100644 index 5597fd6efd3a0b200a52e9fe1c8f3654ae5a1ac4..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/slda_webpage/module.py +++ /dev/null @@ -1,103 +0,0 @@ -import os - -import paddlehub as hub -from paddlehub.module.module import moduleinfo -from paddlehub.common.logger import logger - -from slda_webpage.inference_engine import InferenceEngine -from slda_webpage.document import SLDADoc -from slda_webpage.semantic_matching import SemanticMatching, WordAndDis -from slda_webpage.tokenizer import LACTokenizer, SimpleTokenizer -from slda_webpage.config import ModelType -from slda_webpage.vocab import Vocab, WordCount - - -@moduleinfo( - name="slda_webpage", - version="1.0.0", - summary= - "This is a PaddleHub Module for SLDA topic model in webpage dataset, where we can infer the topic distribution of document.", - author="DesmonDay", - author_email="", - type="nlp/semantic_model") -class TopicModel(hub.Module): - def _initialize(self): - """ - Initialize with the necessary elements. - """ - self.model_dir = os.path.join(self.directory, 'webpage') - self.conf_file = 'slda.conf' - self.__engine = InferenceEngine(self.model_dir, self.conf_file) - self.vocab_path = os.path.join(self.model_dir, 'vocab_info.txt') - lac = hub.Module(name="lac") - # self.__tokenizer = SimpleTokenizer(self.vocab_path) - self.__tokenizer = LACTokenizer(self.vocab_path, lac) - - self.vocabulary = self.__engine.get_model().get_vocab() - self.config = self.__engine.get_config() - self.topic_words = self.__engine.get_model().topic_words() - self.topic_sum_table = self.__engine.get_model().topic_sum() - - def take_elem(word_count): - return word_count.count - - for i in range(self.config.num_topics): - self.topic_words[i].sort(key=take_elem, reverse=True) - - logger.info("Finish Initialization.") - - def infer_doc_topic_distribution(self, document): - """ - This interface infers the topic distribution of document. - - Args: - document(str): the input document text. - - Returns: - results(list): returns the topic distribution of document. - """ - tokens = self.__tokenizer.tokenize(document) - if tokens == []: - return [] - results = [] - sentences = [] - sent = [] - for i in range(len(tokens)): - sent.append(tokens[i]) - if len(sent) % 5 == 0: - sentences.append(sent) - sent = [] - if len(sent) > 0: - sentences.append(sent) - - doc = SLDADoc() - self.__engine.infer(sentences, doc) - topics = doc.sparse_topic_dist() - for topic in topics: - results.append({"topic id": topic.tid, "distribution": topic.prob}) - return results - - def show_topic_keywords(self, topic_id, k=10): - """ - This interface returns the k keywords under specific topic. - - Args: - topic_id(int): topic information we want to know. - k(int): top k keywords. - - Returns: - results(dict): contains specific topic's keywords and - corresponding probability. 
- """ - EPS = 1e-8 - results = {} - if 0 <= topic_id < self.config.num_topics: - k = min(k, len(self.topic_words[topic_id])) - for i in range(k): - prob = self.topic_words[topic_id][i].count / \ - (self.topic_sum_table[topic_id] + EPS) - results[self.vocabulary[self.topic_words[topic_id] - [i].word_id]] = prob - return results - else: - logger.error("%d is out of range!" % topic_id) diff --git a/hub_module/modules/text/semantic_model/slda_webpage/sampler.py b/hub_module/modules/text/semantic_model/slda_webpage/sampler.py deleted file mode 100644 index 6aed8b51aee0518d7243741e32e9fcab0530ecaa..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/slda_webpage/sampler.py +++ /dev/null @@ -1,311 +0,0 @@ -import os - -import numpy as np -from tqdm import tqdm -from paddlehub.common.logger import logger - -from slda_webpage.document import LDADoc, SLDADoc, Token, Sentence -from slda_webpage.vose_alias import VoseAlias -from slda_webpage.util import rand, rand_k - - -class Sampler(object): - def __init__(self): - pass - - def sample_doc(self, doc): - """Sample LDA or SLDA topics for documents. - """ - raise NotImplementedError - - -class MHSampler(Sampler): - def __init__(self, model): - super().__init__() - self.__model = model - self.__topic_indexes = None - self.__alias_tables = None - self.__prob_sum = None - self.__beta_alias = VoseAlias() - self.__beta_prior_sum = None - self.__mh_steps = 2 - self.__construct_alias_table() - - def __construct_alias_table(self): - """Construct alias table for all words. - """ - logger.info("Construct alias table for alias sampling method.") - vocab_size = self.__model.vocab_size() - self.__topic_indexes = [[] for _ in range(vocab_size)] - self.__alias_tables = [VoseAlias() for _ in range(vocab_size)] - self.__prob_sum = np.zeros(vocab_size) - - # Construct each word's alias table (prior is not included). - for i in tqdm(range(vocab_size)): - dist = [] - prob_sum = 0 - for key in self.__model.word_topic(i): - topic_id = key - word_topic_count = self.__model.word_topic(i)[key] - topic_sum = self.__model.topic_sum_value(topic_id) - - self.__topic_indexes[i].append(topic_id) - q = word_topic_count / (topic_sum + self.__model.beta_sum()) - dist.append(q) - prob_sum += q - self.__prob_sum[i] = prob_sum - if len(dist) > 0: - dist = np.array(dist, dtype=np.float) - self.__alias_tables[i].initialize(dist) - - # Build prior parameter beta's alias table. 
- beta_dist = self.__model.beta() / ( - self.__model.topic_sum() + self.__model.beta_sum()) - self.__beta_prior_sum = np.sum(beta_dist) - self.__beta_alias.initialize(beta_dist) - - def sample_doc(self, doc): - if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): - for i in range(doc.size()): - new_topic = self.__sample_token(doc, doc.token(i)) - doc.set_topic(i, new_topic) - elif isinstance(doc, SLDADoc): - for i in range(doc.size()): - new_topic = self.__sample_sentence(doc, doc.sent(i)) - doc.set_topic(i, new_topic) - - def __sample_token(self, doc, token): - new_topic = token.topic - for i in range(self.__mh_steps): - doc_proposed_topic = self.__doc_proposal(doc, token) - new_topic = self.__word_proposal(doc, token, doc_proposed_topic) - return new_topic - - def __sample_sentence(self, doc, sent): - new_topic = sent.topic - for i in range(self.__mh_steps): - doc_proposed_topic = self.__doc_proposal(doc, sent) - new_topic = self.__word_proposal(doc, sent, doc_proposed_topic) - return new_topic - - def __doc_proposal(self, doc, token): - if isinstance(doc, LDADoc) and isinstance(token, Token): - old_topic = token.topic - dart = rand() * (doc.size() + self.__model.alpha_sum()) - if dart < doc.size(): - token_index = int(dart) - new_topic = doc.token(token_index).topic - else: - new_topic = rand_k(self.__model.num_topics()) - - if new_topic != old_topic: - proposal_old = self.__doc_proposal_distribution(doc, old_topic) - proposal_new = self.__doc_proposal_distribution(doc, new_topic) - proportion_old = self.__proportional_function( - doc, token, old_topic) - proportion_new = self.__proportional_function( - doc, token, new_topic) - transition_prob = float((proportion_new * proposal_old) / - (proportion_old * proposal_new)) - rejection = rand() - mask = -(rejection < transition_prob) - return (new_topic & mask) | (old_topic & ~mask) - - return new_topic - - elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): - sent = token - old_topic = sent.topic - dart = rand() * (doc.size() + self.__model.alpha_sum()) - if dart < doc.size(): - token_index = int(dart) - new_topic = doc.sent(token_index).topic - else: - new_topic = rand_k(self.__model.num_topics()) - - if new_topic != old_topic: - proportion_old = self.__proportional_function( - doc, sent, old_topic) - proportion_new = self.__proportional_function( - doc, sent, new_topic) - proposal_old = self.__doc_proposal_distribution(doc, old_topic) - proposal_new = self.__doc_proposal_distribution(doc, new_topic) - transition_prob = float((proportion_new * proposal_old) / - (proportion_old * proposal_new)) - rejection = rand() - mask = -(rejection < transition_prob) - return (new_topic & mask) | (old_topic & ~mask) - - return new_topic - - def __word_proposal(self, doc, token, old_topic): - if isinstance(doc, LDADoc) and isinstance(token, Token): - new_topic = self.__propose(token.id) - if new_topic != old_topic: - proposal_old = self.__word_proposal_distribution( - token.id, old_topic) - proposal_new = self.__word_proposal_distribution( - token.id, new_topic) - proportion_old = self.__proportional_function( - doc, token, old_topic) - proportion_new = self.__proportional_function( - doc, token, new_topic) - transition_prob = float((proportion_new * proposal_old) / - (proportion_old * proposal_new)) - rejection = rand() - mask = -(rejection < transition_prob) - return (new_topic & mask) | (old_topic & ~mask) - return new_topic - - elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): - sent = token - new_topic = old_topic - 
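The accept/reject step above avoids a branch by turning the boolean acceptance test into an integer mask: `-(True)` is `-1` (all bits set in two's complement) and `-(False)` is `0`, so the bitwise blend returns the proposed topic exactly when the move is accepted. A small standalone check of the identity (it relies on topic ids being non-negative integers, which they are here):

```python
def blend(new_topic, old_topic, accepted):
    # -int(True) == -1 (all bits set), -int(False) == 0
    mask = -int(accepted)
    return (new_topic & mask) | (old_topic & ~mask)


assert blend(7, 3, True) == 7    # accepted: keep the proposed topic
assert blend(7, 3, False) == 3   # rejected: keep the old topic
```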
-            for word_id in sent.tokens:
-                new_topic = self.__propose(word_id)
-                if new_topic != old_topic:
-                    proportion_old = self.__proportional_function(
-                        doc, sent, old_topic)
-                    proportion_new = self.__proportional_function(
-                        doc, sent, new_topic)
-                    proposal_old = self.__word_proposal_distribution(
-                        word_id, old_topic)
-                    proposal_new = self.__word_proposal_distribution(
-                        word_id, new_topic)
-                    transition_prob = float((proportion_new * proposal_old) /
-                                            (proportion_old * proposal_new))
-                    rejection = rand()
-                    mask = -(rejection < transition_prob)
-                    new_topic = (new_topic & mask) | (old_topic & ~mask)
-            return new_topic
-
-    def __proportional_function(self, doc, token, new_topic):
-        if isinstance(doc, LDADoc) and isinstance(token, Token):
-            old_topic = token.topic
-            dt_alpha = doc.topic_sum(new_topic) + self.__model.alpha()
-            wt_beta = self.__model.word_topic_value(
-                token.id, new_topic) + self.__model.beta()
-            t_sum_beta_sum = self.__model.topic_sum_value(
-                new_topic) + self.__model.beta_sum()
-            if new_topic == old_topic and wt_beta > 1:
-                if dt_alpha > 1:
-                    dt_alpha -= 1
-                wt_beta -= 1
-                t_sum_beta_sum -= 1
-            return dt_alpha * wt_beta / t_sum_beta_sum
-
-        elif isinstance(doc, SLDADoc) and isinstance(token, Sentence):
-            sent = token
-            old_topic = sent.topic
-            result = doc.topic_sum(new_topic) + self.__model.alpha()
-            if new_topic == old_topic:
-                result -= 1
-            for word_id in sent.tokens:
-                wt_beta = self.__model.word_topic_value(
-                    word_id, new_topic) + self.__model.beta()
-                t_sum_beta_sum = self.__model.topic_sum_value(
-                    new_topic) + self.__model.beta_sum()
-                if new_topic == old_topic and wt_beta > 1:
-                    wt_beta -= 1
-                    t_sum_beta_sum -= 1
-                result *= wt_beta / t_sum_beta_sum
-            return result
-        else:
-            logger.error("Wrong input argument type!")
-
-    def __word_proposal_distribution(self, word_id, topic):
-        wt_beta = self.__model.word_topic_value(word_id,
-                                                topic) + self.__model.beta()
-        t_sum_beta_sum = self.__model.topic_sum_value(
-            topic) + self.__model.beta_sum()
-        return wt_beta / t_sum_beta_sum
-
-    def __doc_proposal_distribution(self, doc, topic):
-        return doc.topic_sum(topic) + self.__model.alpha()
-
-    def __propose(self, word_id):
-        dart = rand() * (self.__prob_sum[word_id] + self.__beta_prior_sum)
-        if dart < self.__prob_sum[word_id]:
-            idx = self.__alias_tables[word_id].generate()
-            topic = self.__topic_indexes[word_id][idx]
-        else:
-            topic = self.__beta_alias.generate()
-        return topic
-
-
-class GibbsSampler(Sampler):
-    def __init__(self, model):
-        super().__init__()
-        self.__model = model
-
-    def sample_doc(self, doc):
-        if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc):
-            for i in range(doc.size()):
-                new_topic = self.__sample_token(doc, doc.token(i))
-                doc.set_topic(i, new_topic)
-        elif isinstance(doc, SLDADoc):
-            for i in range(doc.size()):
-                new_topic = self.__sample_sentence(doc, doc.sent(i))
-                doc.set_topic(i, new_topic)
-
-    def __sample_token(self, doc, token):
-        old_topic = token.topic
-        num_topics = self.__model.num_topics()
-        accum_prob = np.zeros(num_topics)
-        prob = np.zeros(num_topics)
-        sum_ = 0
-        for i in range(num_topics):
-            dt_alpha = doc.topic_sum(i) + self.__model.alpha()
-            wt_beta = self.__model.word_topic_value(token.id,
-                                                    i) + self.__model.beta()
-            t_sum_beta_sum = self.__model.topic_sum_value(
-                i) + self.__model.beta_sum()
-            if i == old_topic and wt_beta > 1:
-                if dt_alpha > 1:
-                    dt_alpha -= 1
-                wt_beta -= 1
-                t_sum_beta_sum -= 1
-            prob[i] = dt_alpha * wt_beta / t_sum_beta_sum
-            sum_ += prob[i]
-            accum_prob[i] = prob[i] if i == 0 else accum_prob[i - 1] + prob[i]
-
-        dart = rand() * sum_
-        if dart <= accum_prob[0]:
-            return 0
-        for i in range(1, num_topics):
-            if accum_prob[i - 1] < dart <= accum_prob[i]:
-                return i
-        return num_topics - 1
-
-    def __sample_sentence(self, doc, sent):
-        old_topic = sent.topic
-        num_topics = self.__model.num_topics()
-        accum_prob = np.zeros(num_topics)
-        prob = np.zeros(num_topics)
-        sum_ = 0
-        for t in range(num_topics):
-            dt_alpha = doc.topic_sum(t) + self.__model.alpha()
-            t_sum_beta_sum = self.__model.topic_sum_value(
-                t) + self.__model.beta_sum()
-            if t == old_topic:
-                if dt_alpha > 1:
-                    dt_alpha -= 1
-                if t_sum_beta_sum > 1:
-                    t_sum_beta_sum -= 1
-            prob[t] = dt_alpha
-            for i in range(len(sent.tokens)):
-                w = sent.tokens[i]
-                wt_beta = self.__model.word_topic_value(
-                    w, t) + self.__model.beta()
-                if t == old_topic and wt_beta > 1:
-                    wt_beta -= 1
-                # Note: for a very long sentence, this product of many factors
-                # smaller than 1 can underflow and lose numerical accuracy.
-                prob[t] *= wt_beta / t_sum_beta_sum
-            sum_ += prob[t]
-            accum_prob[t] = prob[t] if t == 0 else accum_prob[t - 1] + prob[t]
-
-        dart = rand() * sum_
-        if dart <= accum_prob[0]:
-            return 0
-        for t in range(1, num_topics):
-            if accum_prob[t - 1] < dart <= accum_prob[t]:
-                return t
-        return num_topics - 1
diff --git a/hub_module/modules/text/semantic_model/slda_webpage/tokenizer.py b/hub_module/modules/text/semantic_model/slda_webpage/tokenizer.py
deleted file mode 100644
index 10167882dd791f2f658853352e6b3773677f5b59..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/slda_webpage/tokenizer.py
+++ /dev/null
@@ -1,128 +0,0 @@
-import os
-
-import numpy as np
-from paddlehub.common.logger import logger
-
-
-class Tokenizer(object):
-    """Base tokenizer class.
-    """
-
-    def __init__(self):
-        pass
-
-    def tokenize(self, text):
-        raise NotImplementedError
-
-
-class SimpleTokenizer(Tokenizer):
-    """Simple version of an FMM (Forward Maximum Matching) word tokenizer. This
-    tokenizer can only be used in the topic model demo, not in real business
-    application scenarios.
-
-    Notes: This tokenizer can only recognize the words in the corresponding vocab file.
-    """
-
-    def __init__(self, vocab_path):
-        super().__init__()
-        self.__max_word_len = 0
-        self.__vocab = set()
-        self.__load_vocab(vocab_path)
-
-    def tokenize(self, text):
-        """Tokenize the input string `text` and return the tokenized result.
-        """
-        text_len = len(text)
-        result = []
-        i = 0
-        while i < text_len:
-            word = found_word = ""
-            # Deal with English characters.
-            if self.__is_eng_char(text[i]):
-                for j in range(i, text_len + 1):
-                    if j < text_len and self.__is_eng_char(text[j]):
-                        word += self.__tolower(text[j])
-                    else:
-                        # Forward matching by character granularity.
-                        if word in self.__vocab:
-                            result.append(word)
-                        i = j - 1
-                        break
-            else:
-                for j in range(i, min(i + self.__max_word_len, text_len)):
-                    word += text[j]
-                    if word in self.__vocab:
-                        found_word = word
-                if len(found_word) > 0:
-                    result.append(found_word)
-                    i += len(found_word) - 1
-            i += 1
-        return result
-
-    def contains(self, word):
-        """Check whether the word is in the vocabulary.
-        """
-        return word in self.__vocab
-
-    def __load_vocab(self, vocab_path):
-        """Load the word dictionary.
- """ - with open(vocab_path, 'r', encoding='utf-8') as fin: - vocab_size = 0 - for line in fin.readlines(): - fields = line.strip().split('\t') - assert len(fields) >= 2 - word = fields[1] - self.__max_word_len = max(self.__max_word_len, len(word)) - self.__vocab.add(word) - vocab_size += 1 - - def __is_eng_char(self, c): - """Check whether char c is an English character. - """ - return (c >= 'A' and c <= 'Z') or (c >= 'a' and c <= 'z') - - def __tolower(self, c): - """Return the lowercase character of the corresponding character, or return - the original character if there is no corresponding lowercase character. - """ - return c.lower() - - -class LACTokenizer(Tokenizer): - def __init__(self, vocab_path, lac): - super().__init__() - self.__max_word_len = 0 - self.__vocab = set() - self.__lac = lac - self.__load_vocab(vocab_path) - - def __load_vocab(self, vocab_path): - """Load the word dictionary. - """ - with open(vocab_path, 'r', encoding='utf-8') as fin: - vocab_size = 0 - for line in fin.readlines(): - fields = line.strip().split('\t') - assert len(fields) >= 2 - word = fields[1] - self.__max_word_len = max(self.__max_word_len, len(word)) - self.__vocab.add(word) - vocab_size += 1 - - def tokenize(self, text): - results = self.__lac.lexical_analysis( - texts=[text], use_gpu=False, batch_size=1, return_tag=True) - # Change English words to lower case. - # And just preserve the word in vocab. - words = results[0]["word"] - result = [] - for word in words: - word = word.lower() - if word in self.__vocab: - result.append(word) - return result - - def contains(self, word): - """Check whether the word is in the vocabulary. - """ - return word in self.__vocab diff --git a/hub_module/modules/text/semantic_model/slda_webpage/vocab.py b/hub_module/modules/text/semantic_model/slda_webpage/vocab.py deleted file mode 100644 index cc8d15e90a32b5ac556a102192c8a4e884b580ef..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/slda_webpage/vocab.py +++ /dev/null @@ -1,43 +0,0 @@ -from paddlehub.common.logger import logger - -OOV = -1 - - -class WordCount(object): - def __init__(self, word_id, count): - self.word_id = word_id - self.count = count - - -class Vocab(object): - def __init__(self): - self.__term2id = {} - self.__id2term = {} - - def get_id(self, word): - if word not in self.__term2id: - return OOV - return self.__term2id[word] - - def load(self, vocab_file): - self.__term2id = {} - self.__id2term = {} - with open(vocab_file, 'r', encoding='utf-8') as fin: - for line in fin.readlines(): - fields = line.strip().split('\t') - assert len( - fields) == 5, "Vocabulary file [%s] format error!" % ( - vocab_file) - term = fields[1] - id_ = int(fields[2]) - if term in self.__term2id: - logger.error("Duplicate word [%s] in vocab file!" % (term)) - continue - self.__term2id[term] = id_ - self.__id2term[id_] = term - - def size(self): - return len(self.__term2id) - - def vocabulary(self): - return self.__id2term diff --git a/hub_module/modules/text/semantic_model/slda_weibo/document.py b/hub_module/modules/text/semantic_model/slda_weibo/document.py deleted file mode 100644 index b79acd0794f60fb0a401726aea0b8b51deda90a1..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/slda_weibo/document.py +++ /dev/null @@ -1,179 +0,0 @@ -import numpy as np - - -class Topic(object): - """Basic data structure of topic, contains topic id and - corresponding probability. 
- """ - - def __init__(self, tid, prob): - self.tid = tid # topic id - self.prob = prob # topic probability - - -class Token(object): - """Basic storage unit of LDA documents, contains word id - and corresponding topic. - """ - - def __init__(self, topic, id): - self.topic = topic - self.id = id - - -class Sentence(object): - """Basic storage unit of SentenceLDA documents, contains word ids - of the sentence and its corresponding topic id. - """ - - def __init__(self, topic, tokens): - self.topic = topic - self.tokens = tokens - - -class LDADoc(object): - """The storage structure of LDA model's inference result. - """ - - def __init__(self): - self._num_topics = None # Number of topics. - self._num_accum = None # Number of accumulated sample rounds. - self._alpha = None # Document prior parameter. - self._tokens = None # Storage structure of inference results. - self._topic_sum = None # Document's topic sum in one round samples. - self._accum_topic_sum = None # Accumulated results of topic sum. - - def init(self, num_topics): - """Initialize the LDADoc according to num_topics. - """ - self._num_topics = num_topics - self._num_accum = 0 - self._tokens = [] - self._topic_sum = np.zeros(self._num_topics) - self._accum_topic_sum = np.zeros(self._num_topics) - - def add_token(self, token): - """Add new word to current LDADoc. - Arg: - token: Token class object. - """ - assert token.topic >= 0, "Topic %d out of range!" % token.topic - assert token.topic < self._num_topics, "Topic %d out of range!" % token.topic - self._tokens.append(token) - self._topic_sum[token.topic] += 1 - - def token(self, index): - return self._tokens[index] - - def set_topic(self, index, new_topic): - """Set the index word's topic to new_topic, and update the corresponding - topic distribution. - """ - assert new_topic >= 0, "Topic %d out of range!" % new_topic - assert new_topic < self._num_topics, "Topic %d out of range!" % new_topic - old_topic = self._tokens[index].topic - if new_topic == old_topic: - return - self._tokens[index].topic = new_topic - self._topic_sum[old_topic] -= 1 - self._topic_sum[new_topic] += 1 - - def set_alpha(self, alpha): - self._alpha = alpha - - def size(self): - """Return number of words in LDADoc. - """ - return len(self._tokens) - - def topic_sum(self, topic_id): - return self._topic_sum[topic_id] - - def sparse_topic_dist(self, sort=True): - """Return the topic distribution of documents in sparse format. - By default, it is sorted according to the topic probability - under the descending order. - """ - topic_dist = [] - sum_ = np.sum(self._accum_topic_sum) - if sum_ == 0: - return topic_dist - for i in range(0, self._num_topics): - if self._accum_topic_sum[i] == 0: - continue - topic_dist.append(Topic(i, self._accum_topic_sum[i] * 1.0 / sum_)) - if sort: - - def take_elem(topic): - return topic.prob - - topic_dist.sort(key=take_elem, reverse=True) - if topic_dist is None: - topic_dist = [] - - return topic_dist - - def dense_topic_dist(self): - """Return the distribution of document topics in dense format, - taking into account the prior parameter alpha. - """ - dense_dist = np.zeros(self._num_topics) - if self.size() == 0: - return dense_dist - dense_dist = ( - self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / ( - self.size() + self._alpha * self._num_topics) - return dense_dist - - def accumulate_topic_num(self): - self._accum_topic_sum += self._topic_sum - self._num_accum += 1 - - -class SLDADoc(LDADoc): - """Sentence LDA Document, inherited from LDADoc. 
- Add add_sentence interface. - """ - - def __init__(self): - super().__init__() - self.__sentences = None - - def init(self, num_topics): - """Initialize the SLDADoc according to num_topics. - """ - self._num_topics = num_topics - self.__sentences = [] - self._num_accum = 0 - self._topic_sum = np.zeros(self._num_topics) - self._accum_topic_sum = np.zeros(self._num_topics) - - def add_sentence(self, sent): - """Add new sentence to current SLDADoc. - Arg: - sent: Sentence class object. - """ - assert sent.topic >= 0, "Topic %d out of range!" % (sent.topic) - assert sent.topic < self._num_topics, "Topic %d out of range!" % ( - sent.topic) - self.__sentences.append(sent) - self._topic_sum[sent.topic] += 1 - - def set_topic(self, index, new_topic): - assert new_topic >= 0, "Topic %d out of range!" % (new_topic) - assert new_topic < self._num_topics, "Topic %d out of range!" % ( - new_topic) - old_topic = self.__sentences[index].topic - if new_topic == old_topic: - return - self.__sentences[index].topic = new_topic - self._topic_sum[old_topic] -= 1 - self._topic_sum[new_topic] += 1 - - def size(self): - """Return number of sentences in SLDADoc. - """ - return len(self.__sentences) - - def sent(self, index): - return self.__sentences[index] diff --git a/hub_module/modules/text/semantic_model/slda_weibo/inference_engine.py b/hub_module/modules/text/semantic_model/slda_weibo/inference_engine.py deleted file mode 100644 index eec30b9a7b2db7b0b3c0a51f866c8e32b65c4a9f..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/slda_weibo/inference_engine.py +++ /dev/null @@ -1,94 +0,0 @@ -import os - -from paddlehub.common.logger import logger - -from slda_weibo.config import ModelConfig -from slda_weibo.util import load_prototxt, fix_random_seed, rand_k -from slda_weibo.model import TopicModel -from slda_weibo.sampler import GibbsSampler, MHSampler -from slda_weibo.document import LDADoc, SLDADoc, Token, Sentence -from slda_weibo.vocab import OOV - - -class SamplerType: - GibbsSampling = 0 - MetropolisHastings = 1 - - -class InferenceEngine(object): - def __init__(self, - model_dir, - conf_file, - type=SamplerType.MetropolisHastings): - # Read model configuration. - config = ModelConfig() - conf_file_path = os.path.join(model_dir, conf_file) - load_prototxt(conf_file_path, config) - self.__model = TopicModel(model_dir, config) - self.__config = config - - # Initialize the sampler according to the configuration. - if type == SamplerType.GibbsSampling: - self.__sampler = GibbsSampler(self.__model) - elif type == SamplerType.MetropolisHastings: - self.__sampler = MHSampler(self.__model) - - def infer(self, input, doc): - """Perform LDA topic inference on input, and store the results in doc. - Args: - input: a list of strings after tokenization. - doc: LDADoc type or SLDADoc type. 
- """ - fix_random_seed() - if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): - doc.init(self.__model.num_topics()) - doc.set_alpha(self.__model.alpha()) - for token in input: - id_ = self.__model.term_id(token) - if id_ != OOV: - init_topic = rand_k(self.__model.num_topics()) - doc.add_token(Token(init_topic, id_)) - self.lda_infer(doc, 20, 50) - elif isinstance(doc, SLDADoc): - doc.init(self.__model.num_topics()) - doc.set_alpha(self.__model.alpha()) - for sent in input: - words = [] - for token in sent: - id_ = self.__model.term_id(token) - if id_ != OOV: - words.append(id_) - init_topic = rand_k(self.__model.num_topics()) - doc.add_sentence(Sentence(init_topic, words)) - self.slda_infer(doc, 20, 50) - else: - logger.error("Wrong Doc Type!") - - def lda_infer(self, doc, burn_in_iter, total_iter): - assert burn_in_iter >= 0 - assert total_iter > 0 - assert total_iter > burn_in_iter - - for iter_ in range(total_iter): - self.__sampler.sample_doc(doc) - if iter_ >= burn_in_iter: - doc.accumulate_topic_num() - - def slda_infer(self, doc, burn_in_iter, total_iter): - assert burn_in_iter >= 0 - assert total_iter > 0 - assert total_iter > burn_in_iter - - for iter_ in range(total_iter): - self.__sampler.sample_doc(doc) - if iter_ >= burn_in_iter: - doc.accumulate_topic_num() - - def model_type(self): - return self.__model.type() - - def get_model(self): - return self.__model - - def get_config(self): - return self.__config diff --git a/hub_module/modules/text/semantic_model/slda_weibo/model.py b/hub_module/modules/text/semantic_model/slda_weibo/model.py deleted file mode 100644 index 4da9e2955cd09e4532e35f9483c7419adeb02355..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/slda_weibo/model.py +++ /dev/null @@ -1,127 +0,0 @@ -import os -from collections import OrderedDict - -import numpy as np -from tqdm import tqdm -from paddlehub.common.logger import logger - -from slda_weibo.vocab import Vocab, WordCount - - -class TopicModel(object): - """Storage Structure of Topic model, including vocabulary and word topic count. - """ - - def __init__(self, model_dir, config): - """ - Args: - model_dir: the path of model directory - config: ModelConfig class. - """ - self.__word_topic = None # Model parameter of word topic. - self.__vocab = Vocab() # Vocab data structure of model. - self.__num_topics = config.num_topics # Number of topics. - self.__alpha = config.alpha - self.__alpha_sum = self.__alpha * self.__num_topics - self.__beta = config.beta - self.__beta_sum = None - self.__type = config.type # Model type. - self.__topic_sum = np.zeros( - self.__num_topics, - dtype="int64") # Accum sum of each topic in word topic. - self.__topic_words = [[] for _ in range(self.__num_topics)] - word_topic_path = os.path.join(model_dir, config.word_topic_file) - vocab_path = os.path.join(model_dir, config.vocab_file) - self.load_model(word_topic_path, vocab_path) - - def term_id(self, term): - return self.__vocab.get_id(term) - - def load_model(self, word_topic_path, vocab_path): - - # Loading vocabulary - self.__vocab.load(vocab_path) - - self.__beta_sum = self.__beta * self.__vocab.size() - self.__word_topic = [{} for _ in range(self.__vocab.size())] # 字典列表 - self.__load_word_dict(word_topic_path) - logger.info( - "Model Info: #num_topics=%d #vocab_size=%d alpha=%f beta=%f" % - (self.num_topics(), self.vocab_size(), self.alpha(), self.beta())) - - def word_topic_value(self, word_id, topic_id): - """Return value of specific word under specific topic in the model. 
- """ - word_dict = self.__word_topic[word_id] - if topic_id not in word_dict: - return 0 - return word_dict[topic_id] - - def word_topic(self, term_id): - """Return the topic distribution of a word. - """ - return self.__word_topic[term_id] - - def topic_sum_value(self, topic_id): - return self.__topic_sum[topic_id] - - def topic_sum(self): - return self.__topic_sum - - def num_topics(self): - return self.__num_topics - - def vocab_size(self): - return self.__vocab.size() - - def alpha(self): - return self.__alpha - - def alpha_sum(self): - return self.__alpha_sum - - def beta(self): - return self.__beta - - def beta_sum(self): - return self.__beta_sum - - def type(self): - return self.__type - - def __load_word_dict(self, word_dict_path): - """Load the word topic parameters. - """ - logger.info("Loading word topic.") - with open(word_dict_path, 'r', encoding='utf-8') as f: - for line in tqdm(f.readlines()): - fields = line.strip().split(" ") - assert len(fields) > 0, "Model file format error!" - term_id = int(fields[0]) - assert term_id < self.vocab_size(), "Term id out of range!" - assert term_id >= 0, "Term id out of range!" - for i in range(1, len(fields)): - topic_count = fields[i].split(":") - assert len(topic_count) == 2, "Topic count format error!" - - topic_id = int(topic_count[0]) - assert topic_id >= 0, "Topic out of range!" - assert topic_id < self.__num_topics, "Topic out of range!" - - count = int(topic_count[1]) - assert count >= 0, "Topic count error!" - - self.__word_topic[term_id][topic_id] = count - self.__topic_sum[topic_id] += count - self.__topic_words[topic_id].append( - WordCount(term_id, count)) - new_dict = OrderedDict() - for key in sorted(self.__word_topic[term_id]): - new_dict[key] = self.__word_topic[term_id][key] - self.__word_topic[term_id] = new_dict - - def get_vocab(self): - return self.__vocab.vocabulary() - - def topic_words(self): - return self.__topic_words diff --git a/hub_module/modules/text/semantic_model/slda_weibo/module.py b/hub_module/modules/text/semantic_model/slda_weibo/module.py deleted file mode 100644 index f0301e840beed1b7ab91e6426f975a8010600c62..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/slda_weibo/module.py +++ /dev/null @@ -1,103 +0,0 @@ -import os - -import paddlehub as hub -from paddlehub.module.module import moduleinfo -from paddlehub.common.logger import logger - -from slda_weibo.inference_engine import InferenceEngine -from slda_weibo.document import SLDADoc -from slda_weibo.semantic_matching import SemanticMatching, WordAndDis -from slda_weibo.tokenizer import LACTokenizer, SimpleTokenizer -from slda_weibo.config import ModelType -from slda_weibo.vocab import Vocab, WordCount - - -@moduleinfo( - name="slda_weibo", - version="1.0.0", - summary= - "This is a PaddleHub Module for SLDA topic model in weibo dataset, where we can infer the topic distribution of document.", - author="DesmonDay", - author_email="", - type="nlp/semantic_model") -class TopicModel(hub.Module): - def _initialize(self): - """ - Initialize with the necessary elements. 
- """ - self.model_dir = os.path.join(self.directory, 'weibo') - self.conf_file = 'slda.conf' - self.__engine = InferenceEngine(self.model_dir, self.conf_file) - self.vocab_path = os.path.join(self.model_dir, 'vocab_info.txt') - lac = hub.Module(name="lac") - # self.__tokenizer = SimpleTokenizer(self.vocab_path) - self.__tokenizer = LACTokenizer(self.vocab_path, lac) - - self.vocabulary = self.__engine.get_model().get_vocab() - self.config = self.__engine.get_config() - self.topic_words = self.__engine.get_model().topic_words() - self.topic_sum_table = self.__engine.get_model().topic_sum() - - def take_elem(word_count): - return word_count.count - - for i in range(self.config.num_topics): - self.topic_words[i].sort(key=take_elem, reverse=True) - - logger.info("Finish initialization.") - - def infer_doc_topic_distribution(self, document): - """ - This interface infers the topic distribution of document. - - Args: - document(str): the input document text. - - Returns: - results(list): returns the topic distribution of document. - """ - tokens = self.__tokenizer.tokenize(document) - if tokens == []: - return [] - results = [] - sentences = [] - sent = [] - for i in range(len(tokens)): - sent.append(tokens[i]) - if len(sent) % 5 == 0: - sentences.append(sent) - sent = [] - if len(sent) > 0: - sentences.append(sent) - - doc = SLDADoc() - self.__engine.infer(sentences, doc) - topics = doc.sparse_topic_dist() - for topic in topics: - results.append({"topic id": topic.tid, "distribution": topic.prob}) - return results - - def show_topic_keywords(self, topic_id, k=10): - """ - This interface returns the k keywords under specific topic. - - Args: - topic_id(int): topic information we want to know. - k(int): top k keywords. - - Returns: - results(dict): contains specific topic's keywords and corresponding - probability. - """ - EPS = 1e-8 - results = {} - if 0 <= topic_id < self.config.num_topics: - k = min(k, len(self.topic_words[topic_id])) - for i in range(k): - prob = self.topic_words[topic_id][i].count / \ - (self.topic_sum_table[topic_id] + EPS) - results[self.vocabulary[self.topic_words[topic_id] - [i].word_id]] = prob - return results - else: - logger.error("%d is out of range!" % topic_id) diff --git a/hub_module/modules/text/semantic_model/slda_weibo/sampler.py b/hub_module/modules/text/semantic_model/slda_weibo/sampler.py deleted file mode 100644 index 667e465bf2062af06740cad0cf09897e859a7c00..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/slda_weibo/sampler.py +++ /dev/null @@ -1,311 +0,0 @@ -import os - -import numpy as np -from tqdm import tqdm -from paddlehub.common.logger import logger - -from slda_weibo.document import LDADoc, SLDADoc, Token, Sentence -from slda_weibo.vose_alias import VoseAlias -from slda_weibo.util import rand, rand_k - - -class Sampler(object): - def __init__(self): - pass - - def sample_doc(self, doc): - """Sample LDA or SLDA topics for documents. - """ - raise NotImplementedError - - -class MHSampler(Sampler): - def __init__(self, model): - super().__init__() - self.__model = model - self.__topic_indexes = None - self.__alias_tables = None - self.__prob_sum = None - self.__beta_alias = VoseAlias() - self.__beta_prior_sum = None - self.__mh_steps = 2 - self.__construct_alias_table() - - def __construct_alias_table(self): - """Construct alias table for all words. 
- """ - logger.info("Construct alias table for alias sampling method.") - vocab_size = self.__model.vocab_size() - self.__topic_indexes = [[] for _ in range(vocab_size)] - self.__alias_tables = [VoseAlias() for _ in range(vocab_size)] - self.__prob_sum = np.zeros(vocab_size) - - # Construct each word's alias table (prior is not included). - for i in tqdm(range(vocab_size)): - dist = [] - prob_sum = 0 - for key in self.__model.word_topic(i): - topic_id = key - word_topic_count = self.__model.word_topic(i)[key] - topic_sum = self.__model.topic_sum_value(topic_id) - - self.__topic_indexes[i].append(topic_id) - q = word_topic_count / (topic_sum + self.__model.beta_sum()) - dist.append(q) - prob_sum += q - self.__prob_sum[i] = prob_sum - if len(dist) > 0: - dist = np.array(dist, dtype=np.float) - self.__alias_tables[i].initialize(dist) - - # Build prior parameter beta's alias table. - beta_dist = self.__model.beta() / ( - self.__model.topic_sum() + self.__model.beta_sum()) - self.__beta_prior_sum = np.sum(beta_dist) - self.__beta_alias.initialize(beta_dist) - - def sample_doc(self, doc): - if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): - for i in range(doc.size()): - new_topic = self.__sample_token(doc, doc.token(i)) - doc.set_topic(i, new_topic) - elif isinstance(doc, SLDADoc): - for i in range(doc.size()): - new_topic = self.__sample_sentence(doc, doc.sent(i)) - doc.set_topic(i, new_topic) - - def __sample_token(self, doc, token): - new_topic = token.topic - for i in range(self.__mh_steps): - doc_proposed_topic = self.__doc_proposal(doc, token) - new_topic = self.__word_proposal(doc, token, doc_proposed_topic) - return new_topic - - def __sample_sentence(self, doc, sent): - new_topic = sent.topic - for i in range(self.__mh_steps): - doc_proposed_topic = self.__doc_proposal(doc, sent) - new_topic = self.__word_proposal(doc, sent, doc_proposed_topic) - return new_topic - - def __doc_proposal(self, doc, token): - if isinstance(doc, LDADoc) and isinstance(token, Token): - old_topic = token.topic - dart = rand() * (doc.size() + self.__model.alpha_sum()) - if dart < doc.size(): - token_index = int(dart) - new_topic = doc.token(token_index).topic - else: - new_topic = rand_k(self.__model.num_topics()) - - if new_topic != old_topic: - proposal_old = self.__doc_proposal_distribution(doc, old_topic) - proposal_new = self.__doc_proposal_distribution(doc, new_topic) - proportion_old = self.__proportional_function( - doc, token, old_topic) - proportion_new = self.__proportional_function( - doc, token, new_topic) - transition_prob = float((proportion_new * proposal_old) / - (proportion_old * proposal_new)) - rejection = rand() - mask = -(rejection < transition_prob) - return (new_topic & mask) | (old_topic & ~mask) - - return new_topic - - elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): - sent = token - old_topic = sent.topic - dart = rand() * (doc.size() + self.__model.alpha_sum()) - if dart < doc.size(): - token_index = int(dart) - new_topic = doc.sent(token_index).topic - else: - new_topic = rand_k(self.__model.num_topics()) - - if new_topic != old_topic: - proportion_old = self.__proportional_function( - doc, sent, old_topic) - proportion_new = self.__proportional_function( - doc, sent, new_topic) - proposal_old = self.__doc_proposal_distribution(doc, old_topic) - proposal_new = self.__doc_proposal_distribution(doc, new_topic) - transition_prob = float((proportion_new * proposal_old) / - (proportion_old * proposal_new)) - rejection = rand() - mask = -(rejection < 
transition_prob) - return (new_topic & mask) | (old_topic & ~mask) - - return new_topic - - def __word_proposal(self, doc, token, old_topic): - if isinstance(doc, LDADoc) and isinstance(token, Token): - new_topic = self.__propose(token.id) - if new_topic != old_topic: - proposal_old = self.__word_proposal_distribution( - token.id, old_topic) - proposal_new = self.__word_proposal_distribution( - token.id, new_topic) - proportion_old = self.__proportional_function( - doc, token, old_topic) - proportion_new = self.__proportional_function( - doc, token, new_topic) - transition_prob = float((proportion_new * proposal_old) / - (proportion_old * proposal_new)) - rejection = rand() - mask = -(rejection < transition_prob) - return (new_topic & mask) | (old_topic & ~mask) - return new_topic - - elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): - sent = token - new_topic = old_topic - for word_id in sent.tokens: - new_topic = self.__propose(word_id) - if new_topic != old_topic: - proportion_old = self.__proportional_function( - doc, sent, old_topic) - proportion_new = self.__proportional_function( - doc, sent, new_topic) - proposal_old = self.__word_proposal_distribution( - word_id, old_topic) - proposal_new = self.__word_proposal_distribution( - word_id, new_topic) - transition_prob = float((proportion_new * proposal_old) / - (proportion_old * proposal_new)) - rejection = rand() - mask = -(rejection < transition_prob) - new_topic = (new_topic & mask) | (old_topic & ~mask) - return new_topic - - def __proportional_function(self, doc, token, new_topic): - if isinstance(doc, LDADoc) and isinstance(token, Token): - old_topic = token.topic - dt_alpha = doc.topic_sum(new_topic) + self.__model.alpha() - wt_beta = self.__model.word_topic_value( - token.id, new_topic) + self.__model.beta() - t_sum_beta_sum = self.__model.topic_sum_value( - new_topic) + self.__model.beta_sum() - if new_topic == old_topic and wt_beta > 1: - if dt_alpha > 1: - dt_alpha -= 1 - wt_beta -= 1 - t_sum_beta_sum -= 1 - return dt_alpha * wt_beta / t_sum_beta_sum - - elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): - sent = token - old_topic = sent.topic - result = doc.topic_sum(new_topic) + self.__model.alpha() - if new_topic == old_topic: - result -= 1 - for word_id in sent.tokens: - wt_beta = self.__model.word_topic_value( - word_id, new_topic) + self.__model.beta() - t_sum_beta_sum = self.__model.topic_sum_value( - new_topic) + self.__model.beta_sum() - if new_topic == old_topic and wt_beta > 1: - wt_beta -= 1 - t_sum_beta_sum -= 1 - result *= wt_beta / t_sum_beta_sum - return result - else: - logger.error("Wrong input argument type!") - - def __word_proposal_distribution(self, word_id, topic): - wt_beta = self.__model.word_topic_value(word_id, - topic) + self.__model.beta() - t_sum_beta_sum = self.__model.topic_sum_value( - topic) + self.__model.beta_sum() - return wt_beta / t_sum_beta_sum - - def __doc_proposal_distribution(self, doc, topic): - return doc.topic_sum(topic) + self.__model.alpha() - - def __propose(self, word_id): - dart = rand() * (self.__prob_sum[word_id] + self.__beta_prior_sum) - if dart < self.__prob_sum[word_id]: - idx = self.__alias_tables[word_id].generate() - topic = self.__topic_indexes[word_id][idx] - else: - topic = self.__beta_alias.generate() - return topic - - -class GibbsSampler(Sampler): - def __init__(self, model): - super().__init__() - self.__model = model - - def sample_doc(self, doc): - if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): - for i in 
range(doc.size()):
-                new_topic = self.__sample_token(doc, doc.token(i))
-                doc.set_topic(i, new_topic)
-        elif isinstance(doc, SLDADoc):
-            for i in range(doc.size()):
-                new_topic = self.__sample_sentence(doc, doc.sent(i))
-                doc.set_topic(i, new_topic)
-
-    def __sample_token(self, doc, token):
-        old_topic = token.topic
-        num_topics = self.__model.num_topics()
-        accum_prob = np.zeros(num_topics)
-        prob = np.zeros(num_topics)
-        sum_ = 0
-        for i in range(num_topics):
-            dt_alpha = doc.topic_sum(i) + self.__model.alpha()
-            wt_beta = self.__model.word_topic_value(token.id,
-                                                    i) + self.__model.beta()
-            t_sum_beta_sum = self.__model.topic_sum_value(
-                i) + self.__model.beta_sum()
-            if i == old_topic and wt_beta > 1:
-                if dt_alpha > 1:
-                    dt_alpha -= 1
-                wt_beta -= 1
-                t_sum_beta_sum -= 1
-            prob[i] = dt_alpha * wt_beta / t_sum_beta_sum
-            sum_ += prob[i]
-            accum_prob[i] = prob[i] if i == 0 else accum_prob[i - 1] + prob[i]
-
-        dart = rand() * sum_
-        if dart <= accum_prob[0]:
-            return 0
-        for i in range(1, num_topics):
-            if accum_prob[i - 1] < dart <= accum_prob[i]:
-                return i
-        return num_topics - 1
-
-    def __sample_sentence(self, doc, sent):
-        old_topic = sent.topic
-        num_topics = self.__model.num_topics()
-        accum_prob = np.zeros(num_topics)
-        prob = np.zeros(num_topics)
-        sum_ = 0
-        for t in range(num_topics):
-            dt_alpha = doc.topic_sum(t) + self.__model.alpha()
-            t_sum_beta_sum = self.__model.topic_sum_value(
-                t) + self.__model.beta_sum()
-            if t == old_topic:
-                if dt_alpha > 1:
-                    dt_alpha -= 1
-                if t_sum_beta_sum > 1:
-                    t_sum_beta_sum -= 1
-            prob[t] = dt_alpha
-            for i in range(len(sent.tokens)):
-                w = sent.tokens[i]
-                wt_beta = self.__model.word_topic_value(
-                    w, t) + self.__model.beta()
-                if t == old_topic and wt_beta > 1:
-                    wt_beta -= 1
-                # Note: for a very long sentence, this product of many factors
-                # smaller than 1 can underflow and lose numerical accuracy.
-                prob[t] *= wt_beta / t_sum_beta_sum
-            sum_ += prob[t]
-            accum_prob[t] = prob[t] if t == 0 else accum_prob[t - 1] + prob[t]
-
-        dart = rand() * sum_
-        if dart <= accum_prob[0]:
-            return 0
-        for t in range(1, num_topics):
-            if accum_prob[t - 1] < dart <= accum_prob[t]:
-                return t
-        return num_topics - 1
diff --git a/hub_module/modules/text/semantic_model/slda_weibo/tokenizer.py b/hub_module/modules/text/semantic_model/slda_weibo/tokenizer.py
deleted file mode 100644
index 10167882dd791f2f658853352e6b3773677f5b59..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/semantic_model/slda_weibo/tokenizer.py
+++ /dev/null
@@ -1,128 +0,0 @@
-import os
-
-import numpy as np
-from paddlehub.common.logger import logger
-
-
-class Tokenizer(object):
-    """Base tokenizer class.
-    """
-
-    def __init__(self):
-        pass
-
-    def tokenize(self, text):
-        raise NotImplementedError
-
-
-class SimpleTokenizer(Tokenizer):
-    """Simple version of an FMM (Forward Maximum Matching) word tokenizer. This
-    tokenizer can only be used in the topic model demo, not in real business
-    application scenarios.
-
-    Notes: This tokenizer can only recognize the words in the corresponding vocab file.
-    """
-
-    def __init__(self, vocab_path):
-        super().__init__()
-        self.__max_word_len = 0
-        self.__vocab = set()
-        self.__load_vocab(vocab_path)
-
-    def tokenize(self, text):
-        """Tokenize the input string `text` and return the tokenized result.
-        """
-        text_len = len(text)
-        result = []
-        i = 0
-        while i < text_len:
-            word = found_word = ""
-            # Deal with English characters.
- if self.__is_eng_char(text[i]): - for j in range(i, text_len + 1): - if j < text_len and self.__is_eng_char(text[j]): - word += self.__tolower(text[j]) - else: - # Forward matching by character granularity. - if word in self.__vocab: - result.append(word) - i = j - 1 - break - else: - for j in range(i, min(i + self.__max_word_len, text_len)): - word += text[j] - if word in self.__vocab: - found_word = word - if len(found_word) > 0: - result.append(found_word) - i += len(found_word) - 1 - i += 1 - return result - - def contains(self, word): - """Check whether the word is in the vocabulary. - """ - return word in self.__vocab - - def __load_vocab(self, vocab_path): - """Load the word dictionary. - """ - with open(vocab_path, 'r', encoding='utf-8') as fin: - vocab_size = 0 - for line in fin.readlines(): - fields = line.strip().split('\t') - assert len(fields) >= 2 - word = fields[1] - self.__max_word_len = max(self.__max_word_len, len(word)) - self.__vocab.add(word) - vocab_size += 1 - - def __is_eng_char(self, c): - """Check whether char c is an English character. - """ - return (c >= 'A' and c <= 'Z') or (c >= 'a' and c <= 'z') - - def __tolower(self, c): - """Return the lowercase character of the corresponding character, or return - the original character if there is no corresponding lowercase character. - """ - return c.lower() - - -class LACTokenizer(Tokenizer): - def __init__(self, vocab_path, lac): - super().__init__() - self.__max_word_len = 0 - self.__vocab = set() - self.__lac = lac - self.__load_vocab(vocab_path) - - def __load_vocab(self, vocab_path): - """Load the word dictionary. - """ - with open(vocab_path, 'r', encoding='utf-8') as fin: - vocab_size = 0 - for line in fin.readlines(): - fields = line.strip().split('\t') - assert len(fields) >= 2 - word = fields[1] - self.__max_word_len = max(self.__max_word_len, len(word)) - self.__vocab.add(word) - vocab_size += 1 - - def tokenize(self, text): - results = self.__lac.lexical_analysis( - texts=[text], use_gpu=False, batch_size=1, return_tag=True) - # Change English words to lower case. - # And just preserve the word in vocab. - words = results[0]["word"] - result = [] - for word in words: - word = word.lower() - if word in self.__vocab: - result.append(word) - return result - - def contains(self, word): - """Check whether the word is in the vocabulary. - """ - return word in self.__vocab diff --git a/hub_module/modules/text/semantic_model/slda_weibo/vocab.py b/hub_module/modules/text/semantic_model/slda_weibo/vocab.py deleted file mode 100644 index cc8d15e90a32b5ac556a102192c8a4e884b580ef..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/semantic_model/slda_weibo/vocab.py +++ /dev/null @@ -1,43 +0,0 @@ -from paddlehub.common.logger import logger - -OOV = -1 - - -class WordCount(object): - def __init__(self, word_id, count): - self.word_id = word_id - self.count = count - - -class Vocab(object): - def __init__(self): - self.__term2id = {} - self.__id2term = {} - - def get_id(self, word): - if word not in self.__term2id: - return OOV - return self.__term2id[word] - - def load(self, vocab_file): - self.__term2id = {} - self.__id2term = {} - with open(vocab_file, 'r', encoding='utf-8') as fin: - for line in fin.readlines(): - fields = line.strip().split('\t') - assert len( - fields) == 5, "Vocabulary file [%s] format error!" % ( - vocab_file) - term = fields[1] - id_ = int(fields[2]) - if term in self.__term2id: - logger.error("Duplicate word [%s] in vocab file!" 
% (term))
-                    continue
-                self.__term2id[term] = id_
-                self.__id2term[id_] = term
-
-    def size(self):
-        return len(self.__term2id)
-
-    def vocabulary(self):
-        return self.__id2term
diff --git a/hub_module/modules/text/sentiment_analysis/emotion_detection_textcnn/module.py b/hub_module/modules/text/sentiment_analysis/emotion_detection_textcnn/module.py
deleted file mode 100644
index 058e0d95ae0b5b5e6abbe6dc3fad65023812d1e4..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/sentiment_analysis/emotion_detection_textcnn/module.py
+++ /dev/null
@@ -1,257 +0,0 @@
-# -*- coding:utf-8 -*-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-import os
-
-import paddle.fluid as fluid
-import paddlehub as hub
-from paddlehub.common.paddle_helper import add_vars_prefix
-from paddlehub.module.module import moduleinfo, serving
-
-from emotion_detection_textcnn.net import textcnn_net
-from emotion_detection_textcnn.processor import load_vocab, preprocess, postprocess
-
-
-@moduleinfo(
-    name="emotion_detection_textcnn",
-    version="1.2.0",
-    summary="Baidu's open-source Emotion Detection Model (TextCNN).",
-    author="baidu-nlp",
-    author_email="",
-    type="nlp/sentiment_analysis")
-class EmotionDetectionTextCNN(hub.NLPPredictionModule):
-    def _initialize(self):
-        """
-        Initialize with the necessary elements.
-        """
-        self.pretrained_model_path = os.path.join(self.directory, "assets",
-                                                  "infer_model")
-        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
-        self.vocab = load_vocab(self.vocab_path)
-        self._word_seg_module = None
-
-        self.predict = self.emotion_classify
-
-        self._set_config()
-
-    @property
-    def word_seg_module(self):
-        """
-        lac module
-        """
-        if not self._word_seg_module:
-            self._word_seg_module = hub.Module(name="lac")
-        return self._word_seg_module
-
-    def context(self, trainable=False, max_seq_len=128, num_slots=1):
-        """
-        Get the input, output and program of the pretrained emotion_detection_textcnn.
-
-        Args:
-            trainable(bool): Whether to fine-tune the pretrained parameters of emotion_detection_textcnn or not.
-            max_seq_len (int): It will limit the total sequence returned so that it has a maximum length.
-            num_slots(int): The number of data inputs fed to the model, selected from the following options:
-
-            - 1(default): There is only one data input to be fed to the model, e.g. the module is used for a text classification task.
-            - 2: There are two data inputs to be fed to the model, e.g. the module is used for a text matching task (point-wise).
-            - 3: There are three data inputs to be fed to the model, e.g. the module is used for a text matching task (pair-wise).
-
-        Returns:
-            inputs(dict): the input variables of emotion_detection_textcnn (words)
-            outputs(dict): the output variables of the input words (word embeddings and label probabilities);
-                the sentence embedding and sequence length of the first input text.
-            main_program(Program): the main_program of emotion_detection_textcnn with pretrained parameters
-        """
-        assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots
-        main_program = fluid.Program()
-        startup_program = fluid.Program()
-        with fluid.program_guard(main_program, startup_program):
-            text_1 = fluid.layers.data(
-                name="text",
-                shape=[-1, max_seq_len, 1],
-                dtype="int64",
-                lod_level=0)
-            seq_len = fluid.layers.data(
-                name="seq_len", shape=[1], dtype='int64', lod_level=0)
-            seq_len_used = fluid.layers.squeeze(seq_len, axes=[1])
-
-            # Add embedding layer.
-            w_param_attrs = fluid.ParamAttr(
-                name="embedding_0.w_0",
-                initializer=fluid.initializer.TruncatedNormal(scale=0.02),
-                trainable=trainable)
-            dict_dim = 240466
-            emb_1 = fluid.layers.embedding(
-                input=text_1,
-                size=[dict_dim, 128],
-                is_sparse=True,
-                padding_idx=dict_dim - 1,
-                dtype='float32',
-                param_attr=w_param_attrs)
-            emb_1_name = emb_1.name
-            data_list = [text_1]
-            emb_name_list = [emb_1_name]
-
-            # Add textcnn layer.
-            pred, fc = textcnn_net(emb_1, seq_len_used)
-            pred_name = pred.name
-            fc_name = fc.name
-
-            if num_slots > 1:
-                text_2 = fluid.data(
-                    name='text_2',
-                    shape=[-1, max_seq_len],
-                    dtype='int64',
-                    lod_level=0)
-                emb_2 = fluid.embedding(
-                    input=text_2,
-                    size=[dict_dim, 128],
-                    is_sparse=True,
-                    padding_idx=dict_dim - 1,
-                    dtype='float32',
-                    param_attr=w_param_attrs)
-                emb_2_name = emb_2.name
-                data_list.append(text_2)
-                emb_name_list.append(emb_2_name)
-
-            if num_slots > 2:
-                text_3 = fluid.data(
-                    name='text_3',
-                    shape=[-1, max_seq_len],
-                    dtype='int64',
-                    lod_level=0)
-                emb_3 = fluid.embedding(
-                    input=text_3,
-                    size=[dict_dim, 128],
-                    is_sparse=True,
-                    padding_idx=dict_dim - 1,
-                    dtype='float32',
-                    param_attr=w_param_attrs)
-                emb_3_name = emb_3.name
-                data_list.append(text_3)
-                emb_name_list.append(emb_3_name)
-
-            variable_names = filter(
-                lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"],
-                list(main_program.global_block().vars.keys()))
-            prefix_name = "@HUB_{}@".format(self.name)
-            add_vars_prefix(
-                program=main_program, prefix=prefix_name, vars=variable_names)
-
-            for param in main_program.global_block().iter_parameters():
-                param.trainable = trainable
-
-        place = fluid.CPUPlace()
-        exe = fluid.Executor(place)
-
-        # Load the emotion_detection_textcnn pretrained model.
-        def if_exist(var):
-            return os.path.exists(
-                os.path.join(self.pretrained_model_path, var.name))
-
-        fluid.io.load_vars(
-            exe, self.pretrained_model_path, predicate=if_exist)
-
-        inputs = {'seq_len': seq_len}
-        outputs = {
-            "class_probs":
-            main_program.global_block().vars[prefix_name + pred_name],
-            "sentence_feature":
-            main_program.global_block().vars[prefix_name + fc_name]
-        }
-        for index, data in enumerate(data_list):
-            if index == 0:
-                inputs['text'] = data
-                outputs['emb'] = main_program.global_block().vars[
-                    prefix_name + emb_name_list[0]]
-            else:
-                inputs['text_%s' % (index + 1)] = data
-                outputs['emb_%s' % (index + 1)] = main_program.global_block(
-                ).vars[prefix_name + emb_name_list[index]]
-        return inputs, outputs, main_program
-
-    @serving
-    def emotion_classify(self, texts=[], data={}, use_gpu=False, batch_size=1):
-        """
-        Get the emotion prediction results with the texts as input.
-        Args:
-            texts(list): the input texts to be predicted; used when `data` is not set
-            data(dict): key must be 'text', value is the texts to be predicted; used when `texts` is not set
-            use_gpu(bool): whether to use gpu for prediction or not
-            batch_size(int): the number of texts processed in one batch
-        Returns:
-            results(list): the emotion prediction results
-        """
-        if use_gpu:
-            try:
-                _places = os.environ["CUDA_VISIBLE_DEVICES"]
-                int(_places[0])
-            except:
-                raise RuntimeError(
-                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
-                )
-
-        if texts != [] and isinstance(texts, list) and data == {}:
-            predicted_data = texts
-        elif texts == [] and isinstance(data, dict) and isinstance(
-                data.get('text', None), list) and data['text']:
-            predicted_data = data["text"]
-        else:
-            raise ValueError(
-                "The input data is inconsistent with expectations.")
-
-        predicted_data = self.to_unicode(predicted_data)
-
-        start_idx = 0
-        iteration = int(math.ceil(len(predicted_data) / batch_size))
-        results = []
-        for i in range(iteration):
-            if i < (iteration - 1):
-                batch_data = predicted_data[start_idx:(start_idx + batch_size)]
-            else:
-                batch_data = predicted_data[start_idx:]
-            start_idx = start_idx + batch_size
-            processed_results = preprocess(self.word_seg_module, batch_data,
-                                           self.vocab, use_gpu, batch_size)
-            tensor_words = self.texts2tensor(processed_results)
-
-            if use_gpu:
-                batch_out = self.gpu_predictor.run([tensor_words])
-            else:
-                batch_out = self.cpu_predictor.run([tensor_words])
-            batch_result = postprocess(batch_out[0], processed_results)
-            results += batch_result
-        return results
-
-    def get_labels(self):
-        """
-        Get the labels which were used when pretraining.
-        Returns:
-            self.labels(dict)
-        """
-        self.labels = {"positive": 2, "negative": 0, "neutral": 1}
-        return self.labels
-
-
-if __name__ == "__main__":
-    emotion_detection_textcnn = EmotionDetectionTextCNN()
-    inputs, outputs, main_program = emotion_detection_textcnn.context(
-        num_slots=3)
-    print(inputs)
-    print(outputs)
-    # Data to be predicted
-    test_text = ["今天天气真好", "湿纸巾是干垃圾", "别来吵我"]
-
-    input_dict = {"text": test_text}
-    results = emotion_detection_textcnn.emotion_classify(
-        data=input_dict, batch_size=2)
-    for result in results:
-        print(result['text'])
-        print(result['emotion_label'])
-        print(result['emotion_key'])
-        print(result['positive_probs'])
-        print(result['negative_probs'])
-        print(result['neutral_probs'])
diff --git a/hub_module/modules/text/sentiment_analysis/emotion_detection_textcnn/net.py
b/hub_module/modules/text/sentiment_analysis/emotion_detection_textcnn/net.py
deleted file mode 100644
index ff405ee1fcb04880475244825941b02d0a054706..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/sentiment_analysis/emotion_detection_textcnn/net.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# -*- coding:utf-8 -*-
-import paddle.fluid as fluid
-
-
-def textcnn_net(emb,
-                seq_len,
-                emb_dim=128,
-                hid_dim=128,
-                hid_dim2=96,
-                class_dim=3,
-                win_sizes=None):
-    """
-    TextCNN network.
-    """
-    if win_sizes is None:
-        win_sizes = [1, 2, 3]
-
-    # unpad the token feature
-    unpad_feature = fluid.layers.sequence_unpad(emb, length=seq_len)
-
-    # convolution layers, one per window size
-    convs = []
-    for win_size in win_sizes:
-        conv_h = fluid.nets.sequence_conv_pool(
-            input=unpad_feature,
-            num_filters=hid_dim,
-            filter_size=win_size,
-            act="tanh",
-            pool_type="max")
-        convs.append(conv_h)
-    convs_out = fluid.layers.concat(input=convs, axis=1)
-
-    # fully connected layer
-    fc_1 = fluid.layers.fc(input=[convs_out], size=hid_dim2, act="tanh")
-    # softmax layer
-    prediction = fluid.layers.fc(input=[fc_1], size=class_dim, act="softmax")
-
-    return prediction, fc_1
diff --git a/hub_module/modules/text/sentiment_analysis/emotion_detection_textcnn/processor.py b/hub_module/modules/text/sentiment_analysis/emotion_detection_textcnn/processor.py
deleted file mode 100644
index 97391922cfacf5646f7b37b4b09cbc66bfcbd863..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/sentiment_analysis/emotion_detection_textcnn/processor.py
+++ /dev/null
@@ -1,69 +0,0 @@
-# -*- coding:utf-8 -*-
-import io
-import numpy as np
-
-
-def load_vocab(file_path):
-    """
-    load the given vocabulary
-    """
-    vocab = {}
-    with io.open(file_path, 'r', encoding='utf8') as fin:
-        for line in fin:
-            data = line.strip().split("\t")
-            if len(data) == 1:
-                wstr = ''
-                vocab[wstr] = int(data[0])
-                continue
-            else:
-                wstr = data[0]
-            vocab[wstr] = int(data[1])
-    # Reserve the next id for out-of-vocabulary words.
-    vocab["<unk>"] = len(vocab)
-    return vocab
-
-
-def get_predict_label(probs):
-    label = int(np.argmax(probs))
-    if label == 0:
-        key = "negative"
-    elif label == 2:
-        key = "positive"
-    else:
-        key = "neutral"
-    return label, key
-
-
-def preprocess(lac, predicted_data, word_dict, use_gpu=False, batch_size=1):
-    """Segment the texts with the lac module and map the words to ids."""
-    result = []
-    data_dict = {"text": predicted_data}
-    processed = lac.lexical_analysis(
-        data=data_dict, use_gpu=use_gpu, batch_size=batch_size)
-    unk_id = word_dict["<unk>"]
-    for index, data in enumerate(processed):
-        result_i = {'processed': []}
-        result_i['origin'] = predicted_data[index]
-        for word in data['word']:
-            if word in word_dict:
-                _index = word_dict[word]
-            else:
-                _index = unk_id
-            result_i['processed'].append(_index)
-        result.append(result_i)
-    return result
-
-
-def postprocess(prediction, texts):
-    """Convert the model's output tensor to emotion labels and probabilities."""
-    result = []
-    pred = prediction.as_ndarray()
-    for index in range(len(texts)):
-        result_i = {}
-        result_i['text'] = texts[index]['origin']
-        label, key = get_predict_label(pred[index])
-        result_i['emotion_label'] = label
-        result_i['emotion_key'] = key
-        result_i['positive_probs'] = float('%.4f' % pred[index, 2])
-        result_i['negative_probs'] = float('%.4f' % pred[index, 0])
-        result_i['neutral_probs'] = float('%.4f' % pred[index, 1])
-        result.append(result_i)
-    return result
diff --git a/hub_module/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/ernie.py b/hub_module/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/ernie.py
deleted file mode 100644 index
5846878cd1fdface78bc1704a48e86e9f6d00250..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/ernie.py +++ /dev/null @@ -1,377 +0,0 @@ -# -*- coding:utf-8 -** -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""ERNIE""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import json -import logging - -import paddle.fluid as fluid -import six - -from .transformer_encoder import encoder, pre_process_layer -from .transformer_encoder import gelu - - -class ErnieModel(object): - """ - ErnieModel - """ - - def __init__(self, - src_ids, - position_ids, - sentence_ids, - input_mask, - config, - weight_sharing=True, - use_fp16=False): - """ - :param src_ids: - :param position_ids: - :param sentence_ids: - :param input_mask: - :param config: - :param weight_sharing: - :param use_fp16: - """ - self._hidden_size = config.get('hidden_size', 768) - self._emb_size = config.get('emb_size', self._hidden_size) - self._n_layer = config.get('num_hidden_layers', 12) - self._n_head = config.get('num_attention_heads', 12) - self._voc_size = config.get('vocab_size', 30522) - self._max_position_seq_len = config.get('max_position_embeddings', 512) - self._param_share = config.get('param_share', "normal") - self._pre_encoder_cmd = config.get('pre_encoder_cmd', "nd") - self._preprocess_cmd = config.get('preprocess_cmd', "") - self._postprocess_cmd = config.get('postprocess_cmd', "dan") - self._epsilon = config.get('epsilon', 1e-05) - self._emb_mapping_in = config.get('emb_mapping_in', False) - self._n_layer_per_block = config.get('n_layer_per_block', 1) - - if config.has('sent_type_vocab_size'): - self._sent_types = config['sent_type_vocab_size'] - else: - self._sent_types = config.get('type_vocab_size', 2) - - self._use_sentence_id = config.get('use_sentence_id', True) - self._use_task_id = config.get('use_task_id', False) - if self._use_task_id: - self._task_types = config.get('task_type_vocab_size', 3) - self._hidden_act = config.get('hidden_act', 'gelu') - self._prepostprocess_dropout = config.get('hidden_dropout_prob', 0.1) - self._attention_dropout = config.get('attention_probs_dropout_prob', - 0.1) - self._weight_sharing = weight_sharing - - self._word_emb_name = "word_embedding" - self._pos_emb_name = "pos_embedding" - self._sent_emb_name = "sent_embedding" - self._task_emb_name = "task_embedding" - self._dtype = "float16" if use_fp16 else "float32" - self._emb_dtype = "float32" - # Initialize all weigths by truncated normal initializer, and all biases - # will be initialized by constant zero by default. 
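-        # (Note on the fp16 path: when use_fp16 is True, _build_model keeps
-        # the embeddings in float32, casts them and the input mask to float16
-        # before the encoder stack, and casts the encoder output back to
-        # float32 afterwards; see the casts below.)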
- self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config.get('initializer_range', 0.02)) - - self._build_model(src_ids, position_ids, sentence_ids, input_mask) - - def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): - """ - :param src_ids: - :param position_ids: - :param sentence_ids: - :param input_mask: - :return: - """ - # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( - input=src_ids, - dtype=self._emb_dtype, - size=[self._voc_size, self._emb_size], - param_attr=fluid.ParamAttr( - name=self._word_emb_name, initializer=self._param_initializer), - is_sparse=False) - - position_emb_out = fluid.layers.embedding( - input=position_ids, - dtype=self._emb_dtype, - size=[self._max_position_seq_len, self._emb_size], - param_attr=fluid.ParamAttr( - name=self._pos_emb_name, initializer=self._param_initializer)) - - emb_out = emb_out + position_emb_out - - if self._use_sentence_id: - sent_emb_out = fluid.layers.embedding( - sentence_ids, - dtype=self._emb_dtype, - size=[self._sent_types, self._emb_size], - param_attr=fluid.ParamAttr( - name=self._sent_emb_name, - initializer=self._param_initializer)) - - emb_out = emb_out + sent_emb_out - - emb_out = pre_process_layer( - emb_out, - self._pre_encoder_cmd, - self._prepostprocess_dropout, - name='pre_encoder', - epsilon=self._epsilon) - - if self._emb_mapping_in: - emb_out = fluid.layers.fc( - input=emb_out, - num_flatten_dims=2, - size=self._hidden_size, - param_attr=fluid.ParamAttr( - name='emb_hidden_mapping', - initializer=self._param_initializer), - bias_attr='emb_hidden_mapping_bias') - - if self._dtype == "float16": - emb_out = fluid.layers.cast(x=emb_out, dtype=self._dtype) - input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) - self_attn_mask = fluid.layers.matmul( - x=input_mask, y=input_mask, transpose_y=True) - - self_attn_mask = fluid.layers.scale( - x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = fluid.layers.stack( - x=[self_attn_mask] * self._n_head, axis=1) - n_head_self_attn_mask.stop_gradient = True - - self._enc_out, self._checkpoints = encoder( - enc_input=emb_out, - attn_bias=n_head_self_attn_mask, - n_layer=self._n_layer, - n_head=self._n_head, - d_key=self._hidden_size // self._n_head, - d_value=self._hidden_size // self._n_head, - d_model=self._hidden_size, - d_inner_hid=self._hidden_size * 4, - prepostprocess_dropout=self._prepostprocess_dropout, - attention_dropout=self._attention_dropout, - relu_dropout=0, - hidden_act=self._hidden_act, - preprocess_cmd=self._preprocess_cmd, - postprocess_cmd=self._postprocess_cmd, - param_initializer=self._param_initializer, - name='encoder', - param_share=self._param_share, - epsilon=self._epsilon, - n_layer_per_block=self._n_layer_per_block) - if self._dtype == "float16": - self._enc_out = fluid.layers.cast( - x=self._enc_out, dtype=self._emb_dtype) - - def get_sequence_output(self): - """ - :return: - """ - return self._enc_out - - def get_pooled_output(self): - """Get the first feature of each sequence for classification""" - next_sent_feat = fluid.layers.slice( - input=self._enc_out, axes=[1], starts=[0], ends=[1]) - """ - if self._dtype == "float16": - next_sent_feat = fluid.layers.cast( - x=next_sent_feat, dtype=self._emb_dtype) - - next_sent_feat = fluid.layers.fc( - input=next_sent_feat, - size=self._emb_size, - param_attr=fluid.ParamAttr( - name="mask_lm_trans_fc.w_0", initializer=self._param_initializer), - bias_attr="mask_lm_trans_fc.b_0") - """ - 
""" - next_sent_feat = fluid.layers.fc( - input=next_sent_feat, - size=self._emb_size, - param_attr=fluid.ParamAttr( - name="mask_lm_trans_fc.w_0", initializer=self._param_initializer), - bias_attr="mask_lm_trans_fc.b_0") - - """ - next_sent_feat = fluid.layers.fc( - input=next_sent_feat, - size=self._hidden_size, - act="tanh", - param_attr=fluid.ParamAttr( - name="pooled_fc.w_0", initializer=self._param_initializer), - bias_attr="pooled_fc.b_0") - return next_sent_feat - - def get_lm_output(self, mask_label, mask_pos): - """Get the loss & accuracy for pretraining""" - mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') - # extract the first token feature in each sentence - self.next_sent_feat = self.get_pooled_output() - reshaped_emb_out = fluid.layers.reshape( - x=self._enc_out, shape=[-1, self._hidden_size]) - # extract masked tokens' feature - mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) - - if self._dtype == "float16": - mask_feat = fluid.layers.cast(x=mask_feat, dtype=self._emb_dtype) - - # transform: fc - if self._hidden_act == 'gelu' or self._hidden_act == 'gelu.precise': - _hidden_act = 'gelu' - elif self._hidden_act == 'gelu.approximate': - _hidden_act = None - else: - _hidden_act = self._hidden_act - mask_trans_feat = fluid.layers.fc( - input=mask_feat, - size=self._emb_size, - act=_hidden_act, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_fc.w_0', - initializer=self._param_initializer), - bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) - if self._hidden_act == 'gelu.approximate': - mask_trans_feat = gelu(mask_trans_feat) - else: - pass - # transform: layer norm - mask_trans_feat = fluid.layers.layer_norm( - mask_trans_feat, - begin_norm_axis=len(mask_trans_feat.shape) - 1, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_layer_norm_scale', - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - name='mask_lm_trans_layer_norm_bias', - initializer=fluid.initializer.Constant(1.))) - # transform: layer norm - # mask_trans_feat = pre_process_layer( - # mask_trans_feat, 'n', name='mask_lm_trans') - - mask_lm_out_bias_attr = fluid.ParamAttr( - name="mask_lm_out_fc.b_0", - initializer=fluid.initializer.Constant(value=0.0)) - if self._weight_sharing: - fc_out = fluid.layers.matmul( - x=mask_trans_feat, - y=fluid.default_main_program().global_block().var( - self._word_emb_name), - transpose_y=True) - fc_out += fluid.layers.create_parameter( - shape=[self._voc_size], - dtype=self._emb_dtype, - attr=mask_lm_out_bias_attr, - is_bias=True) - - else: - fc_out = fluid.layers.fc( - input=mask_trans_feat, - size=self._voc_size, - param_attr=fluid.ParamAttr( - name="mask_lm_out_fc.w_0", - initializer=self._param_initializer), - bias_attr=mask_lm_out_bias_attr) - - mask_lm_loss = fluid.layers.softmax_with_cross_entropy( - logits=fc_out, label=mask_label) - mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) - - return mean_mask_lm_loss - - def get_task_output(self, task, task_labels): - """ - :param task: - :param task_labels: - :return: - """ - task_fc_out = fluid.layers.fc( - input=self.next_sent_feat, - size=task["num_labels"], - param_attr=fluid.ParamAttr( - name=task["task_name"] + "_fc.w_0", - initializer=self._param_initializer), - bias_attr=task["task_name"] + "_fc.b_0") - task_loss, task_softmax = fluid.layers.softmax_with_cross_entropy( - logits=task_fc_out, label=task_labels, return_softmax=True) - task_acc = fluid.layers.accuracy(input=task_softmax, label=task_labels) - mean_task_loss = fluid.layers.mean(task_loss) - 
        return mean_task_loss, task_acc
-
-
-class ErnieConfig(object):
-    """Parse the ernie config."""
-
-    def __init__(self, config_path):
-        """
-        :param config_path:
-        """
-        self._config_dict = self._parse(config_path)
-
-    def _parse(self, config_path):
-        """
-        :param config_path:
-        :return:
-        """
-        try:
-            with open(config_path, 'r') as json_file:
-                config_dict = json.load(json_file)
-        except Exception:
-            raise IOError(
-                "Error in parsing Ernie model config file '%s'" % config_path)
-        else:
-            return config_dict
-
-    def __getitem__(self, key):
-        """
-        :param key:
-        :return:
-        """
-        return self._config_dict.get(key, None)
-
-    def has(self, key):
-        """
-        :param key:
-        :return:
-        """
-        return key in self._config_dict
-
-    def get(self, key, default_value):
-        """
-        :param key, default_value:
-        :return:
-        """
-        if key in self._config_dict:
-            return self._config_dict[key]
-        else:
-            return default_value
-
-    def print_config(self):
-        """
-        :return:
-        """
-        for arg, value in sorted(six.iteritems(self._config_dict)):
-            logging.info('%s: %s' % (arg, value))
-        logging.info('------------------------------------------------')
diff --git a/hub_module/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/transformer_encoder.py b/hub_module/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/transformer_encoder.py
deleted file mode 100644
index 80f7a7759ac16943dde323385b6351c7c5643bb4..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/transformer_encoder.py
+++ /dev/null
@@ -1,501 +0,0 @@
-# -*- coding:utf-8 -*-
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Transformer encoder."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from functools import partial
-
-import paddle.fluid as fluid
-import paddle.fluid.layers as layers
-import numpy as np
-
-
-def gelu(x):
-    """Gaussian Error Linear Unit.
-
-    This is a smoother version of the ReLU.
-    Original paper: https://arxiv.org/abs/1606.08415
-    Args:
-        x: float Tensor to apply the activation to.
-
-    Returns:
-        `x` with the GELU activation applied.
-    """
-    cdf = 0.5 * (1.0 + fluid.layers.tanh(
-        (np.sqrt(2.0 / np.pi) * (x + 0.044715 * fluid.layers.pow(x, 3.0)))))
-    return x * cdf
-
-
-def multi_head_attention(queries,
-                         keys,
-                         values,
-                         attn_bias,
-                         d_key,
-                         d_value,
-                         d_model,
-                         n_head=1,
-                         dropout_rate=0.,
-                         cache=None,
-                         param_initializer=None,
-                         name='multi_head_att'):
-    """
-    Multi-Head Attention. Note that attn_bias is added to the logits before
-    computing the softmax activation, to mask certain selected positions so
-    that they will not be considered in the attention weights.
-    """
-    keys = queries if keys is None else keys
-    values = keys if values is None else values
-    if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
-        raise ValueError(
-            "Inputs: queries, keys and values should all be 3-D tensors, "
-            "but got {} vs. {} vs. {}".format(queries.shape, keys.shape,
-                                              values.shape))
-
-    def __compute_qkv(queries, keys, values, n_head, d_key, d_value):
-        """
-        Add linear projections to queries, keys, and values.
-        """
-        q = layers.fc(
-            input=queries,
-            size=d_key * n_head,
-            num_flatten_dims=2,
-            param_attr=fluid.ParamAttr(
-                name=name + '_query_fc.w_0', initializer=param_initializer),
-            bias_attr=name + '_query_fc.b_0')
-        k = layers.fc(
-            input=keys,
-            size=d_key * n_head,
-            num_flatten_dims=2,
-            param_attr=fluid.ParamAttr(
-                name=name + '_key_fc.w_0', initializer=param_initializer),
-            bias_attr=name + '_key_fc.b_0')
-        v = layers.fc(
-            input=values,
-            size=d_value * n_head,
-            num_flatten_dims=2,
-            param_attr=fluid.ParamAttr(
-                name=name + '_value_fc.w_0', initializer=param_initializer),
-            bias_attr=name + '_value_fc.b_0')
-        return q, k, v
-
-    def __split_heads(x, n_head):
-        """
-        Reshape the last dimension of the input tensor x into two dimensions
-        and then transpose. Specifically, transform an input tensor with shape
-        [bs, max_sequence_length, n_head * hidden_dim] into a tensor with
-        shape [bs, n_head, max_sequence_length, hidden_dim].
-        """
-        hidden_size = x.shape[-1]
-        # The value 0 in the shape attr means copying the corresponding
-        # dimension size of the input as the output dimension size.
-        reshaped = layers.reshape(
-            x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True)
-
-        # Permute the dimensions into:
-        # [batch_size, n_head, max_sequence_len, hidden_size_per_head]
-        return layers.transpose(x=reshaped, perm=[0, 2, 1, 3])
-
-    def __combine_heads(x):
-        """
-        Transpose and then reshape the last two dimensions of the input
-        tensor x so that they become one dimension, which is the reverse of
-        __split_heads.
-        """
-        if len(x.shape) == 3: return x
-        if len(x.shape) != 4:
-            raise ValueError("Input(x) should be a 4-D Tensor.")
-
-        trans_x = layers.transpose(x, perm=[0, 2, 1, 3])
-        # The value 0 in the shape attr means copying the corresponding
-        # dimension size of the input as the output dimension size.
-        return layers.reshape(
-            x=trans_x,
-            shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]],
-            inplace=True)
-
-    def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate):
-        """
-        Scaled Dot-Product Attention
-        """
-        scaled_q = layers.scale(x=q, scale=d_key**-0.5)
-        product = layers.matmul(x=scaled_q, y=k, transpose_y=True)
-        if attn_bias:
-            product += attn_bias
-        weights = layers.softmax(product)
-        if dropout_rate:
-            weights = layers.dropout(
-                weights,
-                dropout_prob=dropout_rate,
-                dropout_implementation="upscale_in_train",
-                is_test=False)
-        out = layers.matmul(weights, v)
-        return out
-
-    q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value)
-
-    if cache is not None:  # use cache and concat time steps
-        # Since the inplace reshape in __split_heads changes the shape of k and
-        # v, which is the cache input for the next time step, reshape the cache
-        # input from the previous time step first.
- k = cache["k"] = layers.concat( - [layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) - v = cache["v"] = layers.concat( - [layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) - - q = __split_heads(q, n_head) - k = __split_heads(k, n_head) - v = __split_heads(v, n_head) - - ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, - dropout_rate) - - out = __combine_heads(ctx_multiheads) - - # Project back to the model size. - proj_out = layers.fc( - input=out, - size=d_model, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_output_fc.w_0', initializer=param_initializer), - bias_attr=name + '_output_fc.b_0') - return proj_out - - -def positionwise_feed_forward(x, - d_inner_hid, - d_hid, - dropout_rate, - hidden_act, - param_initializer=None, - name='ffn'): - """ - Position-wise Feed-Forward Networks. - This module consists of two linear transformations with a ReLU activation - in between, which is applied to each position separately and identically. - """ - if hidden_act == 'gelu' or hidden_act == 'gelu.precise': - _hidden_act = 'gelu' - elif hidden_act == 'gelu.approximate': - _hidden_act = None - else: - _hidden_act = hidden_act - hidden = layers.fc( - input=x, - size=d_inner_hid, - num_flatten_dims=2, - act=_hidden_act, - param_attr=fluid.ParamAttr( - name=name + '_fc_0.w_0', initializer=param_initializer), - bias_attr=name + '_fc_0.b_0') - if hidden_act == 'gelu.approximate': - hidden = gelu(hidden) - - if dropout_rate: - hidden = layers.dropout( - hidden, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.fc( - input=hidden, - size=d_hid, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + '_fc_1.w_0', initializer=param_initializer), - bias_attr=name + '_fc_1.b_0') - return out - - -def pre_post_process_layer(prev_out, - out, - process_cmd, - dropout_rate=0., - epsilon=1e-12, - name=''): - """ - Add residual connection, layer normalization and droput to the out tensor - optionally according to the value of process_cmd. - This will be used before or after multi-head attention and position-wise - feed-forward networks. - """ - for cmd in process_cmd: - if cmd == "a": # add residual connection - out = out + prev_out if prev_out else out - elif cmd == "n": # add layer normalization - out_dtype = out.dtype - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float32") - out = layers.layer_norm( - out, - begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.ParamAttr( - name=name + '_layer_norm_scale', - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - name=name + '_layer_norm_bias', - initializer=fluid.initializer.Constant(0.)), - epsilon=epsilon) - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float16") - elif cmd == "d": # add dropout - if dropout_rate: - out = layers.dropout( - out, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - return out - - -pre_process_layer = partial(pre_post_process_layer, None) -post_process_layer = pre_post_process_layer - - -def encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name='', - epsilon=1e-12, -): - """The encoder layers that can be stacked to form a deep encoder. 
- This module consits of a multi-head (self) attention followed by - position-wise feed-forward networks and both the two components companied - with the post_process_layer to add residual connection, layer normalization - and droput. - """ - - attn_output = multi_head_attention( - enc_input, - None, - None, - attn_bias, - d_key, - d_value, - d_model, - n_head, - attention_dropout, - param_initializer=param_initializer, - name=name + '_multi_head_att') - - attn_output = post_process_layer( - enc_input, - attn_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_att', - epsilon=epsilon) - - ffd_output = positionwise_feed_forward( - attn_output, - d_inner_hid, - d_model, - relu_dropout, - hidden_act, - param_initializer=param_initializer, - name=name + '_ffn') - - return post_process_layer( - attn_output, - ffd_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + '_post_ffn', - epsilon=epsilon), ffd_output - - -def encoder_inner_share(enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - epsilon, - param_initializer=None, - name='', - n_layer_per_block=1): - """ - The encoder_inner_share is composed of n_layer_per_block layers returned by calling - encoder_layer. - """ - _checkpoints = [] - for i in range(n_layer_per_block): - enc_output, cp = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name + '_layer_' + str(i), - epsilon=epsilon, - ) - _checkpoints.append(cp) - enc_input = enc_output - - return enc_output, _checkpoints - - -def encoder_outer_share(enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - epsilon, - param_initializer=None, - name='', - n_layer_per_block=1): - """ - The encoder_outer_share is composed of n_layer_per_block layers returned by calling - encoder_layer. - """ - _checkpoints = [] - for i in range(n_layer_per_block): - enc_output, cp = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name, - epsilon=epsilon) - _checkpoints.append(cp) - enc_input = enc_output - - return enc_output, _checkpoints - - -def encoder(enc_input, - attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - epsilon, - n_layer_per_block, - param_initializer=None, - name='', - param_share=None): - """ - The encoder is composed of a stack of identical layers returned by calling - encoder_layer . 
- """ - checkpoints = [] - # for outer_share it will share same param in one block, - # and for inner_share it will share param across blocks, rather than in one same block - # - # outer-share inner-share - # [1] [1] ----\ 1st block - # [1] [2] ----/ - # [2] [1] ----\ 2nd block - # [2] [2] ----/ - - if param_share == "normal" or param_share == 'outer_share': - #n_layer_per_block=1, n_layer=24 for bert-large - #n_layer_per_block=1, n_layer=12 for bert-base - #n_layer_per_block=12, n_layer=12 for albert-xxlarge - #n_layer_per_block=6, n_layer=12 for albert-xxlarge-outershare - enc_fn = encoder_outer_share - name_fn = lambda i: name + '_layer_' + str(i) - elif param_share == "inner_share": - #n_layer_per_block = 2 - enc_fn = encoder_inner_share - name_fn = lambda i: name - else: - raise ValueError('unsupported param share mode') - - for i in range(n_layer // n_layer_per_block): - enc_output, cp = enc_fn( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name_fn(i), - n_layer_per_block=n_layer_per_block, - epsilon=epsilon, - ) - checkpoints.extend(cp) - enc_input = enc_output - enc_output = pre_process_layer( - enc_output, - preprocess_cmd, - prepostprocess_dropout, - name="post_encoder", - epsilon=epsilon) - - return enc_output, checkpoints diff --git a/hub_module/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/module.py b/hub_module/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/module.py deleted file mode 100644 index 87187836e8a3c5c32a1c0620ea26cf8230729e0a..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/module.py +++ /dev/null @@ -1,258 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import ast -import os - -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub import TransformerModule -from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.reader.tokenization import convert_to_unicode, FullTokenizer -from paddlehub.reader.batching import pad_batch_data -import numpy as np - -from ernie_skep_sentiment_analysis.model.ernie import ErnieModel, ErnieConfig - - -@moduleinfo( - name="ernie_skep_sentiment_analysis", - version="1.0.0", - summary= - "SKEP: Sentiment Knowledge Enhanced Pre-training for Sentiment Analysis. Ernie_skep_sentiment_analysis module is initialize with enie_1.0_chn_large when pretraining. This module is finetuned on ChnSentiCorp dataset to do sentiment claasification. 
It can do sentiment analysis prediction directly, label as positive or negative.", - author="baidu-nlp", - author_email="", - type="nlp/sentiment_analysis", -) -class ErnieSkepSentimentAnalysis(TransformerModule): - """ - Ernie_skep_sentiment_analysis module is initialize with enie_1.0_chn_large when pretraining. - This module is finetuned on ChnSentiCorp dataset to do sentiment claasification. - It can do sentiment analysis prediction directly, label as positive or negative. - """ - - def _initialize(self): - ernie_config_path = os.path.join(self.directory, "assets", - "ernie_1.0_large_ch.config.json") - self.ernie_config = ErnieConfig(ernie_config_path) - self.MAX_SEQ_LEN = 512 - self.vocab_path = os.path.join(self.directory, "assets", - "ernie_1.0_large_ch.vocab.txt") - self.params_path = os.path.join(self.directory, "assets", "params") - - self.infer_model_path = os.path.join(self.directory, "assets", - "inference_step_601") - self.tokenizer = FullTokenizer(vocab_file=self.vocab_path) - - self.vocab = self.tokenizer.vocab - self.pad_id = self.vocab["[PAD]"] - self.label_map = {0: 'negative', 1: 'positive'} - - self._set_config() - - def _set_config(self): - """ - predictor config setting - """ - model_file_path = os.path.join(self.infer_model_path, 'model') - params_file_path = os.path.join(self.infer_model_path, 'params') - - config = AnalysisConfig(model_file_path, params_file_path) - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - - if use_gpu: - config.enable_use_gpu(8000, 0) - else: - config.disable_gpu() - - config.disable_glog_info() - - self.predictor = create_paddle_predictor(config) - - def net(self, input_ids, position_ids, segment_ids, input_mask): - """ - create neural network. - Args: - input_ids (tensor): the word ids. - position_ids (tensor): the position ids. - segment_ids (tensor): the segment ids. - input_mask (tensor): the padding mask. - - Returns: - pooled_output (tensor): sentence-level output for classification task. - sequence_output (tensor): token-level output for sequence task. - """ - ernie = ErnieModel( - src_ids=input_ids, - position_ids=position_ids, - sentence_ids=segment_ids, - input_mask=input_mask, - config=self.ernie_config, - use_fp16=False) - - pooled_output = ernie.get_pooled_output() - sequence_output = ernie.get_sequence_output() - return pooled_output, sequence_output - - def array2tensor(self, arr_data): - """ - convert numpy array to PaddleTensor - """ - tensor_data = PaddleTensor(arr_data) - return tensor_data - - @serving - def predict_sentiment(self, texts=[], use_gpu=False): - """ - Get the sentiment label for the predicted texts. It will be classified as positive and negative. - Args: - texts (list(str)): the data to be predicted. - use_gpu (bool): Whether to use gpu or not. - Returns: - res (list): The result of sentiment label and probabilties. - """ - - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
- ) - - results = [] - for text in texts: - feature = self._convert_text_to_feature(text) - inputs = [self.array2tensor(ndarray) for ndarray in feature] - output = self.predictor.run(inputs) - probilities = np.array(output[0].data.float_data()) - label = self.label_map[np.argmax(probilities)] - result = { - 'text': text, - 'sentiment_label': label, - 'positive_probs': probilities[1], - 'negative_probs': probilities[0] - } - results.append(result) - - return results - - def _convert_text_to_feature(self, text): - """ - Convert the raw text to feature which is needed to run program (feed_vars). - """ - text_a = convert_to_unicode(text) - tokens_a = self.tokenizer.tokenize(text_a) - max_seq_len = 512 - - # Account for [CLS] and [SEP] with "- 2" - if len(tokens_a) > max_seq_len - 2: - tokens_a = tokens_a[0:(max_seq_len - 2)] - - tokens = [] - text_type_ids = [] - tokens.append("[CLS]") - text_type_ids.append(0) - for token in tokens_a: - tokens.append(token) - text_type_ids.append(0) - tokens.append("[SEP]") - text_type_ids.append(0) - - token_ids = self.tokenizer.convert_tokens_to_ids(tokens) - position_ids = list(range(len(token_ids))) - task_ids = [0] * len(token_ids) - - padded_token_ids, input_mask = pad_batch_data([token_ids], - max_seq_len=max_seq_len, - pad_idx=self.pad_id, - return_input_mask=True) - padded_text_type_ids = pad_batch_data([text_type_ids], - max_seq_len=max_seq_len, - pad_idx=self.pad_id) - padded_position_ids = pad_batch_data([position_ids], - max_seq_len=max_seq_len, - pad_idx=self.pad_id) - padded_task_ids = pad_batch_data([task_ids], - max_seq_len=max_seq_len, - pad_idx=self.pad_id) - - feature = [ - padded_token_ids, padded_position_ids, padded_text_type_ids, - input_mask, padded_task_ids - ] - return feature - - @runnable - def run_cmd(self, argvs): - """ - Run as a command - """ - self.parser = argparse.ArgumentParser( - description="Run the %s module." % self.name, - prog='hub run %s' % self.name, - usage='%(prog)s', - add_help=True) - - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. 
Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - - self.add_module_config_arg() - self.add_module_input_arg() - - args = self.parser.parse_args(argvs) - results = self.predict_sentiment( - texts=[args.input_text], use_gpu=args.use_gpu) - return results - - def add_module_config_arg(self): - """ - Add the command config options - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - - def add_module_input_arg(self): - """ - Add the command input options - """ - self.arg_input_group.add_argument( - '--input_text', type=str, default=None, help="data to be predicted") - - -if __name__ == '__main__': - test_module = ErnieSkepSentimentAnalysis() - test_texts = ['你不是不聪明,而是不认真', '虽然小明很努力,但是他还是没有考100分'] - results = test_module.predict_sentiment(test_texts, use_gpu=False) - print(results) - test_module.context(max_seq_len=128) - print(test_module.get_embedding(texts=[['你不是不聪明,而是不认真']])) - print(test_module.get_params_layer()) diff --git a/hub_module/modules/text/sentiment_analysis/senta_bilstm/module.py b/hub_module/modules/text/sentiment_analysis/senta_bilstm/module.py deleted file mode 100644 index 07db6b9f9484a19375d0cd8ee27189c32831048b..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/sentiment_analysis/senta_bilstm/module.py +++ /dev/null @@ -1,267 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import json -import math -import os -import six - -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.common.paddle_helper import add_vars_prefix -from paddlehub.module.module import moduleinfo, serving - -from senta_bilstm.net import bilstm_net -from senta_bilstm.processor import load_vocab, preprocess, postprocess - - -@moduleinfo( - name="senta_bilstm", - version="1.2.0", - summary="Baidu's open-source Sentiment Classification System.", - author="baidu-nlp", - author_email="", - type="nlp/sentiment_analysis") -class SentaBiLSTM(hub.NLPPredictionModule): - def _initialize(self): - """ - initialize with the necessary elements - """ - self.pretrained_model_path = os.path.join(self.directory, "assets", - "infer_model") - self.vocab_path = os.path.join(self.directory, "assets/vocab.txt") - self.word_dict = load_vocab(self.vocab_path) - self._word_seg_module = None - - self.predict = self.sentiment_classify - - self._set_config() - - @property - def word_seg_module(self): - """ - lac module - """ - if not self._word_seg_module: - self._word_seg_module = hub.Module(name="lac") - return self._word_seg_module - - def context(self, trainable=False, max_seq_len=128, num_slots=1): - """ - Get the input ,output and program of the pretrained senta_bilstm - - Args: - trainable(bool): whether fine-tune the pretrained parameters of senta_bilstm or not. - max_seq_len (int): It will limit the total sequence returned so that it has a maximum length. - num_slots(int): It's number of data inputted to the model, selectted as following options: - - - 1(default): There's only one data to be feeded in the model, e.g. the module is used for text classification task. - - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise). - - 3: There are three data to be feeded in the model, e.g. 
the module is used for text matching task (pair-wise). - - Returns: - inputs(dict): the input variables of senta_bilstm (words) - outputs(dict): the output variables of input words (word embeddings and label probilities); - the sentence embedding and sequence length of the first input text. - main_program(Program): the main_program of Senta with pretrained prameters - """ - assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots - main_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(main_program, startup_program): - text_1 = fluid.layers.data( - name="text", - shape=[-1, max_seq_len, 1], - dtype="int64", - lod_level=0) - seq_len = fluid.layers.data( - name="seq_len", shape=[1], dtype='int64', lod_level=0) - seq_len_used = fluid.layers.squeeze(seq_len, axes=[1]) - - # Add embedding layer. - w_param_attrs = fluid.ParamAttr( - name="embedding_0.w_0", - initializer=fluid.initializer.TruncatedNormal(scale=0.02), - trainable=trainable) - dict_dim = 1256607 - emb_1 = fluid.layers.embedding( - input=text_1, - size=[dict_dim, 128], - is_sparse=True, - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_1_name = emb_1.name - data_list = [text_1] - emb_name_list = [emb_1_name] - - # Add lstm layer. - pred, fc = bilstm_net(emb_1, seq_len_used) - pred_name = pred.name - fc_name = fc.name - - if num_slots > 1: - text_2 = fluid.data( - name='text_2', - shape=[-1, max_seq_len], - dtype='int64', - lod_level=0) - emb_2 = fluid.embedding( - input=text_2, - size=[dict_dim, 128], - is_sparse=True, - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_2_name = emb_2.name - data_list.append(text_2) - emb_name_list.append(emb_2_name) - - if num_slots > 2: - text_3 = fluid.data( - name='text_3', - shape=[-1, max_seq_len], - dtype='int64', - lod_level=0) - emb_3 = fluid.embedding( - input=text_3, - size=[dict_dim, 128], - is_sparse=True, - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_3_name = emb_3.name - data_list.append(text_3) - emb_name_list.append(emb_3_name) - - variable_names = filter( - lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"], - list(main_program.global_block().vars.keys())) - prefix_name = "@HUB_{}@".format(self.name) - add_vars_prefix( - program=main_program, prefix=prefix_name, vars=variable_names) - - for param in main_program.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - # Load the senta_lstm pretrained model. 
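-            # Note: add_vars_prefix renamed every non-feed variable with the
-            # "@HUB_senta_bilstm@" namespace above, which keeps this program's
-            # variables from colliding with same-named variables when several
-            # modules are assembled into one main program.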
- def if_exist(var): - return os.path.exists( - os.path.join(self.pretrained_model_path, var.name)) - - fluid.io.load_vars( - exe, self.pretrained_model_path, predicate=if_exist) - - inputs = {'seq_len': seq_len} - outputs = { - "class_probs": - main_program.global_block().vars[prefix_name + pred_name], - "sentence_feature": - main_program.global_block().vars[prefix_name + fc_name] - } - for index, data in enumerate(data_list): - if index == 0: - inputs['text'] = data - outputs['emb'] = main_program.global_block().vars[ - prefix_name + emb_name_list[0]] - else: - inputs['text_%s' % (index + 1)] = data - outputs['emb_%s' % (index + 1)] = main_program.global_block( - ).vars[prefix_name + emb_name_list[index]] - return inputs, outputs, main_program - - @serving - def sentiment_classify(self, texts=[], data={}, use_gpu=False, - batch_size=1): - """ - Get the sentiment prediction results results with the texts as input - - Args: - texts(list): the input texts to be predicted, if texts not data - data(dict): key must be 'text', value is the texts to be predicted, if data not texts - use_gpu(bool): whether use gpu to predict or not - batch_size(int): the program deals once with one batch - - Returns: - results(list): the word segmentation results - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." - ) - - if texts != [] and isinstance(texts, list) and data == {}: - predicted_data = texts - elif texts == [] and isinstance(data, dict) and isinstance( - data.get('text', None), list) and data['text']: - predicted_data = data["text"] - else: - raise ValueError( - "The input data is inconsistent with expectations.") - - predicted_data = self.to_unicode(predicted_data) - start_idx = 0 - iteration = int(math.ceil(len(predicted_data) / batch_size)) - results = [] - for i in range(iteration): - if i < (iteration - 1): - batch_data = predicted_data[start_idx:(start_idx + batch_size)] - else: - batch_data = predicted_data[start_idx:] - - start_idx = start_idx + batch_size - processed_results = preprocess(self.word_seg_module, batch_data, - self.word_dict, use_gpu, batch_size) - tensor_words = self.texts2tensor(processed_results) - - if use_gpu: - batch_out = self.gpu_predictor.run([tensor_words]) - else: - batch_out = self.cpu_predictor.run([tensor_words]) - batch_result = postprocess(batch_out[0], processed_results) - results += batch_result - return results - - def get_labels(self): - """ - Get the labels which was used when pretraining - Returns: - self.labels(dict) - """ - self.labels = {"positive": 1, "negative": 0} - return self.labels - - -if __name__ == "__main__": - senta = SentaBiLSTM() - inputs, outputs, main_program = senta.context(num_slots=3) - print(inputs) - print(outputs) - # Data to be predicted - test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] - - # execute predict and print the result - input_dict = {"text": test_text} - results = senta.sentiment_classify(data=input_dict, batch_size=3) - for index, result in enumerate(results): - if six.PY2: - print( - json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) - results = senta.sentiment_classify(texts=test_text) - for index, result in enumerate(results): - if six.PY2: - print( - json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) diff --git 
a/hub_module/modules/text/sentiment_analysis/senta_bilstm/net.py b/hub_module/modules/text/sentiment_analysis/senta_bilstm/net.py deleted file mode 100755 index 31d7a20ebd140f79794b456426bfbd85f818fe80..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/sentiment_analysis/senta_bilstm/net.py +++ /dev/null @@ -1,39 +0,0 @@ -# -*- coding:utf-8 -*- -import paddle.fluid as fluid - - -def bilstm_net(emb, - seq_len, - emb_dim=128, - hid_dim=128, - hid_dim2=96, - class_dim=2, - emb_lr=30.0): - """ - Bi-Lstm net - """ - # unpad the token_feature - unpad_feature = fluid.layers.sequence_unpad(emb, length=seq_len) - - # bi-lstm layer - fc0 = fluid.layers.fc(input=unpad_feature, size=hid_dim * 4) - rfc0 = fluid.layers.fc(input=unpad_feature, size=hid_dim * 4) - lstm_h, c = fluid.layers.dynamic_lstm( - input=fc0, size=hid_dim * 4, is_reverse=False) - rlstm_h, c = fluid.layers.dynamic_lstm( - input=rfc0, size=hid_dim * 4, is_reverse=True) - - # extract last layer - lstm_last = fluid.layers.sequence_last_step(input=lstm_h) - rlstm_last = fluid.layers.sequence_last_step(input=rlstm_h) - lstm_last_tanh = fluid.layers.tanh(lstm_last) - rlstm_last_tanh = fluid.layers.tanh(rlstm_last) - - # concat layer - lstm_concat = fluid.layers.concat(input=[lstm_last, rlstm_last], axis=1) - # full connect layer - fc1 = fluid.layers.fc(input=lstm_concat, size=hid_dim2, act='tanh') - # softmax layer - prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax') - - return prediction, fc1 diff --git a/hub_module/modules/text/sentiment_analysis/senta_bilstm/processor.py b/hub_module/modules/text/sentiment_analysis/senta_bilstm/processor.py deleted file mode 100644 index c3487fbbf86e95c5891f884704bb0f4989c506c8..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/sentiment_analysis/senta_bilstm/processor.py +++ /dev/null @@ -1,63 +0,0 @@ -# -*- coding:utf-8 -*- -import io -import numpy as np - - -def load_vocab(file_path): - """ - load the given vocabulary - """ - vocab = {} - with io.open(file_path, 'r', encoding='utf8') as f: - wid = 0 - for line in f: - parts = line.rstrip().split('\t') - vocab[parts[0]] = int(parts[1]) - vocab[""] = len(vocab) - return vocab - - -def preprocess(lac, texts, word_dict, use_gpu=False, batch_size=1): - """ - firstly, the predicted texts are segmented by lac module - then, the word segmention results input into senta - """ - result = [] - input_dict = {'text': texts} - processed = lac.lexical_analysis( - data=input_dict, use_gpu=use_gpu, batch_size=batch_size) - unk_id = word_dict[""] - for index, data in enumerate(processed): - result_i = {'processed': []} - result_i['origin'] = texts[index] - for word in data['word']: - if word in word_dict: - _index = word_dict[word] - else: - _index = unk_id - result_i['processed'].append(_index) - result.append(result_i) - return result - - -def postprocess(predict_out, texts): - """ - Convert model's output tensor to sentiment label - """ - predict_out = predict_out.as_ndarray() - batch_size = len(texts) - result = [] - for index in range(batch_size): - result_i = {} - result_i['text'] = texts[index]['origin'] - label = int(np.argmax(predict_out[index])) - if label == 0: - key = 'negative' - else: - key = 'positive' - result_i['sentiment_label'] = label - result_i['sentiment_key'] = key - result_i['positive_probs'] = float('%.4f' % predict_out[index, 1]) - result_i['negative_probs'] = float('%.4f' % (1 - predict_out[index, 1])) - result.append(result_i) - return result diff --git 
a/hub_module/modules/text/sentiment_analysis/senta_bow/module.py b/hub_module/modules/text/sentiment_analysis/senta_bow/module.py deleted file mode 100644 index 344e96afb39c329676c632a984a45762469db4df..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/sentiment_analysis/senta_bow/module.py +++ /dev/null @@ -1,268 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import json -import math -import os -import six - -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.common.paddle_helper import add_vars_prefix -from paddlehub.module.module import serving, moduleinfo - -from senta_bow.net import bow_net -from senta_bow.processor import load_vocab, preprocess, postprocess - - -@moduleinfo( - name="senta_bow", - version="1.2.0", - summary="Baidu's open-source Sentiment Classification System.", - author="baidu-nlp", - author_email="", - type="nlp/sentiment_analysis") -class SentaBow(hub.NLPPredictionModule): - def _initialize(self): - """ - initialize with the necessary elements - """ - self.pretrained_model_path = os.path.join(self.directory, "assets", - "infer_model") - self.vocab_path = os.path.join(self.directory, "assets/vocab.txt") - self.word_dict = load_vocab(self.vocab_path) - self._word_seg_module = None - - self.predict = self.sentiment_classify - - self._set_config() - - @property - def word_seg_module(self): - """ - lac module - """ - if not self._word_seg_module: - self._word_seg_module = hub.Module(name="lac") - return self._word_seg_module - - def context(self, trainable=False, max_seq_len=128, num_slots=1): - """ - Get the input ,output and program of the pretrained senta_bow - - Args: - trainable(bool): Whether fine-tune the pretrained parameters of senta_bow or not. - max_seq_len (int): It will limit the total sequence returned so that it has a maximum length. - num_slots(int): It's number of data inputted to the model, selectted as following options: - - - 1(default): There's only one data to be feeded in the model, e.g. the module is used for text classification task. - - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise). - - 3: There are three data to be feeded in the model, e.g. the module is used for text matching task (pair-wise). - - Returns: - inputs(dict): the input variables of senta_bow (words) - outputs(dict): the output variables of input words (word embeddings and label probilities); - the sentence embedding and sequence length of the first input text. - main_program(Program): the main_program of Senta with pretrained prameters - """ - assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots - main_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(main_program, startup_program): - text_1 = fluid.layers.data( - name="text", - shape=[-1, max_seq_len, 1], - dtype="int64", - lod_level=0) - seq_len = fluid.layers.data( - name="seq_len", shape=[1], dtype='int64', lod_level=0) - seq_len_used = fluid.layers.squeeze(seq_len, axes=[1]) - - # Add embedding layer. 
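-            # (The padding_idx used below reserves the last vocabulary id,
-            # dict_dim - 1, for padding, so padded positions contribute zero
-            # vectors and receive no gradient updates on the embedding table.)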
- w_param_attrs = fluid.ParamAttr( - name="embedding_0.w_0", - initializer=fluid.initializer.TruncatedNormal(scale=0.02), - trainable=trainable) - dict_dim = 1256607 - emb_1 = fluid.layers.embedding( - input=text_1, - size=[dict_dim, 128], - is_sparse=True, - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_1_name = emb_1.name - data_list = [text_1] - emb_name_list = [emb_1_name] - - # Add lstm layer. - pred, fc = bow_net(emb_1, seq_len_used) - pred_name = pred.name - fc_name = fc.name - - if num_slots > 1: - text_2 = fluid.data( - name='text_2', - shape=[-1, max_seq_len], - dtype='int64', - lod_level=0) - emb_2 = fluid.embedding( - input=text_2, - size=[dict_dim, 128], - is_sparse=True, - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_2_name = emb_2.name - data_list.append(text_2) - emb_name_list.append(emb_2_name) - - if num_slots > 2: - text_3 = fluid.data( - name='text_3', - shape=[-1, max_seq_len], - dtype='int64', - lod_level=0) - emb_3 = fluid.embedding( - input=text_3, - size=[dict_dim, 128], - is_sparse=True, - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_3_name = emb_3.name - data_list.append(text_3) - emb_name_list.append(emb_3_name) - - variable_names = filter( - lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"], - list(main_program.global_block().vars.keys())) - prefix_name = "@HUB_{}@".format(self.name) - add_vars_prefix( - program=main_program, prefix=prefix_name, vars=variable_names) - - for param in main_program.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - # Load the senta_bow pretrained model. - def if_exist(var): - return os.path.exists( - os.path.join(self.pretrained_model_path, var.name)) - - fluid.io.load_vars( - exe, self.pretrained_model_path, predicate=if_exist) - - inputs = {'seq_len': seq_len} - outputs = { - "class_probs": - main_program.global_block().vars[prefix_name + pred_name], - "sentence_feature": - main_program.global_block().vars[prefix_name + fc_name] - } - for index, data in enumerate(data_list): - if index == 0: - inputs['text'] = data - outputs['emb'] = main_program.global_block().vars[ - prefix_name + emb_name_list[0]] - else: - inputs['text_%s' % (index + 1)] = data - outputs['emb_%s' % (index + 1)] = main_program.global_block( - ).vars[prefix_name + emb_name_list[index]] - return inputs, outputs, main_program - - @serving - def sentiment_classify(self, texts=[], data={}, use_gpu=False, - batch_size=1): - """ - Get the sentiment prediction results results with the texts as input - - Args: - texts(list): the input texts to be predicted, if texts not data - data(dict): key must be 'text', value is the texts to be predicted, if data not texts - use_gpu(bool): whether use gpu to predict or not - batch_size(int): the program deals once with one batch - - Returns: - results(list): the word segmentation results - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
- ) - - if texts != [] and isinstance(texts, list) and data == {}: - predicted_data = texts - elif texts == [] and isinstance(data, dict) and isinstance( - data.get('text', None), list) and data['text']: - predicted_data = data["text"] - else: - raise ValueError( - "The input data is inconsistent with expectations.") - - predicted_data = self.to_unicode(predicted_data) - start_idx = 0 - iteration = int(math.ceil(len(predicted_data) / batch_size)) - results = [] - for i in range(iteration): - if i < (iteration - 1): - batch_data = predicted_data[start_idx:(start_idx + batch_size)] - else: - batch_data = predicted_data[start_idx:] - - start_idx = start_idx + batch_size - processed_results = preprocess(self.word_seg_module, batch_data, - self.word_dict, use_gpu, batch_size) - tensor_words = self.texts2tensor(processed_results) - - if use_gpu: - batch_out = self.gpu_predictor.run([tensor_words]) - else: - batch_out = self.cpu_predictor.run([tensor_words]) - batch_result = postprocess(batch_out[0], processed_results) - results += batch_result - return results - - def get_labels(self): - """ - Get the labels which was used when pretraining - Returns: - self.labels(dict) - """ - self.labels = {"positive": 1, "negative": 0} - return self.labels - - -if __name__ == "__main__": - senta = SentaBow() - inputs, outputs, main_program = senta.context(num_slots=3) - print(inputs) - print('*' * 20) - print(outputs) - # Data to be predicted - test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] - - # execute predict and print the result - input_dict = {"text": test_text} - results = senta.sentiment_classify(data=input_dict) - for index, result in enumerate(results): - if six.PY2: - print( - json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) - results = senta.sentiment_classify(texts=test_text) - for index, result in enumerate(results): - if six.PY2: - print( - json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) diff --git a/hub_module/modules/text/sentiment_analysis/senta_bow/processor.py b/hub_module/modules/text/sentiment_analysis/senta_bow/processor.py deleted file mode 100644 index f17284cdca699f07b59f4cf1ce988e9fdf7c305a..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/sentiment_analysis/senta_bow/processor.py +++ /dev/null @@ -1,63 +0,0 @@ -# -*- coding:utf-8 -*- -import io -import numpy as np - - -def load_vocab(file_path): - """ - load the given vocabulary - """ - vocab = {} - with io.open(file_path, 'r', encoding='utf8') as f: - wid = 0 - for line in f: - parts = line.rstrip().split('\t') - vocab[parts[0]] = int(parts[1]) - vocab[""] = len(vocab) - return vocab - - -def preprocess(lac, texts, word_dict, use_gpu=False, batch_size=1): - """ - firstly, the predicted texts are segmented by lac module - then, the word segmention results input into senta - """ - result = [] - input_dict = {"text": texts} - processed = lac.lexical_analysis( - data=input_dict, use_gpu=use_gpu, batch_size=batch_size) - unk_id = word_dict[""] - for index, data in enumerate(processed): - result_i = {'processed': []} - result_i['origin'] = texts[index] - for word in data['word']: - if word in word_dict: - _index = word_dict[word] - else: - _index = unk_id - result_i['processed'].append(_index) - result.append(result_i) - return result - - -def postprocess(predict_out, texts): - """ - Convert model's output tensor to sentiment label - """ - predict_out = predict_out.as_ndarray() - batch_size = len(texts) - result = [] - for index 
in range(batch_size): - result_i = {} - result_i['text'] = texts[index]['origin'] - label = int(np.argmax(predict_out[index])) - if label == 0: - key = 'negative' - else: - key = 'positive' - result_i['sentiment_label'] = label - result_i['sentiment_key'] = key - result_i['positive_probs'] = float('%.4f' % predict_out[index, 1]) - result_i['negative_probs'] = float('%.4f' % (1 - predict_out[index, 1])) - result.append(result_i) - return result diff --git a/hub_module/modules/text/sentiment_analysis/senta_cnn/module.py b/hub_module/modules/text/sentiment_analysis/senta_cnn/module.py deleted file mode 100644 index c0339de9bbcffba7ed37e5099b8834e5f18cc6b4..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/sentiment_analysis/senta_cnn/module.py +++ /dev/null @@ -1,262 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import json -import math -import os -import six - -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.common.paddle_helper import add_vars_prefix -from paddlehub.module.module import moduleinfo, serving - -from senta_cnn.net import cnn_net -from senta_cnn.processor import load_vocab, preprocess, postprocess - - -@moduleinfo( - name="senta_cnn", - version="1.2.0", - summary="Baidu's open-source Sentiment Classification System.", - author="baidu-nlp", - author_email="", - type="nlp/sentiment_analysis") -class SentaCNN(hub.NLPPredictionModule): - def _initialize(self, user_dict=None): - """ - initialize with the necessary elements - """ - self.pretrained_model_path = os.path.join(self.directory, "assets", - "infer_model") - self.vocab_path = os.path.join(self.directory, "assets/vocab.txt") - self.word_dict = load_vocab(self.vocab_path) - self._word_seg_module = None - - self.predict = self.sentiment_classify - - self._set_config() - - @property - def word_seg_module(self): - """ - lac module - """ - if not self._word_seg_module: - self._word_seg_module = hub.Module(name="lac") - return self._word_seg_module - - def context(self, trainable=False, max_seq_len=128, num_slots=1): - """ - Get the input ,output and program of the pretrained senta_cnn - - Args: - trainable(bool): Whether fine-tune the pretrained parameters of senta_cnn or not. - max_seq_len (int): It will limit the total sequence returned so that it has a maximum length. - num_slots(int): It's number of data inputted to the model, selectted as following options: - - - 1(default): There's only one data to be feeded in the model, e.g. the module is used for text classification task. - - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise). - - 3: There are three data to be feeded in the model, e.g. the module is used for text matching task (pair-wise). - - Returns: - inputs(dict): the input variables of senta_cnn (words) - outputs(dict): the output variables of input words (word embeddings and label probilities); - the sentence embedding and sequence length of the first input text. 
- main_program(Program): the main_program of Senta with pretrained prameters - """ - assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots - main_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(main_program, startup_program): - text_1 = fluid.layers.data( - name="text", - shape=[-1, max_seq_len, 1], - dtype="int64", - lod_level=0) - seq_len = fluid.layers.data( - name="seq_len", shape=[1], dtype='int64', lod_level=0) - seq_len_used = fluid.layers.squeeze(seq_len, axes=[1]) - - # Add embedding layer. - w_param_attrs = fluid.ParamAttr( - name="embedding_0.w_0", - initializer=fluid.initializer.TruncatedNormal(scale=0.02), - trainable=trainable) - dict_dim = 1256607 - emb_1 = fluid.layers.embedding( - input=text_1, - size=[dict_dim, 128], - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_1_name = emb_1.name - data_list = [text_1] - emb_name_list = [emb_1_name] - - # Add lstm layer. - pred, fc = cnn_net(emb_1, seq_len_used) - pred_name = pred.name - fc_name = fc.name - - if num_slots > 1: - text_2 = fluid.data( - name='text_2', - shape=[-1, max_seq_len], - dtype='int64', - lod_level=0) - emb_2 = fluid.embedding( - input=text_2, - size=[dict_dim, 128], - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_2_name = emb_2.name - data_list.append(text_2) - emb_name_list.append(emb_2_name) - - if num_slots > 2: - text_3 = fluid.data( - name='text_3', - shape=[-1, max_seq_len], - dtype='int64', - lod_level=0) - emb_3 = fluid.embedding( - input=text_3, - size=[dict_dim, 128], - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_3_name = emb_3.name - data_list.append(text_3) - emb_name_list.append(emb_3_name) - - variable_names = filter( - lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"], - list(main_program.global_block().vars.keys())) - prefix_name = "@HUB_{}@".format(self.name) - add_vars_prefix( - program=main_program, prefix=prefix_name, vars=variable_names) - - for param in main_program.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - # Load the senta_lstm pretrained model. 
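-            # `load_vars` with the `if_exist` predicate below restores only
-            # those parameters whose files are present under
-            # `pretrained_model_path`; any variable added on top for a new
-            # task (e.g., illustratively, a classifier fc stacked on
-            # outputs['sentence_feature']) keeps its fresh initializer instead.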
- def if_exist(var): - return os.path.exists( - os.path.join(self.pretrained_model_path, var.name)) - - fluid.io.load_vars( - exe, self.pretrained_model_path, predicate=if_exist) - - inputs = {'seq_len': seq_len} - outputs = { - "class_probs": - main_program.global_block().vars[prefix_name + pred_name], - "sentence_feature": - main_program.global_block().vars[prefix_name + fc_name] - } - for index, data in enumerate(data_list): - if index == 0: - inputs['text'] = data - outputs['emb'] = main_program.global_block().vars[ - prefix_name + emb_name_list[0]] - else: - inputs['text_%s' % (index + 1)] = data - outputs['emb_%s' % (index + 1)] = main_program.global_block( - ).vars[prefix_name + emb_name_list[index]] - return inputs, outputs, main_program - - @serving - def sentiment_classify(self, texts=[], data={}, use_gpu=False, - batch_size=1): - """ - Get the sentiment prediction results results with the texts as input - - Args: - texts(list): the input texts to be predicted, if texts not data - data(dict): key must be 'text', value is the texts to be predicted, if data not texts - use_gpu(bool): whether use gpu to predict or not - batch_size(int): the program deals once with one batch - - Returns: - results(list): the word segmentation results - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." - ) - - if texts != [] and isinstance(texts, list) and data == {}: - predicted_data = texts - elif texts == [] and isinstance(data, dict) and isinstance( - data.get('text', None), list) and data['text']: - predicted_data = data["text"] - else: - raise ValueError( - "The input data is inconsistent with expectations.") - - predicted_data = self.to_unicode(predicted_data) - start_idx = 0 - iteration = int(math.ceil(len(predicted_data) / batch_size)) - results = [] - for i in range(iteration): - if i < (iteration - 1): - batch_data = predicted_data[start_idx:(start_idx + batch_size)] - else: - batch_data = predicted_data[start_idx:] - - start_idx = start_idx + batch_size - processed_results = preprocess(self.word_seg_module, batch_data, - self.word_dict, use_gpu, batch_size) - tensor_words = self.texts2tensor(processed_results) - - if use_gpu: - batch_out = self.gpu_predictor.run([tensor_words]) - else: - batch_out = self.cpu_predictor.run([tensor_words]) - batch_result = postprocess(batch_out[0], processed_results) - results += batch_result - return results - - def get_labels(self): - """ - Get the labels which was used when pretraining - Returns: - self.labels(dict) - """ - self.labels = {"positive": 1, "negative": 0} - return self.labels - - -if __name__ == "__main__": - senta = SentaCNN() - inputs, outputs, program = senta.context(num_slots=3) - # Data to be predicted - test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] - - # execute predict and print the result - input_dict = {"text": test_text} - results = senta.sentiment_classify(data=input_dict) - for index, result in enumerate(results): - if six.PY2: - print( - json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) - results = senta.sentiment_classify(texts=test_text) - for index, result in enumerate(results): - if six.PY2: - print( - json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) diff --git a/hub_module/modules/text/sentiment_analysis/senta_cnn/net.py 
b/hub_module/modules/text/sentiment_analysis/senta_cnn/net.py deleted file mode 100755 index 652234b6f036d17a51af0bc7b2c52a15ef4e9632..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/sentiment_analysis/senta_cnn/net.py +++ /dev/null @@ -1,25 +0,0 @@ -# -*- coding:utf-8 -*- -import paddle.fluid as fluid - - -def cnn_net(emb, seq_len, hid_dim=128, hid_dim2=96, class_dim=2, win_size=3): - """ - Conv net - """ - # unpad the token_feature - unpad_feature = fluid.layers.sequence_unpad(emb, length=seq_len) - - # convolution layer - conv_3 = fluid.nets.sequence_conv_pool( - input=unpad_feature, - num_filters=hid_dim, - filter_size=win_size, - act="tanh", - pool_type="max") - # full connect layer - fc_1 = fluid.layers.fc(input=[conv_3], size=hid_dim2) - - # softmax layer - prediction = fluid.layers.fc(input=[fc_1], size=class_dim, act="softmax") - - return prediction, fc_1 diff --git a/hub_module/modules/text/sentiment_analysis/senta_cnn/processor.py b/hub_module/modules/text/sentiment_analysis/senta_cnn/processor.py deleted file mode 100644 index c3487fbbf86e95c5891f884704bb0f4989c506c8..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/sentiment_analysis/senta_cnn/processor.py +++ /dev/null @@ -1,63 +0,0 @@ -# -*- coding:utf-8 -*- -import io -import numpy as np - - -def load_vocab(file_path): - """ - load the given vocabulary - """ - vocab = {} - with io.open(file_path, 'r', encoding='utf8') as f: - wid = 0 - for line in f: - parts = line.rstrip().split('\t') - vocab[parts[0]] = int(parts[1]) - vocab[""] = len(vocab) - return vocab - - -def preprocess(lac, texts, word_dict, use_gpu=False, batch_size=1): - """ - firstly, the predicted texts are segmented by lac module - then, the word segmention results input into senta - """ - result = [] - input_dict = {'text': texts} - processed = lac.lexical_analysis( - data=input_dict, use_gpu=use_gpu, batch_size=batch_size) - unk_id = word_dict[""] - for index, data in enumerate(processed): - result_i = {'processed': []} - result_i['origin'] = texts[index] - for word in data['word']: - if word in word_dict: - _index = word_dict[word] - else: - _index = unk_id - result_i['processed'].append(_index) - result.append(result_i) - return result - - -def postprocess(predict_out, texts): - """ - Convert model's output tensor to sentiment label - """ - predict_out = predict_out.as_ndarray() - batch_size = len(texts) - result = [] - for index in range(batch_size): - result_i = {} - result_i['text'] = texts[index]['origin'] - label = int(np.argmax(predict_out[index])) - if label == 0: - key = 'negative' - else: - key = 'positive' - result_i['sentiment_label'] = label - result_i['sentiment_key'] = key - result_i['positive_probs'] = float('%.4f' % predict_out[index, 1]) - result_i['negative_probs'] = float('%.4f' % (1 - predict_out[index, 1])) - result.append(result_i) - return result diff --git a/hub_module/modules/text/sentiment_analysis/senta_gru/module.py b/hub_module/modules/text/sentiment_analysis/senta_gru/module.py deleted file mode 100644 index 116b564afbdc2045db46398b6545c12e6763b89d..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/sentiment_analysis/senta_gru/module.py +++ /dev/null @@ -1,262 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import json -import math -import os -import six - -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.common.paddle_helper import 
add_vars_prefix -from paddlehub.module.module import moduleinfo, serving - -from senta_gru.net import gru_net -from senta_gru.processor import load_vocab, preprocess, postprocess - - -@moduleinfo( - name="senta_gru", - version="1.2.0", - summary="Baidu's open-source Sentiment Classification System.", - author="baidu-nlp", - author_email="", - type="nlp/sentiment_analysis") -class SentaGRU(hub.NLPPredictionModule): - def _initialize(self, user_dict=None): - """ - initialize with the necessary elements - """ - self.pretrained_model_path = os.path.join(self.directory, "assets", - "infer_model") - self.vocab_path = os.path.join(self.directory, "assets/vocab.txt") - self.word_dict = load_vocab(self.vocab_path) - self._word_seg_module = None - - self.predict = self.sentiment_classify - - self._set_config() - - @property - def word_seg_module(self): - """ - lac module - """ - if not self._word_seg_module: - self._word_seg_module = hub.Module(name="lac") - return self._word_seg_module - - def context(self, trainable=False, max_seq_len=128, num_data=1): - """ - Get the input ,output and program of the pretrained senta_gru - - Args: - trainable(bool): Whether fine-tune the pretrained parameters of senta_gru or not. - max_seq_len (int): It will limit the total sequence returned so that it has a maximum length. - num_data(int): It's number of data inputted to the model, selectted as following options: - - - 1(default): There's only one data to be feeded in the model, e.g. the module is used for text classification task. - - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise). - - 3: There are three data to be feeded in the model, e.g. the module is used for text matching task (pair-wise). - - Returns: - inputs(dict): the input variables of senta_gru (words) - outputs(dict): the output variables of input words (word embeddings and label probilities); - the sentence embedding and sequence length of the first input text. - main_program(Program): the main_program of Senta with pretrained prameters - """ - assert num_data >= 1 and num_data <= 3, "num_data(%d) must be 1, 2, or 3" % num_data - main_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(main_program, startup_program): - text_1 = fluid.layers.data( - name="text", - shape=[-1, max_seq_len, 1], - dtype="int64", - lod_level=0) - seq_len = fluid.layers.data( - name="seq_len", shape=[1], dtype='int64', lod_level=0) - seq_len_used = fluid.layers.squeeze(seq_len, axes=[1]) - - # Add embedding layer. - w_param_attrs = fluid.ParamAttr( - name="embedding_0.w_0", - initializer=fluid.initializer.TruncatedNormal(scale=0.02), - trainable=trainable) - dict_dim = 1256607 - emb_1 = fluid.layers.embedding( - input=text_1, - size=[dict_dim, 128], - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_1_name = emb_1.name - data_list = [text_1] - emb_name_list = [emb_1_name] - - # Add lstm layer. 
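-            # gru_net (defined in senta_gru/net.py) returns the softmax
-            # prediction over class_dim=2 labels plus the penultimate fc
-            # feature (hid_dim2=96), so pred is [batch, 2] and fc is
-            # [batch, 96].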
- pred, fc = gru_net(emb_1, seq_len_used) - pred_name = pred.name - fc_name = fc.name - - if num_data > 1: - text_2 = fluid.data( - name='text_2', - shape=[-1, max_seq_len], - dtype='int64', - lod_level=0) - emb_2 = fluid.embedding( - input=text_2, - size=[dict_dim, 128], - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_2_name = emb_2.name - data_list.append(text_2) - emb_name_list.append(emb_2_name) - - if num_data > 2: - text_3 = fluid.data( - name='text_3', - shape=[-1, max_seq_len], - dtype='int64', - lod_level=0) - emb_3 = fluid.embedding( - input=text_3, - size=[dict_dim, 128], - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_3_name = emb_3.name - data_list.append(text_3) - emb_name_list.append(emb_3_name) - - variable_names = filter( - lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"], - list(main_program.global_block().vars.keys())) - prefix_name = "@HUB_{}@".format(self.name) - add_vars_prefix( - program=main_program, prefix=prefix_name, vars=variable_names) - - for param in main_program.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - # Load the senta_lstm pretrained model. - def if_exist(var): - return os.path.exists( - os.path.join(self.pretrained_model_path, var.name)) - - fluid.io.load_vars( - exe, self.pretrained_model_path, predicate=if_exist) - - inputs = {'seq_len': seq_len} - outputs = { - "class_probs": - main_program.global_block().vars[prefix_name + pred_name], - "sentence_feature": - main_program.global_block().vars[prefix_name + fc_name] - } - for index, data in enumerate(data_list): - if index == 0: - inputs['text'] = data - outputs['emb'] = main_program.global_block().vars[ - prefix_name + emb_name_list[0]] - else: - inputs['text_%s' % (index + 1)] = data - outputs['emb_%s' % (index + 1)] = main_program.global_block( - ).vars[prefix_name + emb_name_list[index]] - return inputs, outputs, main_program - - @serving - def sentiment_classify(self, texts=[], data={}, use_gpu=False, - batch_size=1): - """ - Get the sentiment prediction results results with the texts as input - - Args: - texts(list): the input texts to be predicted, if texts not data - data(dict): key must be 'text', value is the texts to be predicted, if data not texts - use_gpu(bool): whether use gpu to predict or not - batch_size(int): the program deals once with one batch - - Returns: - results(list): the word segmentation results - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
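-                # Prediction below runs in ceil(len(texts) / batch_size)
-                # slices; e.g. (illustrative) 5 texts with batch_size=2 are
-                # processed as predicted_data[0:2], [2:4], [4:].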
- ) - - if texts != [] and isinstance(texts, list) and data == {}: - predicted_data = texts - elif texts == [] and isinstance(data, dict) and isinstance( - data.get('text', None), list) and data['text']: - predicted_data = data["text"] - else: - raise ValueError( - "The input data is inconsistent with expectations.") - - predicted_data = self.to_unicode(predicted_data) - start_idx = 0 - iteration = int(math.ceil(len(predicted_data) / batch_size)) - results = [] - for i in range(iteration): - if i < (iteration - 1): - batch_data = predicted_data[start_idx:(start_idx + batch_size)] - else: - batch_data = predicted_data[start_idx:] - - start_idx = start_idx + batch_size - processed_results = preprocess(self.word_seg_module, batch_data, - self.word_dict, use_gpu, batch_size) - tensor_words = self.texts2tensor(processed_results) - - if use_gpu: - batch_out = self.gpu_predictor.run([tensor_words]) - else: - batch_out = self.cpu_predictor.run([tensor_words]) - batch_result = postprocess(batch_out[0], processed_results) - results += batch_result - return results - - def get_labels(self): - """ - Get the labels which was used when pretraining - Returns: - self.labels(dict) - """ - self.labels = {"positive": 1, "negative": 0} - return self.labels - - -if __name__ == "__main__": - senta = SentaGRU() - inputs, outputs, main_program = senta.context(num_slots=3) - # Data to be predicted - test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] - - # execute predict and print the result - input_dict = {"text": test_text} - results = senta.sentiment_classify(data=input_dict) - for index, result in enumerate(results): - if six.PY2: - print( - json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) - results = senta.sentiment_classify(texts=test_text) - for index, result in enumerate(results): - if six.PY2: - print( - json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) diff --git a/hub_module/modules/text/sentiment_analysis/senta_gru/net.py b/hub_module/modules/text/sentiment_analysis/senta_gru/net.py deleted file mode 100755 index 7e6a313ee13c08eff243c7fab367f53fb45c17d1..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/sentiment_analysis/senta_gru/net.py +++ /dev/null @@ -1,29 +0,0 @@ -# -*- coding:utf-8 -*- -import paddle.fluid as fluid - - -def gru_net(emb, - seq_len, - emb_dim=128, - hid_dim=128, - hid_dim2=96, - class_dim=2, - emb_lr=30.0): - """ - gru net - """ - # unpad the token_feature - unpad_feature = fluid.layers.sequence_unpad(emb, length=seq_len) - - fc0 = fluid.layers.fc(input=unpad_feature, size=hid_dim * 3) - - # GRU layer - gru_h = fluid.layers.dynamic_gru(input=fc0, size=hid_dim, is_reverse=False) - gru_max = fluid.layers.sequence_pool(input=gru_h, pool_type='max') - gru_max_tanh = fluid.layers.tanh(gru_max) - - # full connect layer - fc1 = fluid.layers.fc(input=gru_max_tanh, size=hid_dim2, act='tanh') - # softmax layer - prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax') - return prediction, fc1 diff --git a/hub_module/modules/text/sentiment_analysis/senta_gru/processor.py b/hub_module/modules/text/sentiment_analysis/senta_gru/processor.py deleted file mode 100644 index c3487fbbf86e95c5891f884704bb0f4989c506c8..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/sentiment_analysis/senta_gru/processor.py +++ /dev/null @@ -1,63 +0,0 @@ -# -*- coding:utf-8 -*- -import io -import numpy as np - - -def load_vocab(file_path): - """ - load the given vocabulary - """ - 
vocab = {} - with io.open(file_path, 'r', encoding='utf8') as f: - wid = 0 - for line in f: - parts = line.rstrip().split('\t') - vocab[parts[0]] = int(parts[1]) - vocab[""] = len(vocab) - return vocab - - -def preprocess(lac, texts, word_dict, use_gpu=False, batch_size=1): - """ - firstly, the predicted texts are segmented by lac module - then, the word segmention results input into senta - """ - result = [] - input_dict = {'text': texts} - processed = lac.lexical_analysis( - data=input_dict, use_gpu=use_gpu, batch_size=batch_size) - unk_id = word_dict[""] - for index, data in enumerate(processed): - result_i = {'processed': []} - result_i['origin'] = texts[index] - for word in data['word']: - if word in word_dict: - _index = word_dict[word] - else: - _index = unk_id - result_i['processed'].append(_index) - result.append(result_i) - return result - - -def postprocess(predict_out, texts): - """ - Convert model's output tensor to sentiment label - """ - predict_out = predict_out.as_ndarray() - batch_size = len(texts) - result = [] - for index in range(batch_size): - result_i = {} - result_i['text'] = texts[index]['origin'] - label = int(np.argmax(predict_out[index])) - if label == 0: - key = 'negative' - else: - key = 'positive' - result_i['sentiment_label'] = label - result_i['sentiment_key'] = key - result_i['positive_probs'] = float('%.4f' % predict_out[index, 1]) - result_i['negative_probs'] = float('%.4f' % (1 - predict_out[index, 1])) - result.append(result_i) - return result diff --git a/hub_module/modules/text/sentiment_analysis/senta_lstm/module.py b/hub_module/modules/text/sentiment_analysis/senta_lstm/module.py deleted file mode 100644 index a34ab1d422d0924ab5e720e22fea93c4817fd155..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/sentiment_analysis/senta_lstm/module.py +++ /dev/null @@ -1,256 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import json -import math -import os -import six - -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.common.paddle_helper import add_vars_prefix -from paddlehub.module.module import moduleinfo, serving - -from senta_lstm.net import lstm_net -from senta_lstm.processor import load_vocab, preprocess, postprocess - - -@moduleinfo( - name="senta_lstm", - version="1.2.0", - summary="Baidu's open-source Sentiment Classification System.", - author="baidu-nlp", - author_email="", - type="nlp/sentiment_analysis") -class SentaLSTM(hub.NLPPredictionModule): - def _initialize(self, user_dict=None): - """ - initialize with the necessary elements - """ - self.pretrained_model_path = os.path.join(self.directory, "assets", - "infer_model") - self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt") - self.word_dict = load_vocab(self.vocab_path) - self._word_seg_module = None - - self.predict = self.sentiment_classify - - self._set_config() - - @property - def word_seg_module(self): - """ - lac module - """ - if not self._word_seg_module: - self._word_seg_module = hub.Module(name="lac") - return self._word_seg_module - - def context(self, trainable=False, max_seq_len=128, num_slots=1): - """ - Get the input ,output and program of the pretrained senta_lstm - - Args: - trainable(bool): Whether fine-tune the pretrained parameters of senta_lstm or not. - max_seq_len (int): It will limit the total sequence returned so that it has a maximum length. 
- num_slots(int): It's number of data inputted to the model, selectted as following options: - - - 1(default): There's only one data to be feeded in the model, e.g. the module is used for text classification task. - - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise). - - 3: There are three data to be feeded in the model, e.g. the module is used for text matching task (pair-wise). - - Returns: - inputs(dict): the input variables of senta_lstm (words) - outputs(dict): the output variables of input words (word embeddings and label probilities); - the sentence embedding and sequence length of the first input text. - main_program(Program): the main_program of Senta with pretrained prameters - """ - assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots - main_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(main_program, startup_program): - text_1 = fluid.layers.data( - name="text", - shape=[-1, max_seq_len, 1], - dtype="int64", - lod_level=0) - seq_len = fluid.layers.data( - name="seq_len", shape=[1], dtype='int64', lod_level=0) - seq_len_used = fluid.layers.squeeze(seq_len, axes=[1]) - - # Add embedding layer. - w_param_attrs = fluid.ParamAttr( - name="embedding_0.w_0", - initializer=fluid.initializer.TruncatedNormal(scale=0.02), - trainable=trainable) - dict_dim = 1256607 - emb_1 = fluid.layers.embedding( - input=text_1, - size=[dict_dim, 128], - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_1_name = emb_1.name - data_list = [text_1] - emb_name_list = [emb_1_name] - - # Add lstm layer. - pred, fc = lstm_net(emb_1, seq_len_used) - pred_name = pred.name - fc_name = fc.name - - if num_slots > 1: - text_2 = fluid.data( - name='text_2', - shape=[-1, max_seq_len], - dtype='int64', - lod_level=0) - emb_2 = fluid.embedding( - input=text_2, - size=[dict_dim, 128], - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_2_name = emb_2.name - data_list.append(text_2) - emb_name_list.append(emb_2_name) - - if num_slots > 2: - text_3 = fluid.data( - name='text_3', - shape=[-1, max_seq_len], - dtype='int64', - lod_level=0) - emb_3 = fluid.embedding( - input=text_3, - size=[dict_dim, 128], - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_3_name = emb_3.name - data_list.append(text_3) - emb_name_list.append(emb_3_name) - - variable_names = filter( - lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"], - list(main_program.global_block().vars.keys())) - prefix_name = "@HUB_{}@".format(self.name) - add_vars_prefix( - program=main_program, prefix=prefix_name, vars=variable_names) - - for param in main_program.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - # Load the senta_lstm pretrained model. 
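-            # add_vars_prefix above renamed every non-input variable with
-            # "@HUB_senta_lstm@", which is why the inputs/outputs dicts below
-            # look variables up as prefix_name + original_name.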
- def if_exist(var): - return os.path.exists( - os.path.join(self.pretrained_model_path, var.name)) - - fluid.io.load_vars( - exe, self.pretrained_model_path, predicate=if_exist) - - inputs = {'seq_len': seq_len} - outputs = { - "class_probs": - main_program.global_block().vars[prefix_name + pred_name], - "sentence_feature": - main_program.global_block().vars[prefix_name + fc_name] - } - for index, data in enumerate(data_list): - if index == 0: - inputs['text'] = data - outputs['emb'] = main_program.global_block().vars[ - prefix_name + emb_name_list[0]] - else: - inputs['text_%s' % (index + 1)] = data - outputs['emb_%s' % (index + 1)] = main_program.global_block( - ).vars[prefix_name + emb_name_list[index]] - return inputs, outputs, main_program - - @serving - def sentiment_classify(self, texts=[], data={}, use_gpu=False, - batch_size=1): - """ - Get the sentiment prediction results results with the texts as input - - Args: - texts(list): the input texts to be predicted, if texts not data - data(dict): key must be 'text', value is the texts to be predicted, if data not texts - use_gpu(bool): whether use gpu to predict or not - batch_size(int): the program deals once with one batch - - Returns: - results(list): the word segmentation results - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." - ) - - if texts != [] and isinstance(texts, list) and data == {}: - predicted_data = texts - elif texts == [] and isinstance(data, dict) and isinstance( - data.get('text', None), list) and data['text']: - predicted_data = data["text"] - else: - raise ValueError( - "The input data is inconsistent with expectations.") - - predicted_data = self.to_unicode(predicted_data) - start_idx = 0 - iteration = int(math.ceil(len(predicted_data) / batch_size)) - results = [] - for i in range(iteration): - if i < (iteration - 1): - batch_data = predicted_data[start_idx:(start_idx + batch_size)] - else: - batch_data = predicted_data[start_idx:] - - start_idx = start_idx + batch_size - processed_results = preprocess(self.word_seg_module, batch_data, - self.word_dict, use_gpu, batch_size) - tensor_words = self.texts2tensor(processed_results) - - if use_gpu: - batch_out = self.gpu_predictor.run([tensor_words]) - else: - batch_out = self.cpu_predictor.run([tensor_words]) - batch_result = postprocess(batch_out[0], processed_results) - results += batch_result - return results - - def get_labels(self): - """ - Get the labels which was used when pretraining - Returns: - self.labels(dict) - """ - self.labels = {"positive": 1, "negative": 0} - return self.labels - - -if __name__ == "__main__": - senta = SentaLSTM() - senta.context(num_slots=3) - # Data to be predicted - test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] - - # execute predict and print the result - input_dict = {"text": test_text} - results = senta.sentiment_classify(data=input_dict) - - for index, result in enumerate(results): - if six.PY2: - print( - json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) diff --git a/hub_module/modules/text/sentiment_analysis/senta_lstm/net.py b/hub_module/modules/text/sentiment_analysis/senta_lstm/net.py deleted file mode 100755 index ecf8c5321cfc7438da0b57427bc3fee9cbc8a082..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/sentiment_analysis/senta_lstm/net.py +++ 
/dev/null @@ -1,23 +0,0 @@ -# -*- coding:utf-8 -*- -import paddle.fluid as fluid - - -def lstm_net(emb, seq_len, hid_dim=128, hid_dim2=96, class_dim=2, emb_lr=30.0): - """ - Lstm net - """ - # unpad the token_feature - unpad_feature = fluid.layers.sequence_unpad(emb, length=seq_len) - # Lstm layer - fc0 = fluid.layers.fc(input=unpad_feature, size=hid_dim * 4) - lstm_h, c = fluid.layers.dynamic_lstm( - input=fc0, size=hid_dim * 4, is_reverse=False) - # max pooling layer - lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max') - lstm_max_tanh = fluid.layers.tanh(lstm_max) - # full connect layer - fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh') - # softmax layer - prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax') - - return prediction, fc1 diff --git a/hub_module/modules/text/sentiment_analysis/senta_lstm/processor.py b/hub_module/modules/text/sentiment_analysis/senta_lstm/processor.py deleted file mode 100644 index c3487fbbf86e95c5891f884704bb0f4989c506c8..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/sentiment_analysis/senta_lstm/processor.py +++ /dev/null @@ -1,63 +0,0 @@ -# -*- coding:utf-8 -*- -import io -import numpy as np - - -def load_vocab(file_path): - """ - load the given vocabulary - """ - vocab = {} - with io.open(file_path, 'r', encoding='utf8') as f: - wid = 0 - for line in f: - parts = line.rstrip().split('\t') - vocab[parts[0]] = int(parts[1]) - vocab[""] = len(vocab) - return vocab - - -def preprocess(lac, texts, word_dict, use_gpu=False, batch_size=1): - """ - firstly, the predicted texts are segmented by lac module - then, the word segmention results input into senta - """ - result = [] - input_dict = {'text': texts} - processed = lac.lexical_analysis( - data=input_dict, use_gpu=use_gpu, batch_size=batch_size) - unk_id = word_dict[""] - for index, data in enumerate(processed): - result_i = {'processed': []} - result_i['origin'] = texts[index] - for word in data['word']: - if word in word_dict: - _index = word_dict[word] - else: - _index = unk_id - result_i['processed'].append(_index) - result.append(result_i) - return result - - -def postprocess(predict_out, texts): - """ - Convert model's output tensor to sentiment label - """ - predict_out = predict_out.as_ndarray() - batch_size = len(texts) - result = [] - for index in range(batch_size): - result_i = {} - result_i['text'] = texts[index]['origin'] - label = int(np.argmax(predict_out[index])) - if label == 0: - key = 'negative' - else: - key = 'positive' - result_i['sentiment_label'] = label - result_i['sentiment_key'] = key - result_i['positive_probs'] = float('%.4f' % predict_out[index, 1]) - result_i['negative_probs'] = float('%.4f' % (1 - predict_out[index, 1])) - result.append(result_i) - return result diff --git a/hub_module/modules/text/syntactic_analysis/DDParser/module.py b/hub_module/modules/text/syntactic_analysis/DDParser/module.py deleted file mode 100644 index 2d451703a73333b1de509ccc3fb37bbd627df60a..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/syntactic_analysis/DDParser/module.py +++ /dev/null @@ -1,201 +0,0 @@ -# -*- coding:utf-8 -*- -import os -import argparse - -import numpy as np -import matplotlib as mpl -import matplotlib.pyplot as plt -import matplotlib.font_manager as font_manager -from paddle import fluid -import paddlehub as hub -from paddlehub.module.module import serving, moduleinfo, runnable - -try: - from ddparser import DDParser as DDParserModel -except: - raise ImportError( - "The 
module requires additional dependencies: ddparser. Please run 'pip install ddparser' to install it." - ) - - -@moduleinfo( - name="ddparser", - version="1.0.0", - summary="Baidu's open-source DDParser model.", - author="baidu-nlp", - author_email="", - type="nlp/syntactic_analysis") -class ddparser(hub.NLPPredictionModule): - def _initialize(self): - """ - initialize with the necessary elements - """ - self.ddp = DDParserModel(prob=True, use_pos=True) - self.font = font_manager.FontProperties( - fname=os.path.join(self.directory, "SourceHanSans-Regular.ttf")) - - @serving - def serving_parse(self, texts=[], return_visual=False): - results = self.parse(texts, return_visual) - if return_visual: - for i, result in enumerate(results): - result['visual'] = result['visual'].tolist() - - return results - - def parse(self, texts=[], return_visual=False): - """ - parse the dependency. - - Args: - texts(list[list[str] or list[list[str]]]): the input texts to be parse. It should be a list with elements: untokenized string or tokens list. - return_visual(bool): if set True, the result will contain the dependency visualization. - - Returns: - results(list[dict]): a list, with elements corresponding to each of the elements in texts. The element is a dictionary of shape: - { - 'word': list[str], the tokenized words. - 'head': list[int], the head ids. - 'deprel': list[str], the dependency relation. - 'prob': list[float], the prediction probility of the dependency relation. - 'postag': list[str], the POS tag. If the element of the texts is list, the key 'postag' will not return. - 'visual' : list[numpy.array]: the dependency visualization. Use cv2.imshow to show or cv2.imwrite to save it. If return_visual=False, it will not return. - } - """ - - if not texts: - return - if all([isinstance(i, str) and i for i in texts]): - do_parse = self.ddp.parse - elif all([isinstance(i, list) and i for i in texts]): - do_parse = self.ddp.parse_seg - else: - raise ValueError("All of the elements should be string or list") - results = do_parse(texts) - if return_visual: - for result in results: - result['visual'] = self.visualize( - result['word'], result['head'], result['deprel']) - return results - - @runnable - def run_cmd(self, argvs): - """ - Run as a command - """ - self.parser = argparse.ArgumentParser( - description='Run the %s module.' % self.name, - prog='hub run %s' % self.name, - usage='%(prog)s', - add_help=True) - - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - - self.add_module_input_arg() - - args = self.parser.parse_args(argvs) - - input_data = self.check_input_data(args) - - results = self.parse(texts=input_data) - - return results - - def visualize(self, word, head, deprel): - """ - Visualize the dependency. - - Args: - word: list[str], the tokenized words. - head: list[int], the head ids. - deprel: list[str], the dependency relation. - - Returns: - data: a numpy array, use cv2.imshow to show it or cv2.imwrite to save it. 
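-
-        Example (illustrative values; heads are 1-based, 0 marks ROOT):
-            word   = ['百度', '是', '公司']
-            head   = [2, 0, 2]
-            deprel = ['SBV', 'HED', 'VOB']
-            data = module.visualize(word, head, deprel)
-            cv2.imwrite('parse.jpg', data)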
- """ - nodes = ['ROOT'] + word - x = list(range(len(nodes))) - y = [0] * (len(nodes)) - fig, ax = plt.subplots() - # control the picture size - max_span = max([abs(i + 1 - j) for i, j in enumerate(head)]) - fig.set_size_inches((len(nodes), max_span / 2)) - # set the points - plt.scatter(x, y, c='w') - - for i in range(len(nodes)): - txt = nodes[i] - xytext = (i, 0) - if i == 0: - # set 'ROOT' - ax.annotate( - txt, - xy=xytext, - xycoords='data', - xytext=xytext, - textcoords='data', - ) - else: - xy = (head[i - 1], 0) - rad = 0.5 if head[i - 1] < i else -0.5 - # set the word - ax.annotate( - txt, - xy=xy, - xycoords='data', - xytext=(xytext[0] - 0.1, xytext[1]), - textcoords='data', - fontproperties=self.font) - # draw the curve - ax.annotate( - "", - xy=xy, - xycoords='data', - xytext=xytext, - textcoords='data', - arrowprops=dict( - arrowstyle="<-", - shrinkA=12, - shrinkB=12, - color='blue', - connectionstyle="arc3,rad=%s" % rad, - ), - ) - # set the deprel label. Calculate its position by the radius - text_x = min(i, head[i - 1]) + abs((i - head[i - 1])) / 2 - 0.2 - text_y = abs((i - head[i - 1])) / 4 - ax.annotate( - deprel[i - 1], - xy=xy, - xycoords='data', - xytext=[text_x, text_y], - textcoords='data') - - # control the axis - plt.axis('equal') - plt.axis('off') - - # save to numpy array - fig.canvas.draw() - data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8) - data = data.reshape(fig.canvas.get_width_height()[::-1] + - (3, ))[:, :, ::-1] - return data - - -if __name__ == "__main__": - module = ddparser() - # Data to be predicted - test_text = ["百度是一家高科技公司"] - results = module.parse(texts=test_text) - print(results) - test_tokens = [['百度', '是', '一家', '高科技', '公司']] - results = module.parse(texts=test_text, return_visual=True) - print(results) - result = results[0] - data = module.visualize(result['word'], result['head'], result['deprel']) - import cv2 - import numpy as np - cv2.imwrite('test1.jpg', data) - cv2.imwrite('test2.jpg', result['visual']) diff --git a/hub_module/modules/text/text_generation/ernie_gen_acrostic_poetry/model/decode.py b/hub_module/modules/text/text_generation/ernie_gen_acrostic_poetry/model/decode.py deleted file mode 100644 index a42bfee19523546558c8b08683235f577e03550d..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/ernie_gen_acrostic_poetry/model/decode.py +++ /dev/null @@ -1,305 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re -import numpy as np -from collections import namedtuple - -import paddle.fluid as F -import paddle.fluid.layers as L -import paddle.fluid.dygraph as D - - -def gen_bias(encoder_inputs, decoder_inputs, step): - decoder_bsz, decoder_seqlen = decoder_inputs.shape[:2] - attn_bias = L.reshape( - L.range(0, decoder_seqlen, 1, dtype='float32') + 1, [1, -1, 1]) - decoder_bias = L.cast( - (L.matmul(attn_bias, 1. 
/ attn_bias, transpose_y=True) >= 1.), - 'float32') #[1, 1, decoderlen, decoderlen] - encoder_bias = L.unsqueeze( - L.cast(L.ones_like(encoder_inputs), 'float32'), - [1]) #[bsz, 1, encoderlen] - encoder_bias = L.expand( - encoder_bias, [1, decoder_seqlen, 1]) #[bsz,decoderlen, encoderlen] - decoder_bias = L.expand(decoder_bias, - [decoder_bsz, 1, 1]) #[bsz, decoderlen, decoderlen] - if step > 0: - bias = L.concat([ - encoder_bias, - L.ones([decoder_bsz, decoder_seqlen, step], 'float32'), decoder_bias - ], -1) - else: - bias = L.concat([encoder_bias, decoder_bias], -1) - return bias - - -@D.no_grad -def greedy_search_infilling(model, - q_ids, - q_sids, - sos_id, - eos_id, - attn_id, - max_encode_len=640, - max_decode_len=100, - tgt_type_id=3): - model.eval() - _, logits, info = model(q_ids, q_sids) - gen_ids = L.argmax(logits, -1) - d_batch, d_seqlen = q_ids.shape - seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) - has_stopped = np.zeros([d_batch], dtype=np.bool) - gen_seq_len = np.zeros([d_batch], dtype=np.int64) - output_ids = [] - - past_cache = info['caches'] - - cls_ids = L.ones([d_batch], dtype='int64') * sos_id - attn_ids = L.ones([d_batch], dtype='int64') * attn_id - ids = L.stack([cls_ids, attn_ids], -1) - for step in range(max_decode_len): - bias = gen_bias(q_ids, ids, step) - pos_ids = D.to_variable( - np.tile(np.array([[step, step + 1]], dtype=np.int64), [d_batch, 1])) - pos_ids += seqlen - _, logits, info = model( - ids, - L.ones_like(ids) * tgt_type_id, - pos_ids=pos_ids, - attn_bias=bias, - past_cache=past_cache) - gen_ids = L.argmax(logits, -1) - - past_cached_k, past_cached_v = past_cache - cached_k, cached_v = info['caches'] - cached_k = [ - L.concat([pk, k[:, :1, :]], 1) - for pk, k in zip(past_cached_k, cached_k) - ] # concat cached - cached_v = [ - L.concat([pv, v[:, :1, :]], 1) - for pv, v in zip(past_cached_v, cached_v) - ] - past_cache = (cached_k, cached_v) - - gen_ids = gen_ids[:, 1] - ids = L.stack([gen_ids, attn_ids], 1) - - gen_ids = gen_ids.numpy() - has_stopped |= (gen_ids == eos_id).astype(np.bool) - gen_seq_len += (1 - has_stopped.astype(np.int64)) - output_ids.append(gen_ids.tolist()) - if has_stopped.all(): - break - output_ids = np.array(output_ids).transpose([1, 0]) - return output_ids - - -BeamSearchState = namedtuple('BeamSearchState', - ['log_probs', 'lengths', 'finished']) -BeamSearchOutput = namedtuple('BeamSearchOutput', - ['scores', 'predicted_ids', 'beam_parent_ids']) - - -def log_softmax(x): - e_x = np.exp(x - np.max(x)) - return np.log(e_x / e_x.sum()) - - -def mask_prob(p, onehot_eos, finished): - is_finished = L.cast(L.reshape(finished, [-1, 1]) != 0, 'float32') - p = is_finished * (1. - L.cast(onehot_eos, 'float32')) * -9999. + ( - 1. - is_finished) * p - return p - - -def hyp_score(log_probs, length, length_penalty): - lp = L.pow((5. + L.cast(length, 'float32')) / 6., length_penalty) - return log_probs / lp - - -def beam_search_step(state, logits, eos_id, beam_width, is_first_step, - length_penalty): - """logits.shape == [B*W, V]""" - beam_size, vocab_size = logits.shape # as batch size=1 in this hub module. 
the first dim means bsz * beam_size equals beam_size - logits_np = logits.numpy() - for i in range(beam_size): - logits_np[i][17963] = 0 # make [UNK] prob = 0 - logits = D.to_variable(logits_np) - - bsz, beam_width = state.log_probs.shape - onehot_eos = L.cast( - F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size), 'int64') #[1, V] - - probs = L.log(L.softmax(logits)) #[B*W, V] - probs = mask_prob(probs, onehot_eos, state.finished) #[B*W, V] - allprobs = L.reshape(state.log_probs, [-1, 1]) + probs #[B*W, V] - - not_finished = 1 - L.reshape(state.finished, [-1, 1]) #[B*W,1] - not_eos = 1 - onehot_eos - length_to_add = not_finished * not_eos #[B*W,V] - alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add - - allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size]) - alllen = L.reshape(alllen, [-1, beam_width * vocab_size]) - allscore = hyp_score(allprobs, alllen, length_penalty) - if is_first_step: - allscore = L.reshape( - allscore, - [bsz, beam_width, -1])[:, 0, :] # first step only consiter beam 0 - scores, idx = L.topk(allscore, k=beam_width) #[B, W] - next_beam_id = idx // vocab_size #[B, W] - next_word_id = idx % vocab_size - - gather_idx = L.concat([L.where(idx != -1)[:, :1], - L.reshape(idx, [-1, 1])], 1) - next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape) - next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape) - - gather_idx = L.concat( - [L.where(next_beam_id != -1)[:, :1], - L.reshape(next_beam_id, [-1, 1])], 1) - next_finished = L.reshape( - L.gather_nd(state.finished, gather_idx), - state.finished.shape) #[gather new beam state according to new beam id] - - next_finished += L.cast(next_word_id == eos_id, 'int64') - next_finished = L.cast(next_finished > 0, 'int64') - - next_state = BeamSearchState( - log_probs=next_probs, lengths=next_len, finished=next_finished) - output = BeamSearchOutput( - scores=scores, predicted_ids=next_word_id, beam_parent_ids=next_beam_id) - - return output, next_state - - -@D.no_grad -def beam_search_infilling(model, - q_ids, - q_sids, - sos_id, - eos_id, - attn_id, - max_encode_len=640, - max_decode_len=100, - beam_width=5, - tgt_type_id=3, - length_penalty=1.0): - model.eval() - _, __, info = model(q_ids, q_sids) - d_batch, d_seqlen = q_ids.shape - - state = BeamSearchState( - log_probs=L.zeros([d_batch, beam_width], 'float32'), - lengths=L.zeros([d_batch, beam_width], 'int64'), - finished=L.zeros([d_batch, beam_width], 'int64')) - outputs = [] - - def reorder_(t, parent_id): - """reorder cache according to parent beam id""" - gather_idx = L.where(parent_id != -1)[:, 0] * beam_width + L.reshape( - parent_id, [-1]) - t = L.gather(t, gather_idx) - return t - - def tile_(t, times): - _shapes = list(t.shape[1:]) - ret = L.reshape( - L.expand(L.unsqueeze(t, [1]), [ - 1, - times, - ] + [ - 1, - ] * len(_shapes)), [ - -1, - ] + _shapes) - return ret - - cached_k, cached_v = info['caches'] - cached_k = [tile_(k, beam_width) for k in cached_k] - cached_v = [tile_(v, beam_width) for v in cached_v] - past_cache = (cached_k, cached_v) - - q_ids = tile_(q_ids, beam_width) - seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) - - cls_ids = L.ones([d_batch * beam_width], dtype='int64') * sos_id - attn_ids = L.ones([d_batch * beam_width], dtype='int64') * attn_id # SOS - ids = L.stack([cls_ids, attn_ids], -1) - for step in range(max_decode_len): - bias = gen_bias(q_ids, ids, step) - pos_ids = D.to_variable( - np.tile( - np.array([[step, step + 1]], dtype=np.int64), - [d_batch * beam_width, 1])) - pos_ids += 
seqlen - - _, logits, info = model( - ids, - L.ones_like(ids) * tgt_type_id, - pos_ids=pos_ids, - attn_bias=bias, - past_cache=past_cache) - - output, state = beam_search_step( - state, - logits[:, 1], - eos_id=eos_id, - beam_width=beam_width, - is_first_step=(step == 0), - length_penalty=length_penalty) - outputs.append(output) - - past_cached_k, past_cached_v = past_cache - cached_k, cached_v = info['caches'] - cached_k = [ - reorder_(L.concat([pk, k[:, :1, :]], 1), output.beam_parent_ids) - for pk, k in zip(past_cached_k, cached_k) - ] # concat cached - cached_v = [ - reorder_(L.concat([pv, v[:, :1, :]], 1), output.beam_parent_ids) - for pv, v in zip(past_cached_v, cached_v) - ] - past_cache = (cached_k, cached_v) - - pred_ids_flatten = L.reshape(output.predicted_ids, - [d_batch * beam_width]) - ids = L.stack([pred_ids_flatten, attn_ids], 1) - - if state.finished.numpy().all(): - break - - final_ids = L.stack([o.predicted_ids for o in outputs], 0) - final_parent_ids = L.stack([o.beam_parent_ids for o in outputs], 0) - final_ids = L.gather_tree(final_ids, final_parent_ids) #[:, :, - #0] #pick best beam - final_ids = L.transpose( - L.reshape(final_ids, [-1, d_batch * 1, beam_width]), [1, 2, 0]) - return final_ids - - -en_patten = re.compile(r'^[a-zA-Z0-9]*$') - - -def post_process(token): - if token.startswith('##'): - ret = token[2:] - else: - if en_patten.match(token): - ret = ' ' + token - else: - ret = token - return ret diff --git a/hub_module/modules/text/text_generation/ernie_gen_acrostic_poetry/model/file_utils.py b/hub_module/modules/text/text_generation/ernie_gen_acrostic_poetry/model/file_utils.py deleted file mode 100644 index 613a5213a83e7fbd2a126cdb49b12eb62d4de41f..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/ernie_gen_acrostic_poetry/model/file_utils.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os - -from tqdm import tqdm -from paddlehub.common.logger import logger -from paddlehub.common.dir import MODULE_HOME - - -def _fetch_from_remote(url, force_download=False): - import tempfile, requests, tarfile - cached_dir = os.path.join(MODULE_HOME, "ernie_for_gen") - if force_download or not os.path.exists(cached_dir): - with tempfile.NamedTemporaryFile() as f: - #url = 'https://ernie.bj.bcebos.com/ERNIE_stable.tgz' - r = requests.get(url, stream=True) - total_len = int(r.headers.get('content-length')) - for chunk in tqdm( - r.iter_content(chunk_size=1024), - total=total_len // 1024, - desc='downloading %s' % url, - unit='KB'): - if chunk: - f.write(chunk) - f.flush() - logger.debug('extacting... 
to %s' % f.name) - with tarfile.open(f.name) as tf: - tf.extractall(path=cached_dir) - logger.debug('%s cached in %s' % (url, cached_dir)) - return cached_dir - - -def add_docstring(doc): - def func(f): - f.__doc__ += ('\n======other docs from supper class ======\n%s' % doc) - return f - - return func diff --git a/hub_module/modules/text/text_generation/ernie_gen_acrostic_poetry/model/modeling_ernie.py b/hub_module/modules/text/text_generation/ernie_gen_acrostic_poetry/model/modeling_ernie.py deleted file mode 100644 index 7c2304f67d7347e584c244ab8384eff0720f7cc2..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/ernie_gen_acrostic_poetry/model/modeling_ernie.py +++ /dev/null @@ -1,379 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function -from __future__ import unicode_literals - -import logging - -import paddle.fluid.dygraph as D -import paddle.fluid as F -import paddle.fluid.layers as L - -log = logging.getLogger(__name__) - - -def _build_linear(n_in, n_out, name, init, act=None): - return D.Linear( - n_in, - n_out, - param_attr=F.ParamAttr( - name='%s.w_0' % name if name is not None else None, - initializer=init), - bias_attr='%s.b_0' % name if name is not None else None, - act=act) - - -def _build_ln(n_in, name): - return D.LayerNorm( - normalized_shape=n_in, - param_attr=F.ParamAttr( - name='%s_layer_norm_scale' % name if name is not None else None, - initializer=F.initializer.Constant(1.)), - bias_attr=F.ParamAttr( - name='%s_layer_norm_bias' % name if name is not None else None, - initializer=F.initializer.Constant(1.)), - ) - - -def append_name(name, postfix): - if name is None: - return None - elif name == '': - return postfix - else: - return '%s_%s' % (name, postfix) - - -class AttentionLayer(D.Layer): - def __init__(self, cfg, name=None): - super(AttentionLayer, self).__init__() - initializer = F.initializer.TruncatedNormal( - scale=cfg['initializer_range']) - d_model = cfg['hidden_size'] - n_head = cfg['num_attention_heads'] - assert d_model % n_head == 0 - d_model_q = cfg.get('query_hidden_size_per_head', - d_model // n_head) * n_head - d_model_v = cfg.get('value_hidden_size_per_head', - d_model // n_head) * n_head - self.n_head = n_head - self.d_key = d_model_q // n_head - self.q = _build_linear(d_model, d_model_q, append_name( - name, 'query_fc'), initializer) - self.k = _build_linear(d_model, d_model_q, append_name(name, 'key_fc'), - initializer) - self.v = _build_linear(d_model, d_model_v, append_name( - name, 'value_fc'), initializer) - self.o = _build_linear(d_model_v, d_model, append_name( - name, 'output_fc'), initializer) - self.dropout = lambda i: L.dropout( - i, - dropout_prob=cfg['attention_probs_dropout_prob'], - dropout_implementation="upscale_in_train", - ) if self.training else i - - def forward(self, queries, keys, values, attn_bias, past_cache): - 
assert len(queries.shape) == len(keys.shape) == len(values.shape) == 3 - - q = self.q(queries) - k = self.k(keys) - v = self.v(values) - - cache = (k, v) - if past_cache is not None: - cached_k, cached_v = past_cache - k = L.concat([cached_k, k], 1) - v = L.concat([cached_v, v], 1) - - q = L.transpose( - L.reshape(q, [0, 0, self.n_head, q.shape[-1] // self.n_head]), - [0, 2, 1, 3]) #[batch, head, seq, dim] - k = L.transpose( - L.reshape(k, [0, 0, self.n_head, k.shape[-1] // self.n_head]), - [0, 2, 1, 3]) #[batch, head, seq, dim] - v = L.transpose( - L.reshape(v, [0, 0, self.n_head, v.shape[-1] // self.n_head]), - [0, 2, 1, 3]) #[batch, head, seq, dim] - - q = L.scale(q, scale=self.d_key**-0.5) - score = L.matmul(q, k, transpose_y=True) - if attn_bias is not None: - score += attn_bias - score = L.softmax(score, use_cudnn=True) - score = self.dropout(score) - - out = L.matmul(score, v) - out = L.transpose(out, [0, 2, 1, 3]) - out = L.reshape(out, [0, 0, out.shape[2] * out.shape[3]]) - - out = self.o(out) - return out, cache - - -class PositionwiseFeedForwardLayer(D.Layer): - def __init__(self, cfg, name=None): - super(PositionwiseFeedForwardLayer, self).__init__() - initializer = F.initializer.TruncatedNormal( - scale=cfg['initializer_range']) - d_model = cfg['hidden_size'] - d_ffn = cfg.get('intermediate_size', 4 * d_model) - assert cfg['hidden_act'] in ['relu', 'gelu'] - self.i = _build_linear( - d_model, - d_ffn, - append_name(name, 'fc_0'), - initializer, - act=cfg['hidden_act']) - self.o = _build_linear(d_ffn, d_model, append_name(name, 'fc_1'), - initializer) - prob = cfg.get('intermediate_dropout_prob', 0.) - self.dropout = lambda i: L.dropout( - i, - dropout_prob=prob, - dropout_implementation="upscale_in_train", - ) if self.training else i - - def forward(self, inputs): - hidden = self.i(inputs) - hidden = self.dropout(hidden) - out = self.o(hidden) - return out - - -class ErnieBlock(D.Layer): - def __init__(self, cfg, name=None): - super(ErnieBlock, self).__init__() - d_model = cfg['hidden_size'] - initializer = F.initializer.TruncatedNormal( - scale=cfg['initializer_range']) - - self.attn = AttentionLayer( - cfg, name=append_name(name, 'multi_head_att')) - self.ln1 = _build_ln(d_model, name=append_name(name, 'post_att')) - self.ffn = PositionwiseFeedForwardLayer( - cfg, name=append_name(name, 'ffn')) - self.ln2 = _build_ln(d_model, name=append_name(name, 'post_ffn')) - prob = cfg.get('intermediate_dropout_prob', cfg['hidden_dropout_prob']) - self.dropout = lambda i: L.dropout( - i, - dropout_prob=prob, - dropout_implementation="upscale_in_train", - ) if self.training else i - - def forward(self, inputs, attn_bias=None, past_cache=None): - attn_out, cache = self.attn( - inputs, inputs, inputs, attn_bias, - past_cache=past_cache) #self attn - attn_out = self.dropout(attn_out) - hidden = attn_out + inputs - hidden = self.ln1(hidden) # dropout/ add/ norm - - ffn_out = self.ffn(hidden) - ffn_out = self.dropout(ffn_out) - hidden = ffn_out + hidden - hidden = self.ln2(hidden) - return hidden, cache - - -class ErnieEncoderStack(D.Layer): - def __init__(self, cfg, name=None): - super(ErnieEncoderStack, self).__init__() - n_layers = cfg['num_hidden_layers'] - self.block = D.LayerList([ - ErnieBlock(cfg, append_name(name, 'layer_%d' % i)) - for i in range(n_layers) - ]) - - def forward(self, inputs, attn_bias=None, past_cache=None): - if past_cache is not None: - assert isinstance( - past_cache, tuple - ), 'unknown type of `past_cache`, expect tuple or list. 
got %s' % repr( - type(past_cache)) - past_cache = list(zip(*past_cache)) - else: - past_cache = [None] * len(self.block) - cache_list_k, cache_list_v, hidden_list = [], [], [inputs] - - for b, p in zip(self.block, past_cache): - inputs, cache = b(inputs, attn_bias=attn_bias, past_cache=p) - cache_k, cache_v = cache - cache_list_k.append(cache_k) - cache_list_v.append(cache_v) - hidden_list.append(inputs) - - return inputs, hidden_list, (cache_list_k, cache_list_v) - - -class ErnieModel(D.Layer): - def __init__(self, cfg, name=None): - """ - Fundamental pretrained Ernie model - """ - log.debug('init ErnieModel with config: %s' % repr(cfg)) - D.Layer.__init__(self) - d_model = cfg['hidden_size'] - d_emb = cfg.get('emb_size', cfg['hidden_size']) - d_vocab = cfg['vocab_size'] - d_pos = cfg['max_position_embeddings'] - d_sent = cfg.get("sent_type_vocab_size") or cfg['type_vocab_size'] - self.n_head = cfg['num_attention_heads'] - self.return_additional_info = cfg.get('return_additional_info', False) - initializer = F.initializer.TruncatedNormal( - scale=cfg['initializer_range']) - - self.ln = _build_ln(d_model, name=append_name(name, 'pre_encoder')) - self.word_emb = D.Embedding([d_vocab, d_emb], - param_attr=F.ParamAttr( - name=append_name( - name, 'word_embedding'), - initializer=initializer)) - self.pos_emb = D.Embedding([d_pos, d_emb], - param_attr=F.ParamAttr( - name=append_name(name, 'pos_embedding'), - initializer=initializer)) - self.sent_emb = D.Embedding([d_sent, d_emb], - param_attr=F.ParamAttr( - name=append_name( - name, 'sent_embedding'), - initializer=initializer)) - prob = cfg['hidden_dropout_prob'] - self.dropout = lambda i: L.dropout( - i, - dropout_prob=prob, - dropout_implementation="upscale_in_train", - ) if self.training else i - - self.encoder_stack = ErnieEncoderStack(cfg, append_name( - name, 'encoder')) - if cfg.get('has_pooler', True): - self.pooler = _build_linear( - cfg['hidden_size'], - cfg['hidden_size'], - append_name(name, 'pooled_fc'), - initializer, - act='tanh') - else: - self.pooler = None - self.train() - - def eval(self): - if F.in_dygraph_mode(): - super(ErnieModel, self).eval() - self.training = False - for l in self.sublayers(): - l.training = False - - def train(self): - if F.in_dygraph_mode(): - super(ErnieModel, self).train() - self.training = True - for l in self.sublayers(): - l.training = True - - def forward(self, - src_ids, - sent_ids=None, - pos_ids=None, - input_mask=None, - attn_bias=None, - past_cache=None, - use_causal_mask=False): - """ - Args: - src_ids (`Variable` of shape `[batch_size, seq_len]`): - Indices of input sequence tokens in the vocabulary. - sent_ids (optional, `Variable` of shape `[batch_size, seq_len]`): - aka token_type_ids, Segment token indices to indicate first and second portions of the inputs. - if None, assume all tokens come from `segment_a` - pos_ids(optional, `Variable` of shape `[batch_size, seq_len]`): - Indices of positions of each input sequence tokens in the position embeddings. - input_mask(optional `Variable` of shape `[batch_size, seq_len]`): - Mask to avoid performing attention on the padding token indices of the encoder input. 
- attn_bias(optional, `Variable` of shape `[batch_size, seq_len, seq_len]` or False): - 3D version of `input_mask`; if set, it overrides `input_mask` and is used directly as the attention mask - past_cache(optional, tuple of two lists: cached key and cached value, - each is a list of `Variable`s of shape `[batch_size, seq_len, hidden_size]`): - cached key/value tensors that will be concatenated to the generated key/value when performing self attention. - if set, `attn_bias` should not be None. - - Returns: - pooled (`Variable` of shape `[batch_size, hidden_size]`): - pooled output of the pooler fc - encoded(`Variable` of shape `[batch_size, seq_len, hidden_size]`): - output hidden states of the transformer stack - """ - assert len( - src_ids.shape - ) == 2, 'expect src_ids.shape = [batch, sequence], got %s' % (repr( - src_ids.shape)) - assert attn_bias is not None if past_cache else True, 'if `past_cache` is specified, attn_bias should not be None' - d_batch = L.shape(src_ids)[0] - d_seqlen = L.shape(src_ids)[1] - if pos_ids is None: - pos_ids = L.reshape(L.range(0, d_seqlen, 1, dtype='int32'), [1, -1]) - pos_ids = L.cast(pos_ids, 'int64') - if attn_bias is None: - if input_mask is None: - input_mask = L.cast(src_ids != 0, 'float32') - assert len(input_mask.shape) == 2 - input_mask = L.unsqueeze(input_mask, axes=[-1]) - attn_bias = L.matmul(input_mask, input_mask, transpose_y=True) - if use_causal_mask: - sequence = L.reshape( - L.range(0, d_seqlen, 1, dtype='float32') + 1., - [1, 1, -1, 1]) - causal_mask = L.cast( - (L.matmul(sequence, 1. / sequence, transpose_y=True) >= 1.), - 'float32') - attn_bias *= causal_mask - else: - assert len( - attn_bias.shape - ) == 3, 'expect attn_bias to be rank 3, got %r' % attn_bias.shape - attn_bias = (1. - attn_bias) * -10000.0 - attn_bias = L.unsqueeze(attn_bias, [1]) - attn_bias = L.expand(attn_bias, - [1, self.n_head, 1, 1]) # expand explicitly instead of relying on broadcast - attn_bias.stop_gradient = True - - if sent_ids is None: - sent_ids = L.zeros_like(src_ids) - - src_embedded = self.word_emb(src_ids) - pos_embedded = self.pos_emb(pos_ids) - sent_embedded = self.sent_emb(sent_ids) - embedded = src_embedded + pos_embedded + sent_embedded - - embedded = self.dropout(self.ln(embedded)) - - encoded, hidden_list, cache_list = self.encoder_stack( - embedded, attn_bias, past_cache=past_cache) - if self.pooler is not None: - pooled = self.pooler(encoded[:, 0, :]) - else: - pooled = None - - additional_info = { - 'hiddens': hidden_list, - 'caches': cache_list, - } - - if self.return_additional_info: - return pooled, encoded, additional_info - else: - return pooled, encoded diff --git a/hub_module/modules/text/text_generation/ernie_gen_acrostic_poetry/model/modeling_ernie_gen.py b/hub_module/modules/text/text_generation/ernie_gen_acrostic_poetry/model/modeling_ernie_gen.py deleted file mode 100644 index 8b7690bf2eebcf94b69044f69229ca60df666bd1..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/ernie_gen_acrostic_poetry/model/modeling_ernie_gen.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.fluid as F -import paddle.fluid.layers as L - -from ernie_gen_acrostic_poetry.model.modeling_ernie import ErnieModel -from ernie_gen_acrostic_poetry.model.modeling_ernie import _build_linear, _build_ln, append_name - - -class ErnieModelForGeneration(ErnieModel): - def __init__(self, cfg, name=None): - cfg['return_additional_info'] = True - cfg['has_pooler'] = False - super(ErnieModelForGeneration, self).__init__(cfg, name=name) - initializer = F.initializer.TruncatedNormal( - scale=cfg['initializer_range']) - d_model = cfg['hidden_size'] - d_vocab = cfg['vocab_size'] - - self.mlm = _build_linear( - d_model, - d_model, - append_name(name, 'mask_lm_trans_fc'), - initializer, - act=cfg['hidden_act']) - self.mlm_ln = _build_ln( - d_model, name=append_name(name, 'mask_lm_trans')) - self.mlm_bias = L.create_parameter( - dtype='float32', - shape=[d_vocab], - attr=F.ParamAttr( - name=append_name(name, 'mask_lm_out_fc.b_0'), - initializer=F.initializer.Constant(value=0.0)), - is_bias=True, - ) - - def forward(self, src_ids, *args, **kwargs): - tgt_labels = kwargs.pop('tgt_labels', None) - tgt_pos = kwargs.pop('tgt_pos', None) - encode_only = kwargs.pop('encode_only', False) - _, encoded, info = ErnieModel.forward(self, src_ids, *args, **kwargs) - if encode_only: - return None, None, info - elif tgt_labels is None: - encoded = self.mlm(encoded) - encoded = self.mlm_ln(encoded) - logits = L.matmul( - encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias - output_ids = L.argmax(logits, -1) - return output_ids, logits, info - else: - encoded_2d = L.gather_nd(encoded, tgt_pos) - encoded_2d = self.mlm(encoded_2d) - encoded_2d = self.mlm_ln(encoded_2d) - logits_2d = L.matmul( - encoded_2d, self.word_emb.weight, - transpose_y=True) + self.mlm_bias - if len(tgt_labels.shape) == 1: - tgt_labels = L.reshape(tgt_labels, [-1, 1]) - - loss = L.reduce_mean( - L.softmax_with_cross_entropy( - logits_2d, - tgt_labels, - soft_label=(tgt_labels.shape[-1] != 1))) - return loss, logits_2d, info diff --git a/hub_module/modules/text/text_generation/ernie_gen_acrostic_poetry/model/tokenizing_ernie.py b/hub_module/modules/text/text_generation/ernie_gen_acrostic_poetry/model/tokenizing_ernie.py deleted file mode 100644 index 3039b7028f5da991189527b8145b05c952dafbbd..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/ernie_gen_acrostic_poetry/model/tokenizing_ernie.py +++ /dev/null @@ -1,171 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
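The `ErnieModelForGeneration` defined above adds a masked-LM style generation head on top of `ErnieModel` and ties its output projection to the input word-embedding table (`L.matmul(encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias`). A minimal numpy sketch of that weight-tying step, with made-up shapes (names and sizes here are illustrative, not the module's API):

```python
import numpy as np

# Toy sizes; the real model takes hidden_size and vocab_size from ernie_config.json.
d_model, d_vocab, seq_len = 4, 10, 3
rng = np.random.default_rng(0)

encoded = rng.normal(size=(seq_len, d_model))   # transformer output states
word_emb = rng.normal(size=(d_vocab, d_model))  # input embedding table, reused as output weights
mlm_bias = np.zeros(d_vocab)                    # per-token output bias

# Tied projection: score every position against every vocabulary embedding.
logits = encoded @ word_emb.T + mlm_bias        # [seq_len, d_vocab]
output_ids = logits.argmax(-1)                  # greedy choice, as in the tgt_labels-is-None branch
print(output_ids)
```

Tying the output projection to the embedding table keeps the checkpoint smaller and matches how the pretrained ERNIE weights are laid out.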
- -import six -import re -import logging -from functools import partial - -import numpy as np - -import io - -open = partial(io.open, encoding='utf8') - -log = logging.getLogger(__name__) - -_max_input_chars_per_word = 100 - - -def _wordpiece(token, vocab, unk_token, prefix='##', sentencepiece_prefix=''): - """ wordpiece: helloworld => [hello, ##world] """ - chars = list(token) - if len(chars) > _max_input_chars_per_word: - return [unk_token], [(0, len(chars))] - - is_bad = False - start = 0 - sub_tokens = [] - sub_pos = [] - while start < len(chars): - end = len(chars) - cur_substr = None - while start < end: - substr = "".join(chars[start:end]) - if start == 0: - substr = sentencepiece_prefix + substr - if start > 0: - substr = prefix + substr - if substr in vocab: - cur_substr = substr - break - end -= 1 - if cur_substr is None: - is_bad = True - break - sub_tokens.append(cur_substr) - sub_pos.append((start, end)) - start = end - if is_bad: - return [unk_token], [(0, len(chars))] - else: - return sub_tokens, sub_pos - - -class ErnieTokenizer(object): - def __init__(self, - vocab, - unk_token='[UNK]', - sep_token='[SEP]', - cls_token='[CLS]', - pad_token='[PAD]', - mask_token='[MASK]', - wordpiece_prefix='##', - sentencepiece_prefix='', - lower=True, - encoding='utf8', - special_token_list=[]): - if not isinstance(vocab, dict): - raise ValueError( - 'expect `vocab` to be instance of dict, got %s' % type(vocab)) - self.vocab = vocab - self.lower = lower - self.prefix = wordpiece_prefix - self.sentencepiece_prefix = sentencepiece_prefix - self.pad_id = self.vocab[pad_token] - self.cls_id = cls_token and self.vocab[cls_token] - self.sep_id = sep_token and self.vocab[sep_token] - self.unk_id = unk_token and self.vocab[unk_token] - self.mask_id = mask_token and self.vocab[mask_token] - self.unk_token = unk_token - special_tokens = { - pad_token, cls_token, sep_token, unk_token, mask_token - } | set(special_token_list) - pat_str = '' - for t in special_tokens: - if t is None: - continue - pat_str += '(%s)|' % re.escape(t) - pat_str += r'([a-zA-Z0-9]+|\S)' - log.debug('regex: %s' % pat_str) - self.pat = re.compile(pat_str) - self.encoding = encoding - - def tokenize(self, text): - if len(text) == 0: - return [] - if six.PY3 and not isinstance(text, six.string_types): - text = text.decode(self.encoding) - if six.PY2 and isinstance(text, str): - text = text.decode(self.encoding) - - res = [] - for match in self.pat.finditer(text): - match_group = match.group(0) - if match.groups()[-1]: - if self.lower: - match_group = match_group.lower() - words, _ = _wordpiece( - match_group, - vocab=self.vocab, - unk_token=self.unk_token, - prefix=self.prefix, - sentencepiece_prefix=self.sentencepiece_prefix) - else: - words = [match_group] - res += words - return res - - def convert_tokens_to_ids(self, tokens): - return [self.vocab.get(t, self.unk_id) for t in tokens] - - def truncate(self, id1, id2, seqlen): - len1 = len(id1) - len2 = len(id2) - half = seqlen // 2 - if len1 > len2: - len1_truncated, len2_truncated = max(half, seqlen - len2), min( - half, len2) - else: - len1_truncated, len2_truncated = min(half, seqlen - len1), max( - half, seqlen - len1) - return id1[:len1_truncated], id2[:len2_truncated] - - def build_for_ernie(self, text_id, pair_id=[]): - """build sentence type id, add [CLS] [SEP]""" - text_id_type = np.zeros_like(text_id, dtype=np.int64) - ret_id = np.concatenate([[self.cls_id], text_id, [self.sep_id]], 0) - ret_id_type = np.concatenate([[0], text_id_type, [0]], 0) - - if len(pair_id): - 
pair_id_type = np.ones_like(pair_id, dtype=np.int64) - ret_id = np.concatenate([ret_id, pair_id, [self.sep_id]], 0) - ret_id_type = np.concatenate([ret_id_type, pair_id_type, [1]], 0) - return ret_id, ret_id_type - - def encode(self, text, pair=None, truncate_to=None): - text_id = np.array( - self.convert_tokens_to_ids(self.tokenize(text)), dtype=np.int64) - text_id_type = np.zeros_like(text_id, dtype=np.int64) - if pair is not None: - pair_id = np.array( - self.convert_tokens_to_ids(self.tokenize(pair)), dtype=np.int64) - else: - pair_id = [] - if truncate_to is not None: - text_id, pair_id = self.truncate( - text_id, [] if pair_id is None else pair_id, truncate_to) - - ret_id, ret_id_type = self.build_for_ernie(text_id, pair_id) - return ret_id, ret_id_type diff --git a/hub_module/modules/text/text_generation/ernie_gen_acrostic_poetry/module.py b/hub_module/modules/text/text_generation/ernie_gen_acrostic_poetry/module.py deleted file mode 100644 index 1a92996d5e0358ee289eed673a29389fd43bd1e0..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/ernie_gen_acrostic_poetry/module.py +++ /dev/null @@ -1,204 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import ast -import json - -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.module.module import runnable -from paddlehub.module.nlp_module import DataFormatError -from paddlehub.common.logger import logger -from paddlehub.module.module import moduleinfo, serving - -import argparse -import os -import numpy as np - -import paddle.fluid.dygraph as D - -from ernie_gen_acrostic_poetry.model.tokenizing_ernie import ErnieTokenizer -from ernie_gen_acrostic_poetry.model.decode import beam_search_infilling -from ernie_gen_acrostic_poetry.model.modeling_ernie_gen import ErnieModelForGeneration - - -@moduleinfo( - name="ernie_gen_acrostic_poetry", - version="1.0.1", - summary= - "ERNIE-GEN is a multi-flow language generation framework for both pre-training and fine-tuning. 
This module has been fine-tuned for the acrostic poetry generation task.", - author="adaxiadaxi", - author_email="", - type="nlp/text_generation", -) -class ErnieGen(hub.NLPPredictionModule): - def _initialize(self, line=4, word=7): - """ - initialize with the necessary elements - """ - if line not in [4, 8]: - raise ValueError("`line` can only be 4 or 8.") - if word not in [5, 7]: - raise ValueError("`word` can only be 5 or 7.") - - self.line = line - assets_path = os.path.join(self.directory, "assets") - gen_checkpoint_path = os.path.join( - assets_path, "ernie_gen_acrostic_poetry_L%sW%s" % (line, word)) - ernie_cfg_path = os.path.join(assets_path, 'ernie_config.json') - with open(ernie_cfg_path, encoding='utf8') as ernie_cfg_file: - ernie_cfg = dict(json.loads(ernie_cfg_file.read())) - ernie_vocab_path = os.path.join(assets_path, 'vocab.txt') - with open(ernie_vocab_path, encoding='utf8') as ernie_vocab_file: - ernie_vocab = { - j.strip().split('\t')[0]: i - for i, j in enumerate(ernie_vocab_file.readlines()) - } - - with fluid.dygraph.guard(fluid.CPUPlace()): - with fluid.unique_name.guard(): - self.model = ErnieModelForGeneration(ernie_cfg) - finetuned_states, _ = D.load_dygraph(gen_checkpoint_path) - self.model.set_dict(finetuned_states) - - self.tokenizer = ErnieTokenizer(ernie_vocab) - self.rev_dict = {v: k for k, v in self.tokenizer.vocab.items()} - self.rev_dict[self.tokenizer.pad_id] = '' # replace [PAD] - self.rev_dict[self.tokenizer.unk_id] = '' # replace [UNK] - self.rev_lookup = np.vectorize(lambda i: self.rev_dict[i]) - - @serving - def generate(self, texts, use_gpu=False, beam_width=5): - """ - Generate acrostic poems, using the characters of each input text as the heads of the lines. - - Args: - texts(list): the head characters, one string per poem. - use_gpu(bool): whether to use GPU for prediction - beam_width(int): the beam search width. - - Returns: - results(list): the generated poems. - """ - if texts and isinstance(texts, list) and all(texts) and all( - [isinstance(text, str) for text in texts]): - predicted_data = texts - else: - raise ValueError( - "The input texts should be a list with nonempty string elements."
) - for i, text in enumerate(texts): - if len(text) > self.line: - logger.warning( - 'The input text: %s contains more than %i characters; the extra characters will be cut off' - % (text, self.line)) - texts[i] = text[:self.line] - - for char in text: - if not '\u4e00' <= char <= '\u9fff': - logger.warning( - 'The input text: %s contains non-Chinese characters, which may lead to unexpected output' - % text) - break - - if use_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ: - use_gpu = False - logger.warning( - "use_gpu has been set to False: use_gpu=True requires the environment variable CUDA_VISIBLE_DEVICES to be set" - ) - if use_gpu: - place = fluid.CUDAPlace(0) - else: - place = fluid.CPUPlace() - - with fluid.dygraph.guard(place): - self.model.eval() - results = [] - for text in predicted_data: - sample_results = [] - ids, sids = self.tokenizer.encode(text) - src_ids = D.to_variable(np.expand_dims(ids, 0)) - src_sids = D.to_variable(np.expand_dims(sids, 0)) - output_ids = beam_search_infilling( - self.model, - src_ids, - src_sids, - eos_id=self.tokenizer.sep_id, - sos_id=self.tokenizer.cls_id, - attn_id=self.tokenizer.vocab['[MASK]'], - max_decode_len=80, - max_encode_len=20, - beam_width=beam_width, - tgt_type_id=1) - output_str = self.rev_lookup(output_ids[0].numpy()) - - for ostr in output_str.tolist(): - if '[SEP]' in ostr: - ostr = ostr[:ostr.index('[SEP]')] - sample_results.append("".join(ostr)) - results.append(sample_results) - return results - - def add_module_config_arg(self): - """ - Add the command config options - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether to use GPU for prediction") - - self.arg_config_group.add_argument( - '--beam_width', type=int, default=5, help="the beam search width") - - @runnable - def run_cmd(self, argvs): - """ - Run as a command - """ - self.parser = argparse.ArgumentParser( - description='Run the %s module.' % self.name, - prog='hub run %s' % self.name, - usage='%(prog)s', - add_help=True) - - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, optional.") - - self.add_module_config_arg() - self.add_module_input_arg() - - args = self.parser.parse_args(argvs) - - try: - input_data = self.check_input_data(args) - except (DataFormatError, RuntimeError): - self.parser.print_help() - return None - - results = self.generate( - texts=input_data, use_gpu=args.use_gpu, beam_width=args.beam_width) - - return results - - -if __name__ == "__main__": - module = ErnieGen() - for result in module.generate(['夏雨荷', '我喜欢你'], beam_width=5): - print(result) diff --git a/hub_module/modules/text/text_generation/ernie_gen_couplet/model/decode.py b/hub_module/modules/text/text_generation/ernie_gen_couplet/model/decode.py deleted file mode 100644 index c58fdbe2e8902346162f8733ef0cd94ba65757a2..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/ernie_gen_couplet/model/decode.py +++ /dev/null @@ -1,301 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re -import numpy as np -from collections import namedtuple - -import paddle.fluid as F -import paddle.fluid.layers as L -import paddle.fluid.dygraph as D - - -def gen_bias(encoder_inputs, decoder_inputs, step): - decoder_bsz, decoder_seqlen = decoder_inputs.shape[:2] - attn_bias = L.reshape( - L.range(0, decoder_seqlen, 1, dtype='float32') + 1, [1, -1, 1]) - decoder_bias = L.cast( - (L.matmul(attn_bias, 1. / attn_bias, transpose_y=True) >= 1.), - 'float32') #[1, 1, decoderlen, decoderlen] - encoder_bias = L.unsqueeze( - L.cast(L.ones_like(encoder_inputs), 'float32'), - [1]) #[bsz, 1, encoderlen] - encoder_bias = L.expand( - encoder_bias, [1, decoder_seqlen, 1]) #[bsz,decoderlen, encoderlen] - decoder_bias = L.expand(decoder_bias, - [decoder_bsz, 1, 1]) #[bsz, decoderlen, decoderlen] - if step > 0: - bias = L.concat([ - encoder_bias, - L.ones([decoder_bsz, decoder_seqlen, step], 'float32'), decoder_bias - ], -1) - else: - bias = L.concat([encoder_bias, decoder_bias], -1) - return bias - - -@D.no_grad -def greedy_search_infilling(model, - q_ids, - q_sids, - sos_id, - eos_id, - attn_id, - max_encode_len=640, - max_decode_len=100, - tgt_type_id=3): - model.eval() - _, logits, info = model(q_ids, q_sids) - gen_ids = L.argmax(logits, -1) - d_batch, d_seqlen = q_ids.shape - seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) - has_stopped = np.zeros([d_batch], dtype=np.bool) - gen_seq_len = np.zeros([d_batch], dtype=np.int64) - output_ids = [] - - past_cache = info['caches'] - - cls_ids = L.ones([d_batch], dtype='int64') * sos_id - attn_ids = L.ones([d_batch], dtype='int64') * attn_id - ids = L.stack([cls_ids, attn_ids], -1) - for step in range(max_decode_len): - bias = gen_bias(q_ids, ids, step) - pos_ids = D.to_variable( - np.tile(np.array([[step, step + 1]], dtype=np.int64), [d_batch, 1])) - pos_ids += seqlen - _, logits, info = model( - ids, - L.ones_like(ids) * tgt_type_id, - pos_ids=pos_ids, - attn_bias=bias, - past_cache=past_cache) - gen_ids = L.argmax(logits, -1) - - past_cached_k, past_cached_v = past_cache - cached_k, cached_v = info['caches'] - cached_k = [ - L.concat([pk, k[:, :1, :]], 1) - for pk, k in zip(past_cached_k, cached_k) - ] # concat cached - cached_v = [ - L.concat([pv, v[:, :1, :]], 1) - for pv, v in zip(past_cached_v, cached_v) - ] - past_cache = (cached_k, cached_v) - - gen_ids = gen_ids[:, 1] - ids = L.stack([gen_ids, attn_ids], 1) - - gen_ids = gen_ids.numpy() - has_stopped |= (gen_ids == eos_id).astype(np.bool) - gen_seq_len += (1 - has_stopped.astype(np.int64)) - output_ids.append(gen_ids.tolist()) - if has_stopped.all(): - break - output_ids = np.array(output_ids).transpose([1, 0]) - return output_ids - - -BeamSearchState = namedtuple('BeamSearchState', - ['log_probs', 'lengths', 'finished']) -BeamSearchOutput = namedtuple('BeamSearchOutput', - ['scores', 'predicted_ids', 'beam_parent_ids']) - - -def log_softmax(x): - e_x = np.exp(x - np.max(x)) - return np.log(e_x / e_x.sum()) - - -def mask_prob(p, onehot_eos, finished): - is_finished = L.cast(L.reshape(finished, [-1, 1]) != 0, 'float32') - p = is_finished * (1. 
- L.cast(onehot_eos, 'float32')) * -9999. + ( - 1. - is_finished) * p - return p - - -def hyp_score(log_probs, length, length_penalty): - lp = L.pow((5. + L.cast(length, 'float32')) / 6., length_penalty) - return log_probs / lp - - -def beam_search_step(state, logits, eos_id, beam_width, is_first_step, - length_penalty): - """logits.shape == [B*W, V]""" - _, vocab_size = logits.shape - - bsz, beam_width = state.log_probs.shape - onehot_eos = L.cast( - F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size), 'int64') #[1, V] - - probs = L.log(L.softmax(logits)) #[B*W, V] - probs = mask_prob(probs, onehot_eos, state.finished) #[B*W, V] - allprobs = L.reshape(state.log_probs, [-1, 1]) + probs #[B*W, V] - - not_finished = 1 - L.reshape(state.finished, [-1, 1]) #[B*W,1] - not_eos = 1 - onehot_eos - length_to_add = not_finished * not_eos #[B*W,V] - alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add - - allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size]) - alllen = L.reshape(alllen, [-1, beam_width * vocab_size]) - allscore = hyp_score(allprobs, alllen, length_penalty) - if is_first_step: - allscore = L.reshape( - allscore, - [bsz, beam_width, -1])[:, 0, :] # first step only consiter beam 0 - scores, idx = L.topk(allscore, k=beam_width) #[B, W] - next_beam_id = idx // vocab_size #[B, W] - next_word_id = idx % vocab_size - - gather_idx = L.concat([L.where(idx != -1)[:, :1], - L.reshape(idx, [-1, 1])], 1) - next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape) - next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape) - - gather_idx = L.concat( - [L.where(next_beam_id != -1)[:, :1], - L.reshape(next_beam_id, [-1, 1])], 1) - next_finished = L.reshape( - L.gather_nd(state.finished, gather_idx), - state.finished.shape) #[gather new beam state according to new beam id] - - next_finished += L.cast(next_word_id == eos_id, 'int64') - next_finished = L.cast(next_finished > 0, 'int64') - - next_state = BeamSearchState( - log_probs=next_probs, lengths=next_len, finished=next_finished) - output = BeamSearchOutput( - scores=scores, predicted_ids=next_word_id, beam_parent_ids=next_beam_id) - - return output, next_state - - -@D.no_grad -def beam_search_infilling(model, - q_ids, - q_sids, - sos_id, - eos_id, - attn_id, - max_encode_len=640, - max_decode_len=100, - beam_width=5, - tgt_type_id=3, - length_penalty=1.0): - model.eval() - _, __, info = model(q_ids, q_sids) - d_batch, d_seqlen = q_ids.shape - - state = BeamSearchState( - log_probs=L.zeros([d_batch, beam_width], 'float32'), - lengths=L.zeros([d_batch, beam_width], 'int64'), - finished=L.zeros([d_batch, beam_width], 'int64')) - outputs = [] - - def reorder_(t, parent_id): - """reorder cache according to parent beam id""" - gather_idx = L.where(parent_id != -1)[:, 0] * beam_width + L.reshape( - parent_id, [-1]) - t = L.gather(t, gather_idx) - return t - - def tile_(t, times): - _shapes = list(t.shape[1:]) - ret = L.reshape( - L.expand(L.unsqueeze(t, [1]), [ - 1, - times, - ] + [ - 1, - ] * len(_shapes)), [ - -1, - ] + _shapes) - return ret - - cached_k, cached_v = info['caches'] - cached_k = [tile_(k, beam_width) for k in cached_k] - cached_v = [tile_(v, beam_width) for v in cached_v] - past_cache = (cached_k, cached_v) - - q_ids = tile_(q_ids, beam_width) - seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) - - cls_ids = L.ones([d_batch * beam_width], dtype='int64') * sos_id - attn_ids = L.ones([d_batch * beam_width], dtype='int64') * attn_id # SOS - ids = L.stack([cls_ids, attn_ids], -1) - for step 
in range(max_decode_len): - bias = gen_bias(q_ids, ids, step) - pos_ids = D.to_variable( - np.tile( - np.array([[step, step + 1]], dtype=np.int64), - [d_batch * beam_width, 1])) - pos_ids += seqlen - - _, logits, info = model( - ids, - L.ones_like(ids) * tgt_type_id, - pos_ids=pos_ids, - attn_bias=bias, - past_cache=past_cache) - - output, state = beam_search_step( - state, - logits[:, 1], - eos_id=eos_id, - beam_width=beam_width, - is_first_step=(step == 0), - length_penalty=length_penalty) - outputs.append(output) - - past_cached_k, past_cached_v = past_cache - cached_k, cached_v = info['caches'] - cached_k = [ - reorder_(L.concat([pk, k[:, :1, :]], 1), output.beam_parent_ids) - for pk, k in zip(past_cached_k, cached_k) - ] # concat cached - cached_v = [ - reorder_(L.concat([pv, v[:, :1, :]], 1), output.beam_parent_ids) - for pv, v in zip(past_cached_v, cached_v) - ] - past_cache = (cached_k, cached_v) - - pred_ids_flatten = L.reshape(output.predicted_ids, - [d_batch * beam_width]) - ids = L.stack([pred_ids_flatten, attn_ids], 1) - - if state.finished.numpy().all(): - break - - final_ids = L.stack([o.predicted_ids for o in outputs], 0) - final_parent_ids = L.stack([o.beam_parent_ids for o in outputs], 0) - final_ids = L.gather_tree(final_ids, final_parent_ids) #[:, :, - #0] #pick best beam - final_ids = L.transpose( - L.reshape(final_ids, [-1, d_batch * 1, beam_width]), [1, 2, 0]) - return final_ids - - -en_patten = re.compile(r'^[a-zA-Z0-9]*$') - - -def post_process(token): - if token.startswith('##'): - ret = token[2:] - else: - if en_patten.match(token): - ret = ' ' + token - else: - ret = token - return ret diff --git a/hub_module/modules/text/text_generation/ernie_gen_couplet/model/file_utils.py b/hub_module/modules/text/text_generation/ernie_gen_couplet/model/file_utils.py deleted file mode 100644 index 613a5213a83e7fbd2a126cdb49b12eb62d4de41f..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/ernie_gen_couplet/model/file_utils.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os - -from tqdm import tqdm -from paddlehub.common.logger import logger -from paddlehub.common.dir import MODULE_HOME - - -def _fetch_from_remote(url, force_download=False): - import tempfile, requests, tarfile - cached_dir = os.path.join(MODULE_HOME, "ernie_for_gen") - if force_download or not os.path.exists(cached_dir): - with tempfile.NamedTemporaryFile() as f: - #url = 'https://ernie.bj.bcebos.com/ERNIE_stable.tgz' - r = requests.get(url, stream=True) - total_len = int(r.headers.get('content-length')) - for chunk in tqdm( - r.iter_content(chunk_size=1024), - total=total_len // 1024, - desc='downloading %s' % url, - unit='KB'): - if chunk: - f.write(chunk) - f.flush() - logger.debug('extacting... 
to %s' % f.name) - with tarfile.open(f.name) as tf: - tf.extractall(path=cached_dir) - logger.debug('%s cached in %s' % (url, cached_dir)) - return cached_dir - - -def add_docstring(doc): - def func(f): - f.__doc__ += ('\n======other docs from supper class ======\n%s' % doc) - return f - - return func diff --git a/hub_module/modules/text/text_generation/ernie_gen_couplet/model/modeling_ernie.py b/hub_module/modules/text/text_generation/ernie_gen_couplet/model/modeling_ernie.py deleted file mode 100644 index 7c2304f67d7347e584c244ab8384eff0720f7cc2..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/ernie_gen_couplet/model/modeling_ernie.py +++ /dev/null @@ -1,379 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function -from __future__ import unicode_literals - -import logging - -import paddle.fluid.dygraph as D -import paddle.fluid as F -import paddle.fluid.layers as L - -log = logging.getLogger(__name__) - - -def _build_linear(n_in, n_out, name, init, act=None): - return D.Linear( - n_in, - n_out, - param_attr=F.ParamAttr( - name='%s.w_0' % name if name is not None else None, - initializer=init), - bias_attr='%s.b_0' % name if name is not None else None, - act=act) - - -def _build_ln(n_in, name): - return D.LayerNorm( - normalized_shape=n_in, - param_attr=F.ParamAttr( - name='%s_layer_norm_scale' % name if name is not None else None, - initializer=F.initializer.Constant(1.)), - bias_attr=F.ParamAttr( - name='%s_layer_norm_bias' % name if name is not None else None, - initializer=F.initializer.Constant(1.)), - ) - - -def append_name(name, postfix): - if name is None: - return None - elif name == '': - return postfix - else: - return '%s_%s' % (name, postfix) - - -class AttentionLayer(D.Layer): - def __init__(self, cfg, name=None): - super(AttentionLayer, self).__init__() - initializer = F.initializer.TruncatedNormal( - scale=cfg['initializer_range']) - d_model = cfg['hidden_size'] - n_head = cfg['num_attention_heads'] - assert d_model % n_head == 0 - d_model_q = cfg.get('query_hidden_size_per_head', - d_model // n_head) * n_head - d_model_v = cfg.get('value_hidden_size_per_head', - d_model // n_head) * n_head - self.n_head = n_head - self.d_key = d_model_q // n_head - self.q = _build_linear(d_model, d_model_q, append_name( - name, 'query_fc'), initializer) - self.k = _build_linear(d_model, d_model_q, append_name(name, 'key_fc'), - initializer) - self.v = _build_linear(d_model, d_model_v, append_name( - name, 'value_fc'), initializer) - self.o = _build_linear(d_model_v, d_model, append_name( - name, 'output_fc'), initializer) - self.dropout = lambda i: L.dropout( - i, - dropout_prob=cfg['attention_probs_dropout_prob'], - dropout_implementation="upscale_in_train", - ) if self.training else i - - def forward(self, queries, keys, values, attn_bias, past_cache): - assert 
len(queries.shape) == len(keys.shape) == len(values.shape) == 3 - - q = self.q(queries) - k = self.k(keys) - v = self.v(values) - - cache = (k, v) - if past_cache is not None: - cached_k, cached_v = past_cache - k = L.concat([cached_k, k], 1) - v = L.concat([cached_v, v], 1) - - q = L.transpose( - L.reshape(q, [0, 0, self.n_head, q.shape[-1] // self.n_head]), - [0, 2, 1, 3]) #[batch, head, seq, dim] - k = L.transpose( - L.reshape(k, [0, 0, self.n_head, k.shape[-1] // self.n_head]), - [0, 2, 1, 3]) #[batch, head, seq, dim] - v = L.transpose( - L.reshape(v, [0, 0, self.n_head, v.shape[-1] // self.n_head]), - [0, 2, 1, 3]) #[batch, head, seq, dim] - - q = L.scale(q, scale=self.d_key**-0.5) - score = L.matmul(q, k, transpose_y=True) - if attn_bias is not None: - score += attn_bias - score = L.softmax(score, use_cudnn=True) - score = self.dropout(score) - - out = L.matmul(score, v) - out = L.transpose(out, [0, 2, 1, 3]) - out = L.reshape(out, [0, 0, out.shape[2] * out.shape[3]]) - - out = self.o(out) - return out, cache - - -class PositionwiseFeedForwardLayer(D.Layer): - def __init__(self, cfg, name=None): - super(PositionwiseFeedForwardLayer, self).__init__() - initializer = F.initializer.TruncatedNormal( - scale=cfg['initializer_range']) - d_model = cfg['hidden_size'] - d_ffn = cfg.get('intermediate_size', 4 * d_model) - assert cfg['hidden_act'] in ['relu', 'gelu'] - self.i = _build_linear( - d_model, - d_ffn, - append_name(name, 'fc_0'), - initializer, - act=cfg['hidden_act']) - self.o = _build_linear(d_ffn, d_model, append_name(name, 'fc_1'), - initializer) - prob = cfg.get('intermediate_dropout_prob', 0.) - self.dropout = lambda i: L.dropout( - i, - dropout_prob=prob, - dropout_implementation="upscale_in_train", - ) if self.training else i - - def forward(self, inputs): - hidden = self.i(inputs) - hidden = self.dropout(hidden) - out = self.o(hidden) - return out - - -class ErnieBlock(D.Layer): - def __init__(self, cfg, name=None): - super(ErnieBlock, self).__init__() - d_model = cfg['hidden_size'] - initializer = F.initializer.TruncatedNormal( - scale=cfg['initializer_range']) - - self.attn = AttentionLayer( - cfg, name=append_name(name, 'multi_head_att')) - self.ln1 = _build_ln(d_model, name=append_name(name, 'post_att')) - self.ffn = PositionwiseFeedForwardLayer( - cfg, name=append_name(name, 'ffn')) - self.ln2 = _build_ln(d_model, name=append_name(name, 'post_ffn')) - prob = cfg.get('intermediate_dropout_prob', cfg['hidden_dropout_prob']) - self.dropout = lambda i: L.dropout( - i, - dropout_prob=prob, - dropout_implementation="upscale_in_train", - ) if self.training else i - - def forward(self, inputs, attn_bias=None, past_cache=None): - attn_out, cache = self.attn( - inputs, inputs, inputs, attn_bias, - past_cache=past_cache) #self attn - attn_out = self.dropout(attn_out) - hidden = attn_out + inputs - hidden = self.ln1(hidden) # dropout/ add/ norm - - ffn_out = self.ffn(hidden) - ffn_out = self.dropout(ffn_out) - hidden = ffn_out + hidden - hidden = self.ln2(hidden) - return hidden, cache - - -class ErnieEncoderStack(D.Layer): - def __init__(self, cfg, name=None): - super(ErnieEncoderStack, self).__init__() - n_layers = cfg['num_hidden_layers'] - self.block = D.LayerList([ - ErnieBlock(cfg, append_name(name, 'layer_%d' % i)) - for i in range(n_layers) - ]) - - def forward(self, inputs, attn_bias=None, past_cache=None): - if past_cache is not None: - assert isinstance( - past_cache, tuple - ), 'unknown type of `past_cache`, expect tuple or list. 
got %s' % repr( - type(past_cache)) - past_cache = list(zip(*past_cache)) - else: - past_cache = [None] * len(self.block) - cache_list_k, cache_list_v, hidden_list = [], [], [inputs] - - for b, p in zip(self.block, past_cache): - inputs, cache = b(inputs, attn_bias=attn_bias, past_cache=p) - cache_k, cache_v = cache - cache_list_k.append(cache_k) - cache_list_v.append(cache_v) - hidden_list.append(inputs) - - return inputs, hidden_list, (cache_list_k, cache_list_v) - - -class ErnieModel(D.Layer): - def __init__(self, cfg, name=None): - """ - Fundamental pretrained Ernie model - """ - log.debug('init ErnieModel with config: %s' % repr(cfg)) - D.Layer.__init__(self) - d_model = cfg['hidden_size'] - d_emb = cfg.get('emb_size', cfg['hidden_size']) - d_vocab = cfg['vocab_size'] - d_pos = cfg['max_position_embeddings'] - d_sent = cfg.get("sent_type_vocab_size") or cfg['type_vocab_size'] - self.n_head = cfg['num_attention_heads'] - self.return_additional_info = cfg.get('return_additional_info', False) - initializer = F.initializer.TruncatedNormal( - scale=cfg['initializer_range']) - - self.ln = _build_ln(d_model, name=append_name(name, 'pre_encoder')) - self.word_emb = D.Embedding([d_vocab, d_emb], - param_attr=F.ParamAttr( - name=append_name( - name, 'word_embedding'), - initializer=initializer)) - self.pos_emb = D.Embedding([d_pos, d_emb], - param_attr=F.ParamAttr( - name=append_name(name, 'pos_embedding'), - initializer=initializer)) - self.sent_emb = D.Embedding([d_sent, d_emb], - param_attr=F.ParamAttr( - name=append_name( - name, 'sent_embedding'), - initializer=initializer)) - prob = cfg['hidden_dropout_prob'] - self.dropout = lambda i: L.dropout( - i, - dropout_prob=prob, - dropout_implementation="upscale_in_train", - ) if self.training else i - - self.encoder_stack = ErnieEncoderStack(cfg, append_name( - name, 'encoder')) - if cfg.get('has_pooler', True): - self.pooler = _build_linear( - cfg['hidden_size'], - cfg['hidden_size'], - append_name(name, 'pooled_fc'), - initializer, - act='tanh') - else: - self.pooler = None - self.train() - - def eval(self): - if F.in_dygraph_mode(): - super(ErnieModel, self).eval() - self.training = False - for l in self.sublayers(): - l.training = False - - def train(self): - if F.in_dygraph_mode(): - super(ErnieModel, self).train() - self.training = True - for l in self.sublayers(): - l.training = True - - def forward(self, - src_ids, - sent_ids=None, - pos_ids=None, - input_mask=None, - attn_bias=None, - past_cache=None, - use_causal_mask=False): - """ - Args: - src_ids (`Variable` of shape `[batch_size, seq_len]`): - Indices of input sequence tokens in the vocabulary. - sent_ids (optional, `Variable` of shape `[batch_size, seq_len]`): - aka token_type_ids, Segment token indices to indicate first and second portions of the inputs. - if None, assume all tokens come from `segment_a` - pos_ids(optional, `Variable` of shape `[batch_size, seq_len]`): - Indices of positions of each input sequence tokens in the position embeddings. - input_mask(optional `Variable` of shape `[batch_size, seq_len]`): - Mask to avoid performing attention on the padding token indices of the encoder input. 
- attn_bias(optional, `Variable` of shape `[batch_size, seq_len, seq_len] or False`): - 3D version of `input_mask`, if set, overrides `input_mask`; if set not False, will not apply attention mask - past_cache(optional, tuple of two lists: cached key and cached value, - each is a list of `Variable`s of shape `[batch_size, seq_len, hidden_size]`): - cached key/value tensor that will be concated to generated key/value when performing self attention. - if set, `attn_bias` should not be None. - - Returns: - pooled (`Variable` of shape `[batch_size, hidden_size]`): - output logits of pooler classifier - encoded(`Variable` of shape `[batch_size, seq_len, hidden_size]`): - output logits of transformer stack - """ - assert len( - src_ids.shape - ) == 2, 'expect src_ids.shape = [batch, sequecen], got %s' % (repr( - src_ids.shape)) - assert attn_bias is not None if past_cache else True, 'if `past_cache` is specified; attn_bias should not be None' - d_batch = L.shape(src_ids)[0] - d_seqlen = L.shape(src_ids)[1] - if pos_ids is None: - pos_ids = L.reshape(L.range(0, d_seqlen, 1, dtype='int32'), [1, -1]) - pos_ids = L.cast(pos_ids, 'int64') - if attn_bias is None: - if input_mask is None: - input_mask = L.cast(src_ids != 0, 'float32') - assert len(input_mask.shape) == 2 - input_mask = L.unsqueeze(input_mask, axes=[-1]) - attn_bias = L.matmul(input_mask, input_mask, transpose_y=True) - if use_causal_mask: - sequence = L.reshape( - L.range(0, d_seqlen, 1, dtype='float32') + 1., - [1, 1, -1, 1]) - causal_mask = L.cast( - (L.matmul(sequence, 1. / sequence, transpose_y=True) >= 1.), - 'float32') - attn_bias *= causal_mask - else: - assert len( - attn_bias.shape - ) == 3, 'expect attn_bias tobe rank 3, got %r' % attn_bias.shape - attn_bias = (1. - attn_bias) * -10000.0 - attn_bias = L.unsqueeze(attn_bias, [1]) - attn_bias = L.expand(attn_bias, - [1, self.n_head, 1, 1]) # avoid broadcast =_= - attn_bias.stop_gradient = True - - if sent_ids is None: - sent_ids = L.zeros_like(src_ids) - - src_embedded = self.word_emb(src_ids) - pos_embedded = self.pos_emb(pos_ids) - sent_embedded = self.sent_emb(sent_ids) - embedded = src_embedded + pos_embedded + sent_embedded - - embedded = self.dropout(self.ln(embedded)) - - encoded, hidden_list, cache_list = self.encoder_stack( - embedded, attn_bias, past_cache=past_cache) - if self.pooler is not None: - pooled = self.pooler(encoded[:, 0, :]) - else: - pooled = None - - additional_info = { - 'hiddens': hidden_list, - 'caches': cache_list, - } - - if self.return_additional_info: - return pooled, encoded, additional_info - else: - return pooled, encoded diff --git a/hub_module/modules/text/text_generation/ernie_gen_couplet/model/modeling_ernie_gen.py b/hub_module/modules/text/text_generation/ernie_gen_couplet/model/modeling_ernie_gen.py deleted file mode 100644 index c2245ec3f03c4bf75ece5c5856e7074d4ab28b68..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/ernie_gen_couplet/model/modeling_ernie_gen.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.fluid as F -import paddle.fluid.layers as L - -from ernie_gen_couplet.model.modeling_ernie import ErnieModel -from ernie_gen_couplet.model.modeling_ernie import _build_linear, _build_ln, append_name - - -class ErnieModelForGeneration(ErnieModel): - def __init__(self, cfg, name=None): - cfg['return_additional_info'] = True - cfg['has_pooler'] = False - super(ErnieModelForGeneration, self).__init__(cfg, name=name) - initializer = F.initializer.TruncatedNormal( - scale=cfg['initializer_range']) - d_model = cfg['hidden_size'] - d_vocab = cfg['vocab_size'] - - self.mlm = _build_linear( - d_model, - d_model, - append_name(name, 'mask_lm_trans_fc'), - initializer, - act=cfg['hidden_act']) - self.mlm_ln = _build_ln( - d_model, name=append_name(name, 'mask_lm_trans')) - self.mlm_bias = L.create_parameter( - dtype='float32', - shape=[d_vocab], - attr=F.ParamAttr( - name=append_name(name, 'mask_lm_out_fc.b_0'), - initializer=F.initializer.Constant(value=0.0)), - is_bias=True, - ) - - def forward(self, src_ids, *args, **kwargs): - tgt_labels = kwargs.pop('tgt_labels', None) - tgt_pos = kwargs.pop('tgt_pos', None) - encode_only = kwargs.pop('encode_only', False) - _, encoded, info = ErnieModel.forward(self, src_ids, *args, **kwargs) - if encode_only: - return None, None, info - elif tgt_labels is None: - encoded = self.mlm(encoded) - encoded = self.mlm_ln(encoded) - logits = L.matmul( - encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias - output_ids = L.argmax(logits, -1) - return output_ids, logits, info - else: - encoded_2d = L.gather_nd(encoded, tgt_pos) - encoded_2d = self.mlm(encoded_2d) - encoded_2d = self.mlm_ln(encoded_2d) - logits_2d = L.matmul( - encoded_2d, self.word_emb.weight, - transpose_y=True) + self.mlm_bias - if len(tgt_labels.shape) == 1: - tgt_labels = L.reshape(tgt_labels, [-1, 1]) - - loss = L.reduce_mean( - L.softmax_with_cross_entropy( - logits_2d, - tgt_labels, - soft_label=(tgt_labels.shape[-1] != 1))) - return loss, logits_2d, info diff --git a/hub_module/modules/text/text_generation/ernie_gen_couplet/model/tokenizing_ernie.py b/hub_module/modules/text/text_generation/ernie_gen_couplet/model/tokenizing_ernie.py deleted file mode 100644 index 3039b7028f5da991189527b8145b05c952dafbbd..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/ernie_gen_couplet/model/tokenizing_ernie.py +++ /dev/null @@ -1,171 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
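The `_wordpiece` helper in the tokenizer file below segments each token greedily, longest match first (`helloworld => [hello, ##world]`). A self-contained sketch of the same loop (the toy vocabulary here is invented for illustration; the real module loads `vocab.txt`):

```python
def wordpiece(token, vocab, unk='[UNK]', prefix='##'):
    """Greedy longest-match-first segmentation, mirroring _wordpiece below."""
    chars, start, pieces = list(token), 0, []
    while start < len(chars):
        end = len(chars)
        while start < end:  # shrink the window until a vocab entry matches
            sub = ''.join(chars[start:end])
            if start > 0:
                sub = prefix + sub  # continuation pieces carry the '##' prefix
            if sub in vocab:
                pieces.append(sub)
                break
            end -= 1
        else:  # no substring starting here is in the vocab: give up on the token
            return [unk]
        start = end
    return pieces

vocab = {'hello', '##world', '##wor', '##ld'}
print(wordpiece('helloworld', vocab))  # ['hello', '##world']
```

The real helper also returns the character span of each piece and falls back to `[UNK]` for overlong tokens; this sketch keeps only the matching loop.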
- -import six -import re -import logging -from functools import partial - -import numpy as np - -import io - -open = partial(io.open, encoding='utf8') - -log = logging.getLogger(__name__) - -_max_input_chars_per_word = 100 - - -def _wordpiece(token, vocab, unk_token, prefix='##', sentencepiece_prefix=''): - """ wordpiece: helloworld => [hello, ##world] """ - chars = list(token) - if len(chars) > _max_input_chars_per_word: - return [unk_token], [(0, len(chars))] - - is_bad = False - start = 0 - sub_tokens = [] - sub_pos = [] - while start < len(chars): - end = len(chars) - cur_substr = None - while start < end: - substr = "".join(chars[start:end]) - if start == 0: - substr = sentencepiece_prefix + substr - if start > 0: - substr = prefix + substr - if substr in vocab: - cur_substr = substr - break - end -= 1 - if cur_substr is None: - is_bad = True - break - sub_tokens.append(cur_substr) - sub_pos.append((start, end)) - start = end - if is_bad: - return [unk_token], [(0, len(chars))] - else: - return sub_tokens, sub_pos - - -class ErnieTokenizer(object): - def __init__(self, - vocab, - unk_token='[UNK]', - sep_token='[SEP]', - cls_token='[CLS]', - pad_token='[PAD]', - mask_token='[MASK]', - wordpiece_prefix='##', - sentencepiece_prefix='', - lower=True, - encoding='utf8', - special_token_list=[]): - if not isinstance(vocab, dict): - raise ValueError( - 'expect `vocab` to be instance of dict, got %s' % type(vocab)) - self.vocab = vocab - self.lower = lower - self.prefix = wordpiece_prefix - self.sentencepiece_prefix = sentencepiece_prefix - self.pad_id = self.vocab[pad_token] - self.cls_id = cls_token and self.vocab[cls_token] - self.sep_id = sep_token and self.vocab[sep_token] - self.unk_id = unk_token and self.vocab[unk_token] - self.mask_id = mask_token and self.vocab[mask_token] - self.unk_token = unk_token - special_tokens = { - pad_token, cls_token, sep_token, unk_token, mask_token - } | set(special_token_list) - pat_str = '' - for t in special_tokens: - if t is None: - continue - pat_str += '(%s)|' % re.escape(t) - pat_str += r'([a-zA-Z0-9]+|\S)' - log.debug('regex: %s' % pat_str) - self.pat = re.compile(pat_str) - self.encoding = encoding - - def tokenize(self, text): - if len(text) == 0: - return [] - if six.PY3 and not isinstance(text, six.string_types): - text = text.decode(self.encoding) - if six.PY2 and isinstance(text, str): - text = text.decode(self.encoding) - - res = [] - for match in self.pat.finditer(text): - match_group = match.group(0) - if match.groups()[-1]: - if self.lower: - match_group = match_group.lower() - words, _ = _wordpiece( - match_group, - vocab=self.vocab, - unk_token=self.unk_token, - prefix=self.prefix, - sentencepiece_prefix=self.sentencepiece_prefix) - else: - words = [match_group] - res += words - return res - - def convert_tokens_to_ids(self, tokens): - return [self.vocab.get(t, self.unk_id) for t in tokens] - - def truncate(self, id1, id2, seqlen): - len1 = len(id1) - len2 = len(id2) - half = seqlen // 2 - if len1 > len2: - len1_truncated, len2_truncated = max(half, seqlen - len2), min( - half, len2) - else: - len1_truncated, len2_truncated = min(half, seqlen - len1), max( - half, seqlen - len1) - return id1[:len1_truncated], id2[:len2_truncated] - - def build_for_ernie(self, text_id, pair_id=[]): - """build sentence type id, add [CLS] [SEP]""" - text_id_type = np.zeros_like(text_id, dtype=np.int64) - ret_id = np.concatenate([[self.cls_id], text_id, [self.sep_id]], 0) - ret_id_type = np.concatenate([[0], text_id_type, [0]], 0) - - if len(pair_id): - 
pair_id_type = np.ones_like(pair_id, dtype=np.int64) - ret_id = np.concatenate([ret_id, pair_id, [self.sep_id]], 0) - ret_id_type = np.concatenate([ret_id_type, pair_id_type, [1]], 0) - return ret_id, ret_id_type - - def encode(self, text, pair=None, truncate_to=None): - text_id = np.array( - self.convert_tokens_to_ids(self.tokenize(text)), dtype=np.int64) - text_id_type = np.zeros_like(text_id, dtype=np.int64) - if pair is not None: - pair_id = np.array( - self.convert_tokens_to_ids(self.tokenize(pair)), dtype=np.int64) - else: - pair_id = [] - if truncate_to is not None: - text_id, pair_id = self.truncate( - text_id, [] if pair_id is None else pair_id, truncate_to) - - ret_id, ret_id_type = self.build_for_ernie(text_id, pair_id) - return ret_id, ret_id_type diff --git a/hub_module/modules/text/text_generation/ernie_gen_couplet/module.py b/hub_module/modules/text/text_generation/ernie_gen_couplet/module.py deleted file mode 100644 index 14a6c18e12198f4c4287b8f0638b0aabe5063da2..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/ernie_gen_couplet/module.py +++ /dev/null @@ -1,191 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import ast -import json - -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.module.module import runnable -from paddlehub.module.nlp_module import DataFormatError -from paddlehub.common.logger import logger -from paddlehub.module.module import moduleinfo, serving - -import argparse -import os -import numpy as np - -import paddle.fluid.dygraph as D - -from ernie_gen_couplet.model.tokenizing_ernie import ErnieTokenizer -from ernie_gen_couplet.model.decode import beam_search_infilling -from ernie_gen_couplet.model.modeling_ernie_gen import ErnieModelForGeneration - - -@moduleinfo( - name="ernie_gen_couplet", - version="1.0.2", - summary= - "ERNIE-GEN is a multi-flow language generation framework for both pre-training and fine-tuning. 
This module has been fine-tuned for the couplet generation task.", - author="baidu-nlp", - author_email="", - type="nlp/text_generation", -) -class ErnieGen(hub.NLPPredictionModule): - def _initialize(self): - """ - initialize with the necessary elements - """ - assets_path = os.path.join(self.directory, "assets") - gen_checkpoint_path = os.path.join(assets_path, "ernie_gen_couplet") - ernie_cfg_path = os.path.join(assets_path, 'ernie_config.json') - with open(ernie_cfg_path, encoding='utf8') as ernie_cfg_file: - ernie_cfg = dict(json.loads(ernie_cfg_file.read())) - ernie_vocab_path = os.path.join(assets_path, 'vocab.txt') - with open(ernie_vocab_path, encoding='utf8') as ernie_vocab_file: - ernie_vocab = { - j.strip().split('\t')[0]: i - for i, j in enumerate(ernie_vocab_file.readlines()) - } - - with fluid.dygraph.guard(fluid.CPUPlace()): - with fluid.unique_name.guard(): - self.model = ErnieModelForGeneration(ernie_cfg) - finetuned_states, _ = D.load_dygraph(gen_checkpoint_path) - self.model.set_dict(finetuned_states) - - self.tokenizer = ErnieTokenizer(ernie_vocab) - self.rev_dict = {v: k for k, v in self.tokenizer.vocab.items()} - self.rev_dict[self.tokenizer.pad_id] = '' # replace [PAD] - self.rev_dict[self.tokenizer.unk_id] = '' # replace [UNK] - self.rev_lookup = np.vectorize(lambda i: self.rev_dict[i]) - - @serving - def generate(self, texts, use_gpu=False, beam_width=5): - """ - Generate the right rolls (second lines) of couplets from the given left rolls (first lines). - - Args: - texts(list): the left rolls of the couplets. - use_gpu(bool): whether to use GPU for prediction - beam_width(int): the beam search width. - - Returns: - results(list): the generated right rolls. - """ - if texts and isinstance(texts, list) and all(texts) and all( - [isinstance(text, str) for text in texts]): - predicted_data = texts - else: - raise ValueError( - "The input texts should be a list with nonempty string elements."
-        for i, text in enumerate(texts):
-            for char in text:
-                if not '\u4e00' <= char <= '\u9fff':
-                    logger.warning(
-                        'The input text %s contains non-Chinese characters, which may lead to unexpected output'
-                        % text)
-                    break
-
-        if use_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ:
-            use_gpu = False
-            logger.warning(
-                "use_gpu has been set to False: use_gpu=True requires the environment variable CUDA_VISIBLE_DEVICES to be set"
-            )
-        if use_gpu:
-            place = fluid.CUDAPlace(0)
-        else:
-            place = fluid.CPUPlace()
-
-        with fluid.dygraph.guard(place):
-            self.model.eval()
-            results = []
-            for text in predicted_data:
-                sample_results = []
-                ids, sids = self.tokenizer.encode(text)
-                src_ids = D.to_variable(np.expand_dims(ids, 0))
-                src_sids = D.to_variable(np.expand_dims(sids, 0))
-                output_ids = beam_search_infilling(
-                    self.model,
-                    src_ids,
-                    src_sids,
-                    eos_id=self.tokenizer.sep_id,
-                    sos_id=self.tokenizer.cls_id,
-                    attn_id=self.tokenizer.vocab['[MASK]'],
-                    max_decode_len=20,
-                    max_encode_len=20,
-                    beam_width=beam_width,
-                    tgt_type_id=1)
-                output_str = self.rev_lookup(output_ids[0].numpy())
-
-                for ostr in output_str.tolist():
-                    if '[SEP]' in ostr:
-                        ostr = ostr[:ostr.index('[SEP]')]
-                    sample_results.append("".join(ostr))
-                results.append(sample_results)
-        return results
-
-    def add_module_config_arg(self):
-        """
-        Add the command-line config options.
-        """
-        self.arg_config_group.add_argument(
-            '--use_gpu',
-            type=ast.literal_eval,
-            default=False,
-            help="whether to use GPU for prediction")
-
-        self.arg_config_group.add_argument(
-            '--beam_width', type=int, default=5, help="the beam search width")
-
-    @runnable
-    def run_cmd(self, argvs):
-        """
-        Run as a command.
-        """
-        self.parser = argparse.ArgumentParser(
-            description='Run the %s module.' % self.name,
-            prog='hub run %s' % self.name,
-            usage='%(prog)s',
-            add_help=True)
-
-        self.arg_input_group = self.parser.add_argument_group(
-            title="Input options", description="Input data. Required")
-        self.arg_config_group = self.parser.add_argument_group(
-            title="Config options",
-            description=
-            "Run configuration for controlling module behavior, optional.")
-
-        self.add_module_config_arg()
-        self.add_module_input_arg()
-
-        args = self.parser.parse_args(argvs)
-
-        try:
-            input_data = self.check_input_data(args)
-        except (DataFormatError, RuntimeError):
-            self.parser.print_help()
-            return None
-
-        results = self.generate(
-            texts=input_data, use_gpu=args.use_gpu, beam_width=args.beam_width)
-
-        return results
-
-
-if __name__ == "__main__":
-    module = ErnieGen()
-    for result in module.generate(['上海自来水来自海上', '风吹云乱天垂泪'], beam_width=5):
-        print(result)
diff --git a/hub_module/modules/text/text_generation/ernie_gen_lover_words/model/decode.py b/hub_module/modules/text/text_generation/ernie_gen_lover_words/model/decode.py
deleted file mode 100644
index c58fdbe2e8902346162f8733ef0cd94ba65757a2..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/text_generation/ernie_gen_lover_words/model/decode.py
+++ /dev/null
@@ -1,301 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and -# limitations under the License. - -import re -import numpy as np -from collections import namedtuple - -import paddle.fluid as F -import paddle.fluid.layers as L -import paddle.fluid.dygraph as D - - -def gen_bias(encoder_inputs, decoder_inputs, step): - decoder_bsz, decoder_seqlen = decoder_inputs.shape[:2] - attn_bias = L.reshape( - L.range(0, decoder_seqlen, 1, dtype='float32') + 1, [1, -1, 1]) - decoder_bias = L.cast( - (L.matmul(attn_bias, 1. / attn_bias, transpose_y=True) >= 1.), - 'float32') #[1, 1, decoderlen, decoderlen] - encoder_bias = L.unsqueeze( - L.cast(L.ones_like(encoder_inputs), 'float32'), - [1]) #[bsz, 1, encoderlen] - encoder_bias = L.expand( - encoder_bias, [1, decoder_seqlen, 1]) #[bsz,decoderlen, encoderlen] - decoder_bias = L.expand(decoder_bias, - [decoder_bsz, 1, 1]) #[bsz, decoderlen, decoderlen] - if step > 0: - bias = L.concat([ - encoder_bias, - L.ones([decoder_bsz, decoder_seqlen, step], 'float32'), decoder_bias - ], -1) - else: - bias = L.concat([encoder_bias, decoder_bias], -1) - return bias - - -@D.no_grad -def greedy_search_infilling(model, - q_ids, - q_sids, - sos_id, - eos_id, - attn_id, - max_encode_len=640, - max_decode_len=100, - tgt_type_id=3): - model.eval() - _, logits, info = model(q_ids, q_sids) - gen_ids = L.argmax(logits, -1) - d_batch, d_seqlen = q_ids.shape - seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) - has_stopped = np.zeros([d_batch], dtype=np.bool) - gen_seq_len = np.zeros([d_batch], dtype=np.int64) - output_ids = [] - - past_cache = info['caches'] - - cls_ids = L.ones([d_batch], dtype='int64') * sos_id - attn_ids = L.ones([d_batch], dtype='int64') * attn_id - ids = L.stack([cls_ids, attn_ids], -1) - for step in range(max_decode_len): - bias = gen_bias(q_ids, ids, step) - pos_ids = D.to_variable( - np.tile(np.array([[step, step + 1]], dtype=np.int64), [d_batch, 1])) - pos_ids += seqlen - _, logits, info = model( - ids, - L.ones_like(ids) * tgt_type_id, - pos_ids=pos_ids, - attn_bias=bias, - past_cache=past_cache) - gen_ids = L.argmax(logits, -1) - - past_cached_k, past_cached_v = past_cache - cached_k, cached_v = info['caches'] - cached_k = [ - L.concat([pk, k[:, :1, :]], 1) - for pk, k in zip(past_cached_k, cached_k) - ] # concat cached - cached_v = [ - L.concat([pv, v[:, :1, :]], 1) - for pv, v in zip(past_cached_v, cached_v) - ] - past_cache = (cached_k, cached_v) - - gen_ids = gen_ids[:, 1] - ids = L.stack([gen_ids, attn_ids], 1) - - gen_ids = gen_ids.numpy() - has_stopped |= (gen_ids == eos_id).astype(np.bool) - gen_seq_len += (1 - has_stopped.astype(np.int64)) - output_ids.append(gen_ids.tolist()) - if has_stopped.all(): - break - output_ids = np.array(output_ids).transpose([1, 0]) - return output_ids - - -BeamSearchState = namedtuple('BeamSearchState', - ['log_probs', 'lengths', 'finished']) -BeamSearchOutput = namedtuple('BeamSearchOutput', - ['scores', 'predicted_ids', 'beam_parent_ids']) - - -def log_softmax(x): - e_x = np.exp(x - np.max(x)) - return np.log(e_x / e_x.sum()) - - -def mask_prob(p, onehot_eos, finished): - is_finished = L.cast(L.reshape(finished, [-1, 1]) != 0, 'float32') - p = is_finished * (1. - L.cast(onehot_eos, 'float32')) * -9999. + ( - 1. - is_finished) * p - return p - - -def hyp_score(log_probs, length, length_penalty): - lp = L.pow((5. 
+ L.cast(length, 'float32')) / 6., length_penalty) - return log_probs / lp - - -def beam_search_step(state, logits, eos_id, beam_width, is_first_step, - length_penalty): - """logits.shape == [B*W, V]""" - _, vocab_size = logits.shape - - bsz, beam_width = state.log_probs.shape - onehot_eos = L.cast( - F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size), 'int64') #[1, V] - - probs = L.log(L.softmax(logits)) #[B*W, V] - probs = mask_prob(probs, onehot_eos, state.finished) #[B*W, V] - allprobs = L.reshape(state.log_probs, [-1, 1]) + probs #[B*W, V] - - not_finished = 1 - L.reshape(state.finished, [-1, 1]) #[B*W,1] - not_eos = 1 - onehot_eos - length_to_add = not_finished * not_eos #[B*W,V] - alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add - - allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size]) - alllen = L.reshape(alllen, [-1, beam_width * vocab_size]) - allscore = hyp_score(allprobs, alllen, length_penalty) - if is_first_step: - allscore = L.reshape( - allscore, - [bsz, beam_width, -1])[:, 0, :] # first step only consiter beam 0 - scores, idx = L.topk(allscore, k=beam_width) #[B, W] - next_beam_id = idx // vocab_size #[B, W] - next_word_id = idx % vocab_size - - gather_idx = L.concat([L.where(idx != -1)[:, :1], - L.reshape(idx, [-1, 1])], 1) - next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape) - next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape) - - gather_idx = L.concat( - [L.where(next_beam_id != -1)[:, :1], - L.reshape(next_beam_id, [-1, 1])], 1) - next_finished = L.reshape( - L.gather_nd(state.finished, gather_idx), - state.finished.shape) #[gather new beam state according to new beam id] - - next_finished += L.cast(next_word_id == eos_id, 'int64') - next_finished = L.cast(next_finished > 0, 'int64') - - next_state = BeamSearchState( - log_probs=next_probs, lengths=next_len, finished=next_finished) - output = BeamSearchOutput( - scores=scores, predicted_ids=next_word_id, beam_parent_ids=next_beam_id) - - return output, next_state - - -@D.no_grad -def beam_search_infilling(model, - q_ids, - q_sids, - sos_id, - eos_id, - attn_id, - max_encode_len=640, - max_decode_len=100, - beam_width=5, - tgt_type_id=3, - length_penalty=1.0): - model.eval() - _, __, info = model(q_ids, q_sids) - d_batch, d_seqlen = q_ids.shape - - state = BeamSearchState( - log_probs=L.zeros([d_batch, beam_width], 'float32'), - lengths=L.zeros([d_batch, beam_width], 'int64'), - finished=L.zeros([d_batch, beam_width], 'int64')) - outputs = [] - - def reorder_(t, parent_id): - """reorder cache according to parent beam id""" - gather_idx = L.where(parent_id != -1)[:, 0] * beam_width + L.reshape( - parent_id, [-1]) - t = L.gather(t, gather_idx) - return t - - def tile_(t, times): - _shapes = list(t.shape[1:]) - ret = L.reshape( - L.expand(L.unsqueeze(t, [1]), [ - 1, - times, - ] + [ - 1, - ] * len(_shapes)), [ - -1, - ] + _shapes) - return ret - - cached_k, cached_v = info['caches'] - cached_k = [tile_(k, beam_width) for k in cached_k] - cached_v = [tile_(v, beam_width) for v in cached_v] - past_cache = (cached_k, cached_v) - - q_ids = tile_(q_ids, beam_width) - seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) - - cls_ids = L.ones([d_batch * beam_width], dtype='int64') * sos_id - attn_ids = L.ones([d_batch * beam_width], dtype='int64') * attn_id # SOS - ids = L.stack([cls_ids, attn_ids], -1) - for step in range(max_decode_len): - bias = gen_bias(q_ids, ids, step) - pos_ids = D.to_variable( - np.tile( - np.array([[step, step + 1]], dtype=np.int64), - 
[d_batch * beam_width, 1])) - pos_ids += seqlen - - _, logits, info = model( - ids, - L.ones_like(ids) * tgt_type_id, - pos_ids=pos_ids, - attn_bias=bias, - past_cache=past_cache) - - output, state = beam_search_step( - state, - logits[:, 1], - eos_id=eos_id, - beam_width=beam_width, - is_first_step=(step == 0), - length_penalty=length_penalty) - outputs.append(output) - - past_cached_k, past_cached_v = past_cache - cached_k, cached_v = info['caches'] - cached_k = [ - reorder_(L.concat([pk, k[:, :1, :]], 1), output.beam_parent_ids) - for pk, k in zip(past_cached_k, cached_k) - ] # concat cached - cached_v = [ - reorder_(L.concat([pv, v[:, :1, :]], 1), output.beam_parent_ids) - for pv, v in zip(past_cached_v, cached_v) - ] - past_cache = (cached_k, cached_v) - - pred_ids_flatten = L.reshape(output.predicted_ids, - [d_batch * beam_width]) - ids = L.stack([pred_ids_flatten, attn_ids], 1) - - if state.finished.numpy().all(): - break - - final_ids = L.stack([o.predicted_ids for o in outputs], 0) - final_parent_ids = L.stack([o.beam_parent_ids for o in outputs], 0) - final_ids = L.gather_tree(final_ids, final_parent_ids) #[:, :, - #0] #pick best beam - final_ids = L.transpose( - L.reshape(final_ids, [-1, d_batch * 1, beam_width]), [1, 2, 0]) - return final_ids - - -en_patten = re.compile(r'^[a-zA-Z0-9]*$') - - -def post_process(token): - if token.startswith('##'): - ret = token[2:] - else: - if en_patten.match(token): - ret = ' ' + token - else: - ret = token - return ret diff --git a/hub_module/modules/text/text_generation/ernie_gen_lover_words/model/file_utils.py b/hub_module/modules/text/text_generation/ernie_gen_lover_words/model/file_utils.py deleted file mode 100644 index 613a5213a83e7fbd2a126cdb49b12eb62d4de41f..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/ernie_gen_lover_words/model/file_utils.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os - -from tqdm import tqdm -from paddlehub.common.logger import logger -from paddlehub.common.dir import MODULE_HOME - - -def _fetch_from_remote(url, force_download=False): - import tempfile, requests, tarfile - cached_dir = os.path.join(MODULE_HOME, "ernie_for_gen") - if force_download or not os.path.exists(cached_dir): - with tempfile.NamedTemporaryFile() as f: - #url = 'https://ernie.bj.bcebos.com/ERNIE_stable.tgz' - r = requests.get(url, stream=True) - total_len = int(r.headers.get('content-length')) - for chunk in tqdm( - r.iter_content(chunk_size=1024), - total=total_len // 1024, - desc='downloading %s' % url, - unit='KB'): - if chunk: - f.write(chunk) - f.flush() - logger.debug('extacting... 
to %s' % f.name) - with tarfile.open(f.name) as tf: - tf.extractall(path=cached_dir) - logger.debug('%s cached in %s' % (url, cached_dir)) - return cached_dir - - -def add_docstring(doc): - def func(f): - f.__doc__ += ('\n======other docs from supper class ======\n%s' % doc) - return f - - return func diff --git a/hub_module/modules/text/text_generation/ernie_gen_lover_words/model/modeling_ernie.py b/hub_module/modules/text/text_generation/ernie_gen_lover_words/model/modeling_ernie.py deleted file mode 100644 index 7c2304f67d7347e584c244ab8384eff0720f7cc2..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/ernie_gen_lover_words/model/modeling_ernie.py +++ /dev/null @@ -1,379 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function -from __future__ import unicode_literals - -import logging - -import paddle.fluid.dygraph as D -import paddle.fluid as F -import paddle.fluid.layers as L - -log = logging.getLogger(__name__) - - -def _build_linear(n_in, n_out, name, init, act=None): - return D.Linear( - n_in, - n_out, - param_attr=F.ParamAttr( - name='%s.w_0' % name if name is not None else None, - initializer=init), - bias_attr='%s.b_0' % name if name is not None else None, - act=act) - - -def _build_ln(n_in, name): - return D.LayerNorm( - normalized_shape=n_in, - param_attr=F.ParamAttr( - name='%s_layer_norm_scale' % name if name is not None else None, - initializer=F.initializer.Constant(1.)), - bias_attr=F.ParamAttr( - name='%s_layer_norm_bias' % name if name is not None else None, - initializer=F.initializer.Constant(1.)), - ) - - -def append_name(name, postfix): - if name is None: - return None - elif name == '': - return postfix - else: - return '%s_%s' % (name, postfix) - - -class AttentionLayer(D.Layer): - def __init__(self, cfg, name=None): - super(AttentionLayer, self).__init__() - initializer = F.initializer.TruncatedNormal( - scale=cfg['initializer_range']) - d_model = cfg['hidden_size'] - n_head = cfg['num_attention_heads'] - assert d_model % n_head == 0 - d_model_q = cfg.get('query_hidden_size_per_head', - d_model // n_head) * n_head - d_model_v = cfg.get('value_hidden_size_per_head', - d_model // n_head) * n_head - self.n_head = n_head - self.d_key = d_model_q // n_head - self.q = _build_linear(d_model, d_model_q, append_name( - name, 'query_fc'), initializer) - self.k = _build_linear(d_model, d_model_q, append_name(name, 'key_fc'), - initializer) - self.v = _build_linear(d_model, d_model_v, append_name( - name, 'value_fc'), initializer) - self.o = _build_linear(d_model_v, d_model, append_name( - name, 'output_fc'), initializer) - self.dropout = lambda i: L.dropout( - i, - dropout_prob=cfg['attention_probs_dropout_prob'], - dropout_implementation="upscale_in_train", - ) if self.training else i - - def forward(self, queries, keys, values, attn_bias, past_cache): - assert 
len(queries.shape) == len(keys.shape) == len(values.shape) == 3 - - q = self.q(queries) - k = self.k(keys) - v = self.v(values) - - cache = (k, v) - if past_cache is not None: - cached_k, cached_v = past_cache - k = L.concat([cached_k, k], 1) - v = L.concat([cached_v, v], 1) - - q = L.transpose( - L.reshape(q, [0, 0, self.n_head, q.shape[-1] // self.n_head]), - [0, 2, 1, 3]) #[batch, head, seq, dim] - k = L.transpose( - L.reshape(k, [0, 0, self.n_head, k.shape[-1] // self.n_head]), - [0, 2, 1, 3]) #[batch, head, seq, dim] - v = L.transpose( - L.reshape(v, [0, 0, self.n_head, v.shape[-1] // self.n_head]), - [0, 2, 1, 3]) #[batch, head, seq, dim] - - q = L.scale(q, scale=self.d_key**-0.5) - score = L.matmul(q, k, transpose_y=True) - if attn_bias is not None: - score += attn_bias - score = L.softmax(score, use_cudnn=True) - score = self.dropout(score) - - out = L.matmul(score, v) - out = L.transpose(out, [0, 2, 1, 3]) - out = L.reshape(out, [0, 0, out.shape[2] * out.shape[3]]) - - out = self.o(out) - return out, cache - - -class PositionwiseFeedForwardLayer(D.Layer): - def __init__(self, cfg, name=None): - super(PositionwiseFeedForwardLayer, self).__init__() - initializer = F.initializer.TruncatedNormal( - scale=cfg['initializer_range']) - d_model = cfg['hidden_size'] - d_ffn = cfg.get('intermediate_size', 4 * d_model) - assert cfg['hidden_act'] in ['relu', 'gelu'] - self.i = _build_linear( - d_model, - d_ffn, - append_name(name, 'fc_0'), - initializer, - act=cfg['hidden_act']) - self.o = _build_linear(d_ffn, d_model, append_name(name, 'fc_1'), - initializer) - prob = cfg.get('intermediate_dropout_prob', 0.) - self.dropout = lambda i: L.dropout( - i, - dropout_prob=prob, - dropout_implementation="upscale_in_train", - ) if self.training else i - - def forward(self, inputs): - hidden = self.i(inputs) - hidden = self.dropout(hidden) - out = self.o(hidden) - return out - - -class ErnieBlock(D.Layer): - def __init__(self, cfg, name=None): - super(ErnieBlock, self).__init__() - d_model = cfg['hidden_size'] - initializer = F.initializer.TruncatedNormal( - scale=cfg['initializer_range']) - - self.attn = AttentionLayer( - cfg, name=append_name(name, 'multi_head_att')) - self.ln1 = _build_ln(d_model, name=append_name(name, 'post_att')) - self.ffn = PositionwiseFeedForwardLayer( - cfg, name=append_name(name, 'ffn')) - self.ln2 = _build_ln(d_model, name=append_name(name, 'post_ffn')) - prob = cfg.get('intermediate_dropout_prob', cfg['hidden_dropout_prob']) - self.dropout = lambda i: L.dropout( - i, - dropout_prob=prob, - dropout_implementation="upscale_in_train", - ) if self.training else i - - def forward(self, inputs, attn_bias=None, past_cache=None): - attn_out, cache = self.attn( - inputs, inputs, inputs, attn_bias, - past_cache=past_cache) #self attn - attn_out = self.dropout(attn_out) - hidden = attn_out + inputs - hidden = self.ln1(hidden) # dropout/ add/ norm - - ffn_out = self.ffn(hidden) - ffn_out = self.dropout(ffn_out) - hidden = ffn_out + hidden - hidden = self.ln2(hidden) - return hidden, cache - - -class ErnieEncoderStack(D.Layer): - def __init__(self, cfg, name=None): - super(ErnieEncoderStack, self).__init__() - n_layers = cfg['num_hidden_layers'] - self.block = D.LayerList([ - ErnieBlock(cfg, append_name(name, 'layer_%d' % i)) - for i in range(n_layers) - ]) - - def forward(self, inputs, attn_bias=None, past_cache=None): - if past_cache is not None: - assert isinstance( - past_cache, tuple - ), 'unknown type of `past_cache`, expect tuple or list. 
got %s' % repr( - type(past_cache)) - past_cache = list(zip(*past_cache)) - else: - past_cache = [None] * len(self.block) - cache_list_k, cache_list_v, hidden_list = [], [], [inputs] - - for b, p in zip(self.block, past_cache): - inputs, cache = b(inputs, attn_bias=attn_bias, past_cache=p) - cache_k, cache_v = cache - cache_list_k.append(cache_k) - cache_list_v.append(cache_v) - hidden_list.append(inputs) - - return inputs, hidden_list, (cache_list_k, cache_list_v) - - -class ErnieModel(D.Layer): - def __init__(self, cfg, name=None): - """ - Fundamental pretrained Ernie model - """ - log.debug('init ErnieModel with config: %s' % repr(cfg)) - D.Layer.__init__(self) - d_model = cfg['hidden_size'] - d_emb = cfg.get('emb_size', cfg['hidden_size']) - d_vocab = cfg['vocab_size'] - d_pos = cfg['max_position_embeddings'] - d_sent = cfg.get("sent_type_vocab_size") or cfg['type_vocab_size'] - self.n_head = cfg['num_attention_heads'] - self.return_additional_info = cfg.get('return_additional_info', False) - initializer = F.initializer.TruncatedNormal( - scale=cfg['initializer_range']) - - self.ln = _build_ln(d_model, name=append_name(name, 'pre_encoder')) - self.word_emb = D.Embedding([d_vocab, d_emb], - param_attr=F.ParamAttr( - name=append_name( - name, 'word_embedding'), - initializer=initializer)) - self.pos_emb = D.Embedding([d_pos, d_emb], - param_attr=F.ParamAttr( - name=append_name(name, 'pos_embedding'), - initializer=initializer)) - self.sent_emb = D.Embedding([d_sent, d_emb], - param_attr=F.ParamAttr( - name=append_name( - name, 'sent_embedding'), - initializer=initializer)) - prob = cfg['hidden_dropout_prob'] - self.dropout = lambda i: L.dropout( - i, - dropout_prob=prob, - dropout_implementation="upscale_in_train", - ) if self.training else i - - self.encoder_stack = ErnieEncoderStack(cfg, append_name( - name, 'encoder')) - if cfg.get('has_pooler', True): - self.pooler = _build_linear( - cfg['hidden_size'], - cfg['hidden_size'], - append_name(name, 'pooled_fc'), - initializer, - act='tanh') - else: - self.pooler = None - self.train() - - def eval(self): - if F.in_dygraph_mode(): - super(ErnieModel, self).eval() - self.training = False - for l in self.sublayers(): - l.training = False - - def train(self): - if F.in_dygraph_mode(): - super(ErnieModel, self).train() - self.training = True - for l in self.sublayers(): - l.training = True - - def forward(self, - src_ids, - sent_ids=None, - pos_ids=None, - input_mask=None, - attn_bias=None, - past_cache=None, - use_causal_mask=False): - """ - Args: - src_ids (`Variable` of shape `[batch_size, seq_len]`): - Indices of input sequence tokens in the vocabulary. - sent_ids (optional, `Variable` of shape `[batch_size, seq_len]`): - aka token_type_ids, Segment token indices to indicate first and second portions of the inputs. - if None, assume all tokens come from `segment_a` - pos_ids(optional, `Variable` of shape `[batch_size, seq_len]`): - Indices of positions of each input sequence tokens in the position embeddings. - input_mask(optional `Variable` of shape `[batch_size, seq_len]`): - Mask to avoid performing attention on the padding token indices of the encoder input. 
- attn_bias(optional, `Variable` of shape `[batch_size, seq_len, seq_len] or False`): - 3D version of `input_mask`, if set, overrides `input_mask`; if set not False, will not apply attention mask - past_cache(optional, tuple of two lists: cached key and cached value, - each is a list of `Variable`s of shape `[batch_size, seq_len, hidden_size]`): - cached key/value tensor that will be concated to generated key/value when performing self attention. - if set, `attn_bias` should not be None. - - Returns: - pooled (`Variable` of shape `[batch_size, hidden_size]`): - output logits of pooler classifier - encoded(`Variable` of shape `[batch_size, seq_len, hidden_size]`): - output logits of transformer stack - """ - assert len( - src_ids.shape - ) == 2, 'expect src_ids.shape = [batch, sequecen], got %s' % (repr( - src_ids.shape)) - assert attn_bias is not None if past_cache else True, 'if `past_cache` is specified; attn_bias should not be None' - d_batch = L.shape(src_ids)[0] - d_seqlen = L.shape(src_ids)[1] - if pos_ids is None: - pos_ids = L.reshape(L.range(0, d_seqlen, 1, dtype='int32'), [1, -1]) - pos_ids = L.cast(pos_ids, 'int64') - if attn_bias is None: - if input_mask is None: - input_mask = L.cast(src_ids != 0, 'float32') - assert len(input_mask.shape) == 2 - input_mask = L.unsqueeze(input_mask, axes=[-1]) - attn_bias = L.matmul(input_mask, input_mask, transpose_y=True) - if use_causal_mask: - sequence = L.reshape( - L.range(0, d_seqlen, 1, dtype='float32') + 1., - [1, 1, -1, 1]) - causal_mask = L.cast( - (L.matmul(sequence, 1. / sequence, transpose_y=True) >= 1.), - 'float32') - attn_bias *= causal_mask - else: - assert len( - attn_bias.shape - ) == 3, 'expect attn_bias tobe rank 3, got %r' % attn_bias.shape - attn_bias = (1. - attn_bias) * -10000.0 - attn_bias = L.unsqueeze(attn_bias, [1]) - attn_bias = L.expand(attn_bias, - [1, self.n_head, 1, 1]) # avoid broadcast =_= - attn_bias.stop_gradient = True - - if sent_ids is None: - sent_ids = L.zeros_like(src_ids) - - src_embedded = self.word_emb(src_ids) - pos_embedded = self.pos_emb(pos_ids) - sent_embedded = self.sent_emb(sent_ids) - embedded = src_embedded + pos_embedded + sent_embedded - - embedded = self.dropout(self.ln(embedded)) - - encoded, hidden_list, cache_list = self.encoder_stack( - embedded, attn_bias, past_cache=past_cache) - if self.pooler is not None: - pooled = self.pooler(encoded[:, 0, :]) - else: - pooled = None - - additional_info = { - 'hiddens': hidden_list, - 'caches': cache_list, - } - - if self.return_additional_info: - return pooled, encoded, additional_info - else: - return pooled, encoded diff --git a/hub_module/modules/text/text_generation/ernie_gen_lover_words/model/modeling_ernie_gen.py b/hub_module/modules/text/text_generation/ernie_gen_lover_words/model/modeling_ernie_gen.py deleted file mode 100644 index 49be9de286e17e7bcba9e71a4f28ce8940c98749..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/ernie_gen_lover_words/model/modeling_ernie_gen.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.fluid as F -import paddle.fluid.layers as L - -from ernie_gen_lover_words.model.modeling_ernie import ErnieModel -from ernie_gen_lover_words.model.modeling_ernie import _build_linear, _build_ln, append_name - - -class ErnieModelForGeneration(ErnieModel): - def __init__(self, cfg, name=None): - cfg['return_additional_info'] = True - cfg['has_pooler'] = False - super(ErnieModelForGeneration, self).__init__(cfg, name=name) - initializer = F.initializer.TruncatedNormal( - scale=cfg['initializer_range']) - d_model = cfg['hidden_size'] - d_vocab = cfg['vocab_size'] - - self.mlm = _build_linear( - d_model, - d_model, - append_name(name, 'mask_lm_trans_fc'), - initializer, - act=cfg['hidden_act']) - self.mlm_ln = _build_ln( - d_model, name=append_name(name, 'mask_lm_trans')) - self.mlm_bias = L.create_parameter( - dtype='float32', - shape=[d_vocab], - attr=F.ParamAttr( - name=append_name(name, 'mask_lm_out_fc.b_0'), - initializer=F.initializer.Constant(value=0.0)), - is_bias=True, - ) - - def forward(self, src_ids, *args, **kwargs): - tgt_labels = kwargs.pop('tgt_labels', None) - tgt_pos = kwargs.pop('tgt_pos', None) - encode_only = kwargs.pop('encode_only', False) - _, encoded, info = ErnieModel.forward(self, src_ids, *args, **kwargs) - if encode_only: - return None, None, info - elif tgt_labels is None: - encoded = self.mlm(encoded) - encoded = self.mlm_ln(encoded) - logits = L.matmul( - encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias - output_ids = L.argmax(logits, -1) - return output_ids, logits, info - else: - encoded_2d = L.gather_nd(encoded, tgt_pos) - encoded_2d = self.mlm(encoded_2d) - encoded_2d = self.mlm_ln(encoded_2d) - logits_2d = L.matmul( - encoded_2d, self.word_emb.weight, - transpose_y=True) + self.mlm_bias - if len(tgt_labels.shape) == 1: - tgt_labels = L.reshape(tgt_labels, [-1, 1]) - - loss = L.reduce_mean( - L.softmax_with_cross_entropy( - logits_2d, - tgt_labels, - soft_label=(tgt_labels.shape[-1] != 1))) - return loss, logits_2d, info diff --git a/hub_module/modules/text/text_generation/ernie_gen_lover_words/model/tokenizing_ernie.py b/hub_module/modules/text/text_generation/ernie_gen_lover_words/model/tokenizing_ernie.py deleted file mode 100644 index 3039b7028f5da991189527b8145b05c952dafbbd..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/ernie_gen_lover_words/model/tokenizing_ernie.py +++ /dev/null @@ -1,171 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
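-
-# Editor's sketch (not part of the original module): the `_wordpiece` helper
-# below performs greedy longest-match subword splitting. A minimal standalone
-# equivalent, assuming a plain set-of-strings vocabulary:
-def _wordpiece_sketch(token, vocab, unk='[UNK]', prefix='##'):
-    chars, start, out = list(token), 0, []
-    while start < len(chars):
-        end = len(chars)
-        while start < end:  # shrink the window until a vocab entry matches
-            sub = ('' if start == 0 else prefix) + ''.join(chars[start:end])
-            if sub in vocab:
-                out.append(sub)
-                break
-            end -= 1
-        else:  # no substring of any length matched: whole token becomes unk
-            return [unk]
-        start = end
-    return out
-# e.g. _wordpiece_sketch('helloworld', {'hello', '##world'}) -> ['hello', '##world']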
- -import six -import re -import logging -from functools import partial - -import numpy as np - -import io - -open = partial(io.open, encoding='utf8') - -log = logging.getLogger(__name__) - -_max_input_chars_per_word = 100 - - -def _wordpiece(token, vocab, unk_token, prefix='##', sentencepiece_prefix=''): - """ wordpiece: helloworld => [hello, ##world] """ - chars = list(token) - if len(chars) > _max_input_chars_per_word: - return [unk_token], [(0, len(chars))] - - is_bad = False - start = 0 - sub_tokens = [] - sub_pos = [] - while start < len(chars): - end = len(chars) - cur_substr = None - while start < end: - substr = "".join(chars[start:end]) - if start == 0: - substr = sentencepiece_prefix + substr - if start > 0: - substr = prefix + substr - if substr in vocab: - cur_substr = substr - break - end -= 1 - if cur_substr is None: - is_bad = True - break - sub_tokens.append(cur_substr) - sub_pos.append((start, end)) - start = end - if is_bad: - return [unk_token], [(0, len(chars))] - else: - return sub_tokens, sub_pos - - -class ErnieTokenizer(object): - def __init__(self, - vocab, - unk_token='[UNK]', - sep_token='[SEP]', - cls_token='[CLS]', - pad_token='[PAD]', - mask_token='[MASK]', - wordpiece_prefix='##', - sentencepiece_prefix='', - lower=True, - encoding='utf8', - special_token_list=[]): - if not isinstance(vocab, dict): - raise ValueError( - 'expect `vocab` to be instance of dict, got %s' % type(vocab)) - self.vocab = vocab - self.lower = lower - self.prefix = wordpiece_prefix - self.sentencepiece_prefix = sentencepiece_prefix - self.pad_id = self.vocab[pad_token] - self.cls_id = cls_token and self.vocab[cls_token] - self.sep_id = sep_token and self.vocab[sep_token] - self.unk_id = unk_token and self.vocab[unk_token] - self.mask_id = mask_token and self.vocab[mask_token] - self.unk_token = unk_token - special_tokens = { - pad_token, cls_token, sep_token, unk_token, mask_token - } | set(special_token_list) - pat_str = '' - for t in special_tokens: - if t is None: - continue - pat_str += '(%s)|' % re.escape(t) - pat_str += r'([a-zA-Z0-9]+|\S)' - log.debug('regex: %s' % pat_str) - self.pat = re.compile(pat_str) - self.encoding = encoding - - def tokenize(self, text): - if len(text) == 0: - return [] - if six.PY3 and not isinstance(text, six.string_types): - text = text.decode(self.encoding) - if six.PY2 and isinstance(text, str): - text = text.decode(self.encoding) - - res = [] - for match in self.pat.finditer(text): - match_group = match.group(0) - if match.groups()[-1]: - if self.lower: - match_group = match_group.lower() - words, _ = _wordpiece( - match_group, - vocab=self.vocab, - unk_token=self.unk_token, - prefix=self.prefix, - sentencepiece_prefix=self.sentencepiece_prefix) - else: - words = [match_group] - res += words - return res - - def convert_tokens_to_ids(self, tokens): - return [self.vocab.get(t, self.unk_id) for t in tokens] - - def truncate(self, id1, id2, seqlen): - len1 = len(id1) - len2 = len(id2) - half = seqlen // 2 - if len1 > len2: - len1_truncated, len2_truncated = max(half, seqlen - len2), min( - half, len2) - else: - len1_truncated, len2_truncated = min(half, seqlen - len1), max( - half, seqlen - len1) - return id1[:len1_truncated], id2[:len2_truncated] - - def build_for_ernie(self, text_id, pair_id=[]): - """build sentence type id, add [CLS] [SEP]""" - text_id_type = np.zeros_like(text_id, dtype=np.int64) - ret_id = np.concatenate([[self.cls_id], text_id, [self.sep_id]], 0) - ret_id_type = np.concatenate([[0], text_id_type, [0]], 0) - - if len(pair_id): - 
pair_id_type = np.ones_like(pair_id, dtype=np.int64) - ret_id = np.concatenate([ret_id, pair_id, [self.sep_id]], 0) - ret_id_type = np.concatenate([ret_id_type, pair_id_type, [1]], 0) - return ret_id, ret_id_type - - def encode(self, text, pair=None, truncate_to=None): - text_id = np.array( - self.convert_tokens_to_ids(self.tokenize(text)), dtype=np.int64) - text_id_type = np.zeros_like(text_id, dtype=np.int64) - if pair is not None: - pair_id = np.array( - self.convert_tokens_to_ids(self.tokenize(pair)), dtype=np.int64) - else: - pair_id = [] - if truncate_to is not None: - text_id, pair_id = self.truncate( - text_id, [] if pair_id is None else pair_id, truncate_to) - - ret_id, ret_id_type = self.build_for_ernie(text_id, pair_id) - return ret_id, ret_id_type diff --git a/hub_module/modules/text/text_generation/ernie_gen_lover_words/module.py b/hub_module/modules/text/text_generation/ernie_gen_lover_words/module.py deleted file mode 100644 index 7df7b1726350a516dba5e93741e8ec547bc6e774..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/ernie_gen_lover_words/module.py +++ /dev/null @@ -1,185 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import ast -import json - -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.module.module import runnable -from paddlehub.module.nlp_module import DataFormatError -from paddlehub.common.logger import logger -from paddlehub.module.module import moduleinfo, serving - -import argparse -import os -import numpy as np - -import paddle.fluid.dygraph as D - -from ernie_gen_lover_words.model.tokenizing_ernie import ErnieTokenizer -from ernie_gen_lover_words.model.decode import beam_search_infilling -from ernie_gen_lover_words.model.modeling_ernie_gen import ErnieModelForGeneration - - -@moduleinfo( - name="ernie_gen_lover_words", - version="1.0.1", - summary= - "ERNIE-GEN is a multi-flow language generation framework for both pre-training and fine-tuning. 
This module is fine-tuned for the lover's words generation task.",
-    author="adaxiadaxi",
-    author_email="",
-    type="nlp/text_generation",
-)
-class ErnieGen(hub.NLPPredictionModule):
-    def _initialize(self):
-        """
-        Initialize with the necessary elements.
-        """
-        assets_path = os.path.join(self.directory, "assets")
-        gen_checkpoint_path = os.path.join(assets_path, "ernie_gen_lover_words")
-        ernie_cfg_path = os.path.join(assets_path, 'ernie_config.json')
-        with open(ernie_cfg_path, encoding='utf8') as ernie_cfg_file:
-            ernie_cfg = dict(json.loads(ernie_cfg_file.read()))
-        ernie_vocab_path = os.path.join(assets_path, 'vocab.txt')
-        with open(ernie_vocab_path, encoding='utf8') as ernie_vocab_file:
-            ernie_vocab = {
-                j.strip().split('\t')[0]: i
-                for i, j in enumerate(ernie_vocab_file.readlines())
-            }
-
-        with fluid.dygraph.guard(fluid.CPUPlace()):
-            with fluid.unique_name.guard():
-                self.model = ErnieModelForGeneration(ernie_cfg)
-                finetuned_states, _ = D.load_dygraph(gen_checkpoint_path)
-                self.model.set_dict(finetuned_states)
-
-        self.tokenizer = ErnieTokenizer(ernie_vocab)
-        self.rev_dict = {v: k for k, v in self.tokenizer.vocab.items()}
-        self.rev_dict[self.tokenizer.pad_id] = ''  # replace [PAD]
-        self.rev_dict[self.tokenizer.unk_id] = ''  # replace [UNK]
-        self.rev_lookup = np.vectorize(lambda i: self.rev_dict[i])
-
-    @serving
-    def generate(self, texts, use_gpu=False, beam_width=5):
-        """
-        Generate the continuation of the input poem.
-
-        Args:
-             texts(list): the opening lines of the poems.
-             use_gpu(bool): whether to use GPU for prediction.
-             beam_width(int): the beam search width.
-
-        Returns:
-             results(list): the generated continuations.
-        """
-        if texts and isinstance(texts, list) and all(texts) and all(
-                [isinstance(text, str) for text in texts]):
-            predicted_data = texts
-        else:
-            raise ValueError(
-                "The input texts should be a list with nonempty string elements."
-            )
-
-        if use_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ:
-            use_gpu = False
-            logger.warning(
-                "use_gpu has been set to False: use_gpu=True requires the environment variable CUDA_VISIBLE_DEVICES to be set"
-            )
-        if use_gpu:
-            place = fluid.CUDAPlace(0)
-        else:
-            place = fluid.CPUPlace()
-
-        with fluid.dygraph.guard(place):
-            self.model.eval()
-            results = []
-            for text in predicted_data:
-                sample_results = []
-                ids, sids = self.tokenizer.encode(text)
-                src_ids = D.to_variable(np.expand_dims(ids, 0))
-                src_sids = D.to_variable(np.expand_dims(sids, 0))
-                output_ids = beam_search_infilling(
-                    self.model,
-                    src_ids,
-                    src_sids,
-                    eos_id=self.tokenizer.sep_id,
-                    sos_id=self.tokenizer.cls_id,
-                    attn_id=self.tokenizer.vocab['[MASK]'],
-                    max_decode_len=80,
-                    max_encode_len=20,
-                    beam_width=beam_width,
-                    tgt_type_id=1)
-                output_str = self.rev_lookup(output_ids[0].numpy())
-
-                for ostr in output_str.tolist():
-                    if '[SEP]' in ostr:
-                        ostr = ostr[:ostr.index('[SEP]')]
-                    sample_results.append("".join(ostr))
-                results.append(sample_results)
-        return results
-
-    def add_module_config_arg(self):
-        """
-        Add the command-line config options.
-        """
-        self.arg_config_group.add_argument(
-            '--use_gpu',
-            type=ast.literal_eval,
-            default=False,
-            help="whether to use GPU for prediction")
-
-        self.arg_config_group.add_argument(
-            '--beam_width', type=int, default=5, help="the beam search width")
-
-    @runnable
-    def run_cmd(self, argvs):
-        """
-        Run as a command.
-        """
-        self.parser = argparse.ArgumentParser(
-            description='Run the %s module.' % self.name,
-            prog='hub run %s' % self.name,
-            usage='%(prog)s',
-            add_help=True)
-
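-        # Example invocation (editor's note; the input flag is registered by
-        # the base NLPPredictionModule's add_module_input_arg, so the exact
-        # flag name is an assumption, not verified):
-        #   hub run ernie_gen_lover_words --input_text '昔年旅南服,始识王荆州。' --beam_width 5
-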
-        self.arg_input_group = self.parser.add_argument_group(
-            title="Input options", description="Input data. Required")
-        self.arg_config_group = self.parser.add_argument_group(
-            title="Config options",
-            description=
-            "Run configuration for controlling module behavior, optional.")
-
-        self.add_module_config_arg()
-        self.add_module_input_arg()
-
-        args = self.parser.parse_args(argvs)
-
-        try:
-            input_data = self.check_input_data(args)
-        except (DataFormatError, RuntimeError):
-            self.parser.print_help()
-            return None
-
-        results = self.generate(
-            texts=input_data, use_gpu=args.use_gpu, beam_width=args.beam_width)
-
-        return results
-
-
-if __name__ == "__main__":
-    module = ErnieGen()
-    for result in module.generate(['昔年旅南服,始识王荆州。', '高名出汉阴,禅阁跨香岑。'],
-                                  beam_width=5):
-        print(result)
diff --git a/hub_module/modules/text/text_generation/ernie_gen_poetry/model/decode.py b/hub_module/modules/text/text_generation/ernie_gen_poetry/model/decode.py
deleted file mode 100644
index c58fdbe2e8902346162f8733ef0cd94ba65757a2..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/text_generation/ernie_gen_poetry/model/decode.py
+++ /dev/null
@@ -1,301 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import re
-import numpy as np
-from collections import namedtuple
-
-import paddle.fluid as F
-import paddle.fluid.layers as L
-import paddle.fluid.dygraph as D
-
-
-def gen_bias(encoder_inputs, decoder_inputs, step):
-    decoder_bsz, decoder_seqlen = decoder_inputs.shape[:2]
-    attn_bias = L.reshape(
-        L.range(0, decoder_seqlen, 1, dtype='float32') + 1, [1, -1, 1])
-    decoder_bias = L.cast(
-        (L.matmul(attn_bias, 1.
/ attn_bias, transpose_y=True) >= 1.), - 'float32') #[1, 1, decoderlen, decoderlen] - encoder_bias = L.unsqueeze( - L.cast(L.ones_like(encoder_inputs), 'float32'), - [1]) #[bsz, 1, encoderlen] - encoder_bias = L.expand( - encoder_bias, [1, decoder_seqlen, 1]) #[bsz,decoderlen, encoderlen] - decoder_bias = L.expand(decoder_bias, - [decoder_bsz, 1, 1]) #[bsz, decoderlen, decoderlen] - if step > 0: - bias = L.concat([ - encoder_bias, - L.ones([decoder_bsz, decoder_seqlen, step], 'float32'), decoder_bias - ], -1) - else: - bias = L.concat([encoder_bias, decoder_bias], -1) - return bias - - -@D.no_grad -def greedy_search_infilling(model, - q_ids, - q_sids, - sos_id, - eos_id, - attn_id, - max_encode_len=640, - max_decode_len=100, - tgt_type_id=3): - model.eval() - _, logits, info = model(q_ids, q_sids) - gen_ids = L.argmax(logits, -1) - d_batch, d_seqlen = q_ids.shape - seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) - has_stopped = np.zeros([d_batch], dtype=np.bool) - gen_seq_len = np.zeros([d_batch], dtype=np.int64) - output_ids = [] - - past_cache = info['caches'] - - cls_ids = L.ones([d_batch], dtype='int64') * sos_id - attn_ids = L.ones([d_batch], dtype='int64') * attn_id - ids = L.stack([cls_ids, attn_ids], -1) - for step in range(max_decode_len): - bias = gen_bias(q_ids, ids, step) - pos_ids = D.to_variable( - np.tile(np.array([[step, step + 1]], dtype=np.int64), [d_batch, 1])) - pos_ids += seqlen - _, logits, info = model( - ids, - L.ones_like(ids) * tgt_type_id, - pos_ids=pos_ids, - attn_bias=bias, - past_cache=past_cache) - gen_ids = L.argmax(logits, -1) - - past_cached_k, past_cached_v = past_cache - cached_k, cached_v = info['caches'] - cached_k = [ - L.concat([pk, k[:, :1, :]], 1) - for pk, k in zip(past_cached_k, cached_k) - ] # concat cached - cached_v = [ - L.concat([pv, v[:, :1, :]], 1) - for pv, v in zip(past_cached_v, cached_v) - ] - past_cache = (cached_k, cached_v) - - gen_ids = gen_ids[:, 1] - ids = L.stack([gen_ids, attn_ids], 1) - - gen_ids = gen_ids.numpy() - has_stopped |= (gen_ids == eos_id).astype(np.bool) - gen_seq_len += (1 - has_stopped.astype(np.int64)) - output_ids.append(gen_ids.tolist()) - if has_stopped.all(): - break - output_ids = np.array(output_ids).transpose([1, 0]) - return output_ids - - -BeamSearchState = namedtuple('BeamSearchState', - ['log_probs', 'lengths', 'finished']) -BeamSearchOutput = namedtuple('BeamSearchOutput', - ['scores', 'predicted_ids', 'beam_parent_ids']) - - -def log_softmax(x): - e_x = np.exp(x - np.max(x)) - return np.log(e_x / e_x.sum()) - - -def mask_prob(p, onehot_eos, finished): - is_finished = L.cast(L.reshape(finished, [-1, 1]) != 0, 'float32') - p = is_finished * (1. - L.cast(onehot_eos, 'float32')) * -9999. + ( - 1. - is_finished) * p - return p - - -def hyp_score(log_probs, length, length_penalty): - lp = L.pow((5. 
+ L.cast(length, 'float32')) / 6., length_penalty) - return log_probs / lp - - -def beam_search_step(state, logits, eos_id, beam_width, is_first_step, - length_penalty): - """logits.shape == [B*W, V]""" - _, vocab_size = logits.shape - - bsz, beam_width = state.log_probs.shape - onehot_eos = L.cast( - F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size), 'int64') #[1, V] - - probs = L.log(L.softmax(logits)) #[B*W, V] - probs = mask_prob(probs, onehot_eos, state.finished) #[B*W, V] - allprobs = L.reshape(state.log_probs, [-1, 1]) + probs #[B*W, V] - - not_finished = 1 - L.reshape(state.finished, [-1, 1]) #[B*W,1] - not_eos = 1 - onehot_eos - length_to_add = not_finished * not_eos #[B*W,V] - alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add - - allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size]) - alllen = L.reshape(alllen, [-1, beam_width * vocab_size]) - allscore = hyp_score(allprobs, alllen, length_penalty) - if is_first_step: - allscore = L.reshape( - allscore, - [bsz, beam_width, -1])[:, 0, :] # first step only consiter beam 0 - scores, idx = L.topk(allscore, k=beam_width) #[B, W] - next_beam_id = idx // vocab_size #[B, W] - next_word_id = idx % vocab_size - - gather_idx = L.concat([L.where(idx != -1)[:, :1], - L.reshape(idx, [-1, 1])], 1) - next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape) - next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape) - - gather_idx = L.concat( - [L.where(next_beam_id != -1)[:, :1], - L.reshape(next_beam_id, [-1, 1])], 1) - next_finished = L.reshape( - L.gather_nd(state.finished, gather_idx), - state.finished.shape) #[gather new beam state according to new beam id] - - next_finished += L.cast(next_word_id == eos_id, 'int64') - next_finished = L.cast(next_finished > 0, 'int64') - - next_state = BeamSearchState( - log_probs=next_probs, lengths=next_len, finished=next_finished) - output = BeamSearchOutput( - scores=scores, predicted_ids=next_word_id, beam_parent_ids=next_beam_id) - - return output, next_state - - -@D.no_grad -def beam_search_infilling(model, - q_ids, - q_sids, - sos_id, - eos_id, - attn_id, - max_encode_len=640, - max_decode_len=100, - beam_width=5, - tgt_type_id=3, - length_penalty=1.0): - model.eval() - _, __, info = model(q_ids, q_sids) - d_batch, d_seqlen = q_ids.shape - - state = BeamSearchState( - log_probs=L.zeros([d_batch, beam_width], 'float32'), - lengths=L.zeros([d_batch, beam_width], 'int64'), - finished=L.zeros([d_batch, beam_width], 'int64')) - outputs = [] - - def reorder_(t, parent_id): - """reorder cache according to parent beam id""" - gather_idx = L.where(parent_id != -1)[:, 0] * beam_width + L.reshape( - parent_id, [-1]) - t = L.gather(t, gather_idx) - return t - - def tile_(t, times): - _shapes = list(t.shape[1:]) - ret = L.reshape( - L.expand(L.unsqueeze(t, [1]), [ - 1, - times, - ] + [ - 1, - ] * len(_shapes)), [ - -1, - ] + _shapes) - return ret - - cached_k, cached_v = info['caches'] - cached_k = [tile_(k, beam_width) for k in cached_k] - cached_v = [tile_(v, beam_width) for v in cached_v] - past_cache = (cached_k, cached_v) - - q_ids = tile_(q_ids, beam_width) - seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) - - cls_ids = L.ones([d_batch * beam_width], dtype='int64') * sos_id - attn_ids = L.ones([d_batch * beam_width], dtype='int64') * attn_id # SOS - ids = L.stack([cls_ids, attn_ids], -1) - for step in range(max_decode_len): - bias = gen_bias(q_ids, ids, step) - pos_ids = D.to_variable( - np.tile( - np.array([[step, step + 1]], dtype=np.int64), - 
[d_batch * beam_width, 1])) - pos_ids += seqlen - - _, logits, info = model( - ids, - L.ones_like(ids) * tgt_type_id, - pos_ids=pos_ids, - attn_bias=bias, - past_cache=past_cache) - - output, state = beam_search_step( - state, - logits[:, 1], - eos_id=eos_id, - beam_width=beam_width, - is_first_step=(step == 0), - length_penalty=length_penalty) - outputs.append(output) - - past_cached_k, past_cached_v = past_cache - cached_k, cached_v = info['caches'] - cached_k = [ - reorder_(L.concat([pk, k[:, :1, :]], 1), output.beam_parent_ids) - for pk, k in zip(past_cached_k, cached_k) - ] # concat cached - cached_v = [ - reorder_(L.concat([pv, v[:, :1, :]], 1), output.beam_parent_ids) - for pv, v in zip(past_cached_v, cached_v) - ] - past_cache = (cached_k, cached_v) - - pred_ids_flatten = L.reshape(output.predicted_ids, - [d_batch * beam_width]) - ids = L.stack([pred_ids_flatten, attn_ids], 1) - - if state.finished.numpy().all(): - break - - final_ids = L.stack([o.predicted_ids for o in outputs], 0) - final_parent_ids = L.stack([o.beam_parent_ids for o in outputs], 0) - final_ids = L.gather_tree(final_ids, final_parent_ids) #[:, :, - #0] #pick best beam - final_ids = L.transpose( - L.reshape(final_ids, [-1, d_batch * 1, beam_width]), [1, 2, 0]) - return final_ids - - -en_patten = re.compile(r'^[a-zA-Z0-9]*$') - - -def post_process(token): - if token.startswith('##'): - ret = token[2:] - else: - if en_patten.match(token): - ret = ' ' + token - else: - ret = token - return ret diff --git a/hub_module/modules/text/text_generation/ernie_gen_poetry/model/file_utils.py b/hub_module/modules/text/text_generation/ernie_gen_poetry/model/file_utils.py deleted file mode 100644 index 613a5213a83e7fbd2a126cdb49b12eb62d4de41f..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/ernie_gen_poetry/model/file_utils.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os - -from tqdm import tqdm -from paddlehub.common.logger import logger -from paddlehub.common.dir import MODULE_HOME - - -def _fetch_from_remote(url, force_download=False): - import tempfile, requests, tarfile - cached_dir = os.path.join(MODULE_HOME, "ernie_for_gen") - if force_download or not os.path.exists(cached_dir): - with tempfile.NamedTemporaryFile() as f: - #url = 'https://ernie.bj.bcebos.com/ERNIE_stable.tgz' - r = requests.get(url, stream=True) - total_len = int(r.headers.get('content-length')) - for chunk in tqdm( - r.iter_content(chunk_size=1024), - total=total_len // 1024, - desc='downloading %s' % url, - unit='KB'): - if chunk: - f.write(chunk) - f.flush() - logger.debug('extacting... 
to %s' % f.name) - with tarfile.open(f.name) as tf: - tf.extractall(path=cached_dir) - logger.debug('%s cached in %s' % (url, cached_dir)) - return cached_dir - - -def add_docstring(doc): - def func(f): - f.__doc__ += ('\n======other docs from supper class ======\n%s' % doc) - return f - - return func diff --git a/hub_module/modules/text/text_generation/ernie_gen_poetry/model/modeling_ernie.py b/hub_module/modules/text/text_generation/ernie_gen_poetry/model/modeling_ernie.py deleted file mode 100644 index 7c2304f67d7347e584c244ab8384eff0720f7cc2..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/ernie_gen_poetry/model/modeling_ernie.py +++ /dev/null @@ -1,379 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function -from __future__ import unicode_literals - -import logging - -import paddle.fluid.dygraph as D -import paddle.fluid as F -import paddle.fluid.layers as L - -log = logging.getLogger(__name__) - - -def _build_linear(n_in, n_out, name, init, act=None): - return D.Linear( - n_in, - n_out, - param_attr=F.ParamAttr( - name='%s.w_0' % name if name is not None else None, - initializer=init), - bias_attr='%s.b_0' % name if name is not None else None, - act=act) - - -def _build_ln(n_in, name): - return D.LayerNorm( - normalized_shape=n_in, - param_attr=F.ParamAttr( - name='%s_layer_norm_scale' % name if name is not None else None, - initializer=F.initializer.Constant(1.)), - bias_attr=F.ParamAttr( - name='%s_layer_norm_bias' % name if name is not None else None, - initializer=F.initializer.Constant(1.)), - ) - - -def append_name(name, postfix): - if name is None: - return None - elif name == '': - return postfix - else: - return '%s_%s' % (name, postfix) - - -class AttentionLayer(D.Layer): - def __init__(self, cfg, name=None): - super(AttentionLayer, self).__init__() - initializer = F.initializer.TruncatedNormal( - scale=cfg['initializer_range']) - d_model = cfg['hidden_size'] - n_head = cfg['num_attention_heads'] - assert d_model % n_head == 0 - d_model_q = cfg.get('query_hidden_size_per_head', - d_model // n_head) * n_head - d_model_v = cfg.get('value_hidden_size_per_head', - d_model // n_head) * n_head - self.n_head = n_head - self.d_key = d_model_q // n_head - self.q = _build_linear(d_model, d_model_q, append_name( - name, 'query_fc'), initializer) - self.k = _build_linear(d_model, d_model_q, append_name(name, 'key_fc'), - initializer) - self.v = _build_linear(d_model, d_model_v, append_name( - name, 'value_fc'), initializer) - self.o = _build_linear(d_model_v, d_model, append_name( - name, 'output_fc'), initializer) - self.dropout = lambda i: L.dropout( - i, - dropout_prob=cfg['attention_probs_dropout_prob'], - dropout_implementation="upscale_in_train", - ) if self.training else i - - def forward(self, queries, keys, values, attn_bias, past_cache): - assert len(queries.shape) 
== len(keys.shape) == len(values.shape) == 3 - - q = self.q(queries) - k = self.k(keys) - v = self.v(values) - - cache = (k, v) - if past_cache is not None: - cached_k, cached_v = past_cache - k = L.concat([cached_k, k], 1) - v = L.concat([cached_v, v], 1) - - q = L.transpose( - L.reshape(q, [0, 0, self.n_head, q.shape[-1] // self.n_head]), - [0, 2, 1, 3]) #[batch, head, seq, dim] - k = L.transpose( - L.reshape(k, [0, 0, self.n_head, k.shape[-1] // self.n_head]), - [0, 2, 1, 3]) #[batch, head, seq, dim] - v = L.transpose( - L.reshape(v, [0, 0, self.n_head, v.shape[-1] // self.n_head]), - [0, 2, 1, 3]) #[batch, head, seq, dim] - - q = L.scale(q, scale=self.d_key**-0.5) - score = L.matmul(q, k, transpose_y=True) - if attn_bias is not None: - score += attn_bias - score = L.softmax(score, use_cudnn=True) - score = self.dropout(score) - - out = L.matmul(score, v) - out = L.transpose(out, [0, 2, 1, 3]) - out = L.reshape(out, [0, 0, out.shape[2] * out.shape[3]]) - - out = self.o(out) - return out, cache - - -class PositionwiseFeedForwardLayer(D.Layer): - def __init__(self, cfg, name=None): - super(PositionwiseFeedForwardLayer, self).__init__() - initializer = F.initializer.TruncatedNormal( - scale=cfg['initializer_range']) - d_model = cfg['hidden_size'] - d_ffn = cfg.get('intermediate_size', 4 * d_model) - assert cfg['hidden_act'] in ['relu', 'gelu'] - self.i = _build_linear( - d_model, - d_ffn, - append_name(name, 'fc_0'), - initializer, - act=cfg['hidden_act']) - self.o = _build_linear(d_ffn, d_model, append_name(name, 'fc_1'), - initializer) - prob = cfg.get('intermediate_dropout_prob', 0.) - self.dropout = lambda i: L.dropout( - i, - dropout_prob=prob, - dropout_implementation="upscale_in_train", - ) if self.training else i - - def forward(self, inputs): - hidden = self.i(inputs) - hidden = self.dropout(hidden) - out = self.o(hidden) - return out - - -class ErnieBlock(D.Layer): - def __init__(self, cfg, name=None): - super(ErnieBlock, self).__init__() - d_model = cfg['hidden_size'] - initializer = F.initializer.TruncatedNormal( - scale=cfg['initializer_range']) - - self.attn = AttentionLayer( - cfg, name=append_name(name, 'multi_head_att')) - self.ln1 = _build_ln(d_model, name=append_name(name, 'post_att')) - self.ffn = PositionwiseFeedForwardLayer( - cfg, name=append_name(name, 'ffn')) - self.ln2 = _build_ln(d_model, name=append_name(name, 'post_ffn')) - prob = cfg.get('intermediate_dropout_prob', cfg['hidden_dropout_prob']) - self.dropout = lambda i: L.dropout( - i, - dropout_prob=prob, - dropout_implementation="upscale_in_train", - ) if self.training else i - - def forward(self, inputs, attn_bias=None, past_cache=None): - attn_out, cache = self.attn( - inputs, inputs, inputs, attn_bias, - past_cache=past_cache) #self attn - attn_out = self.dropout(attn_out) - hidden = attn_out + inputs - hidden = self.ln1(hidden) # dropout/ add/ norm - - ffn_out = self.ffn(hidden) - ffn_out = self.dropout(ffn_out) - hidden = ffn_out + hidden - hidden = self.ln2(hidden) - return hidden, cache - - -class ErnieEncoderStack(D.Layer): - def __init__(self, cfg, name=None): - super(ErnieEncoderStack, self).__init__() - n_layers = cfg['num_hidden_layers'] - self.block = D.LayerList([ - ErnieBlock(cfg, append_name(name, 'layer_%d' % i)) - for i in range(n_layers) - ]) - - def forward(self, inputs, attn_bias=None, past_cache=None): - if past_cache is not None: - assert isinstance( - past_cache, tuple - ), 'unknown type of `past_cache`, expect tuple or list. 
got %s' % repr( - type(past_cache)) - past_cache = list(zip(*past_cache)) - else: - past_cache = [None] * len(self.block) - cache_list_k, cache_list_v, hidden_list = [], [], [inputs] - - for b, p in zip(self.block, past_cache): - inputs, cache = b(inputs, attn_bias=attn_bias, past_cache=p) - cache_k, cache_v = cache - cache_list_k.append(cache_k) - cache_list_v.append(cache_v) - hidden_list.append(inputs) - - return inputs, hidden_list, (cache_list_k, cache_list_v) - - -class ErnieModel(D.Layer): - def __init__(self, cfg, name=None): - """ - Fundamental pretrained Ernie model - """ - log.debug('init ErnieModel with config: %s' % repr(cfg)) - D.Layer.__init__(self) - d_model = cfg['hidden_size'] - d_emb = cfg.get('emb_size', cfg['hidden_size']) - d_vocab = cfg['vocab_size'] - d_pos = cfg['max_position_embeddings'] - d_sent = cfg.get("sent_type_vocab_size") or cfg['type_vocab_size'] - self.n_head = cfg['num_attention_heads'] - self.return_additional_info = cfg.get('return_additional_info', False) - initializer = F.initializer.TruncatedNormal( - scale=cfg['initializer_range']) - - self.ln = _build_ln(d_model, name=append_name(name, 'pre_encoder')) - self.word_emb = D.Embedding([d_vocab, d_emb], - param_attr=F.ParamAttr( - name=append_name( - name, 'word_embedding'), - initializer=initializer)) - self.pos_emb = D.Embedding([d_pos, d_emb], - param_attr=F.ParamAttr( - name=append_name(name, 'pos_embedding'), - initializer=initializer)) - self.sent_emb = D.Embedding([d_sent, d_emb], - param_attr=F.ParamAttr( - name=append_name( - name, 'sent_embedding'), - initializer=initializer)) - prob = cfg['hidden_dropout_prob'] - self.dropout = lambda i: L.dropout( - i, - dropout_prob=prob, - dropout_implementation="upscale_in_train", - ) if self.training else i - - self.encoder_stack = ErnieEncoderStack(cfg, append_name( - name, 'encoder')) - if cfg.get('has_pooler', True): - self.pooler = _build_linear( - cfg['hidden_size'], - cfg['hidden_size'], - append_name(name, 'pooled_fc'), - initializer, - act='tanh') - else: - self.pooler = None - self.train() - - def eval(self): - if F.in_dygraph_mode(): - super(ErnieModel, self).eval() - self.training = False - for l in self.sublayers(): - l.training = False - - def train(self): - if F.in_dygraph_mode(): - super(ErnieModel, self).train() - self.training = True - for l in self.sublayers(): - l.training = True - - def forward(self, - src_ids, - sent_ids=None, - pos_ids=None, - input_mask=None, - attn_bias=None, - past_cache=None, - use_causal_mask=False): - """ - Args: - src_ids (`Variable` of shape `[batch_size, seq_len]`): - Indices of input sequence tokens in the vocabulary. - sent_ids (optional, `Variable` of shape `[batch_size, seq_len]`): - aka token_type_ids, Segment token indices to indicate first and second portions of the inputs. - if None, assume all tokens come from `segment_a` - pos_ids(optional, `Variable` of shape `[batch_size, seq_len]`): - Indices of positions of each input sequence tokens in the position embeddings. - input_mask(optional `Variable` of shape `[batch_size, seq_len]`): - Mask to avoid performing attention on the padding token indices of the encoder input. 
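- Values should be 1.0 for tokens that may be attended to and 0.0 for padding; if None, it is derived as `src_ids != 0` in the body below.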
- attn_bias(optional, `Variable` of shape `[batch_size, seq_len, seq_len] or False`): - 3D version of `input_mask`; if set, it overrides `input_mask`; if set to False, no attention mask will be applied - past_cache(optional, tuple of two lists: cached key and cached value, - each is a list of `Variable`s of shape `[batch_size, seq_len, hidden_size]`): - cached key/value tensors that will be concatenated to the generated key/value when performing self attention. - if set, `attn_bias` should not be None. - - Returns: - pooled (`Variable` of shape `[batch_size, hidden_size]`): - output logits of pooler classifier - encoded(`Variable` of shape `[batch_size, seq_len, hidden_size]`): - output logits of transformer stack - """ - assert len( - src_ids.shape - ) == 2, 'expect src_ids.shape = [batch, sequence], got %s' % (repr( - src_ids.shape)) - assert attn_bias is not None if past_cache else True, 'if `past_cache` is specified, attn_bias should not be None' - d_batch = L.shape(src_ids)[0] - d_seqlen = L.shape(src_ids)[1] - if pos_ids is None: - pos_ids = L.reshape(L.range(0, d_seqlen, 1, dtype='int32'), [1, -1]) - pos_ids = L.cast(pos_ids, 'int64') - if attn_bias is None: - if input_mask is None: - input_mask = L.cast(src_ids != 0, 'float32') - assert len(input_mask.shape) == 2 - input_mask = L.unsqueeze(input_mask, axes=[-1]) - attn_bias = L.matmul(input_mask, input_mask, transpose_y=True) - if use_causal_mask: - sequence = L.reshape( - L.range(0, d_seqlen, 1, dtype='float32') + 1., - [1, 1, -1, 1]) - causal_mask = L.cast( - (L.matmul(sequence, 1. / sequence, transpose_y=True) >= 1.), - 'float32') - attn_bias *= causal_mask - else: - assert len( - attn_bias.shape - ) == 3, 'expect attn_bias to be rank 3, got %r' % attn_bias.shape - attn_bias = (1. - attn_bias) * -10000.0 - attn_bias = L.unsqueeze(attn_bias, [1]) - attn_bias = L.expand(attn_bias, - [1, self.n_head, 1, 1]) # avoid broadcast =_= - attn_bias.stop_gradient = True - - if sent_ids is None: - sent_ids = L.zeros_like(src_ids) - - src_embedded = self.word_emb(src_ids) - pos_embedded = self.pos_emb(pos_ids) - sent_embedded = self.sent_emb(sent_ids) - embedded = src_embedded + pos_embedded + sent_embedded - - embedded = self.dropout(self.ln(embedded)) - - encoded, hidden_list, cache_list = self.encoder_stack( - embedded, attn_bias, past_cache=past_cache) - if self.pooler is not None: - pooled = self.pooler(encoded[:, 0, :]) - else: - pooled = None - - additional_info = { - 'hiddens': hidden_list, - 'caches': cache_list, - } - - if self.return_additional_info: - return pooled, encoded, additional_info - else: - return pooled, encoded diff --git a/hub_module/modules/text/text_generation/ernie_gen_poetry/model/modeling_ernie_gen.py b/hub_module/modules/text/text_generation/ernie_gen_poetry/model/modeling_ernie_gen.py deleted file mode 100644 index ec6dc8fcc13c3741cae03b46f280bb409058cc09..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/ernie_gen_poetry/model/modeling_ernie_gen.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.fluid as F -import paddle.fluid.layers as L - -from ernie_gen_poetry.model.modeling_ernie import ErnieModel -from ernie_gen_poetry.model.modeling_ernie import _build_linear, _build_ln, append_name - - -class ErnieModelForGeneration(ErnieModel): - def __init__(self, cfg, name=None): - cfg['return_additional_info'] = True - cfg['has_pooler'] = False - super(ErnieModelForGeneration, self).__init__(cfg, name=name) - initializer = F.initializer.TruncatedNormal( - scale=cfg['initializer_range']) - d_model = cfg['hidden_size'] - d_vocab = cfg['vocab_size'] - - self.mlm = _build_linear( - d_model, - d_model, - append_name(name, 'mask_lm_trans_fc'), - initializer, - act=cfg['hidden_act']) - self.mlm_ln = _build_ln( - d_model, name=append_name(name, 'mask_lm_trans')) - self.mlm_bias = L.create_parameter( - dtype='float32', - shape=[d_vocab], - attr=F.ParamAttr( - name=append_name(name, 'mask_lm_out_fc.b_0'), - initializer=F.initializer.Constant(value=0.0)), - is_bias=True, - ) - - def forward(self, src_ids, *args, **kwargs): - tgt_labels = kwargs.pop('tgt_labels', None) - tgt_pos = kwargs.pop('tgt_pos', None) - encode_only = kwargs.pop('encode_only', False) - _, encoded, info = ErnieModel.forward(self, src_ids, *args, **kwargs) - if encode_only: - return None, None, info - elif tgt_labels is None: - encoded = self.mlm(encoded) - encoded = self.mlm_ln(encoded) - logits = L.matmul( - encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias - output_ids = L.argmax(logits, -1) - return output_ids, logits, info - else: - encoded_2d = L.gather_nd(encoded, tgt_pos) - encoded_2d = self.mlm(encoded_2d) - encoded_2d = self.mlm_ln(encoded_2d) - logits_2d = L.matmul( - encoded_2d, self.word_emb.weight, - transpose_y=True) + self.mlm_bias - if len(tgt_labels.shape) == 1: - tgt_labels = L.reshape(tgt_labels, [-1, 1]) - - loss = L.reduce_mean( - L.softmax_with_cross_entropy( - logits_2d, - tgt_labels, - soft_label=(tgt_labels.shape[-1] != 1))) - return loss, logits_2d, info diff --git a/hub_module/modules/text/text_generation/ernie_gen_poetry/model/tokenizing_ernie.py b/hub_module/modules/text/text_generation/ernie_gen_poetry/model/tokenizing_ernie.py deleted file mode 100644 index 3039b7028f5da991189527b8145b05c952dafbbd..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/ernie_gen_poetry/model/tokenizing_ernie.py +++ /dev/null @@ -1,171 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
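-# A small illustration of the greedy longest-match-first split implemented by
-# `_wordpiece` below (the vocab here is invented for the example and is not
-# part of this module):
-#
-#     vocab = {'hello': 0, '##world': 1, '[UNK]': 2}
-#     _wordpiece('helloworld', vocab, unk_token='[UNK]')
-#     # -> (['hello', '##world'], [(0, 5), (5, 10)])
-#     _wordpiece('xyz', vocab, unk_token='[UNK]')  # no in-vocab decomposition
-#     # -> (['[UNK]'], [(0, 3)])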
- -import six -import re -import logging -from functools import partial - -import numpy as np - -import io - -open = partial(io.open, encoding='utf8') - -log = logging.getLogger(__name__) - -_max_input_chars_per_word = 100 - - -def _wordpiece(token, vocab, unk_token, prefix='##', sentencepiece_prefix=''): - """ wordpiece: helloworld => [hello, ##world] """ - chars = list(token) - if len(chars) > _max_input_chars_per_word: - return [unk_token], [(0, len(chars))] - - is_bad = False - start = 0 - sub_tokens = [] - sub_pos = [] - while start < len(chars): - end = len(chars) - cur_substr = None - while start < end: - substr = "".join(chars[start:end]) - if start == 0: - substr = sentencepiece_prefix + substr - if start > 0: - substr = prefix + substr - if substr in vocab: - cur_substr = substr - break - end -= 1 - if cur_substr is None: - is_bad = True - break - sub_tokens.append(cur_substr) - sub_pos.append((start, end)) - start = end - if is_bad: - return [unk_token], [(0, len(chars))] - else: - return sub_tokens, sub_pos - - -class ErnieTokenizer(object): - def __init__(self, - vocab, - unk_token='[UNK]', - sep_token='[SEP]', - cls_token='[CLS]', - pad_token='[PAD]', - mask_token='[MASK]', - wordpiece_prefix='##', - sentencepiece_prefix='', - lower=True, - encoding='utf8', - special_token_list=[]): - if not isinstance(vocab, dict): - raise ValueError( - 'expect `vocab` to be instance of dict, got %s' % type(vocab)) - self.vocab = vocab - self.lower = lower - self.prefix = wordpiece_prefix - self.sentencepiece_prefix = sentencepiece_prefix - self.pad_id = self.vocab[pad_token] - self.cls_id = cls_token and self.vocab[cls_token] - self.sep_id = sep_token and self.vocab[sep_token] - self.unk_id = unk_token and self.vocab[unk_token] - self.mask_id = mask_token and self.vocab[mask_token] - self.unk_token = unk_token - special_tokens = { - pad_token, cls_token, sep_token, unk_token, mask_token - } | set(special_token_list) - pat_str = '' - for t in special_tokens: - if t is None: - continue - pat_str += '(%s)|' % re.escape(t) - pat_str += r'([a-zA-Z0-9]+|\S)' - log.debug('regex: %s' % pat_str) - self.pat = re.compile(pat_str) - self.encoding = encoding - - def tokenize(self, text): - if len(text) == 0: - return [] - if six.PY3 and not isinstance(text, six.string_types): - text = text.decode(self.encoding) - if six.PY2 and isinstance(text, str): - text = text.decode(self.encoding) - - res = [] - for match in self.pat.finditer(text): - match_group = match.group(0) - if match.groups()[-1]: - if self.lower: - match_group = match_group.lower() - words, _ = _wordpiece( - match_group, - vocab=self.vocab, - unk_token=self.unk_token, - prefix=self.prefix, - sentencepiece_prefix=self.sentencepiece_prefix) - else: - words = [match_group] - res += words - return res - - def convert_tokens_to_ids(self, tokens): - return [self.vocab.get(t, self.unk_id) for t in tokens] - - def truncate(self, id1, id2, seqlen): - len1 = len(id1) - len2 = len(id2) - half = seqlen // 2 - if len1 > len2: - len1_truncated, len2_truncated = max(half, seqlen - len2), min( - half, len2) - else: - len1_truncated, len2_truncated = min(half, seqlen - len1), max( - half, seqlen - len1) - return id1[:len1_truncated], id2[:len2_truncated] - - def build_for_ernie(self, text_id, pair_id=[]): - """build sentence type id, add [CLS] [SEP]""" - text_id_type = np.zeros_like(text_id, dtype=np.int64) - ret_id = np.concatenate([[self.cls_id], text_id, [self.sep_id]], 0) - ret_id_type = np.concatenate([[0], text_id_type, [0]], 0) - - if len(pair_id): - 
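# Layout produced here: [CLS] text [SEP] pair [SEP], with sentence-type
- # ids 0 for the text segment (including [CLS] and the first [SEP]) and 1
- # for the pair segment and its trailing [SEP]. -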
pair_id_type = np.ones_like(pair_id, dtype=np.int64) - ret_id = np.concatenate([ret_id, pair_id, [self.sep_id]], 0) - ret_id_type = np.concatenate([ret_id_type, pair_id_type, [1]], 0) - return ret_id, ret_id_type - - def encode(self, text, pair=None, truncate_to=None): - text_id = np.array( - self.convert_tokens_to_ids(self.tokenize(text)), dtype=np.int64) - text_id_type = np.zeros_like(text_id, dtype=np.int64) - if pair is not None: - pair_id = np.array( - self.convert_tokens_to_ids(self.tokenize(pair)), dtype=np.int64) - else: - pair_id = [] - if truncate_to is not None: - text_id, pair_id = self.truncate( - text_id, [] if pair_id is None else pair_id, truncate_to) - - ret_id, ret_id_type = self.build_for_ernie(text_id, pair_id) - return ret_id, ret_id_type diff --git a/hub_module/modules/text/text_generation/ernie_gen_poetry/module.py b/hub_module/modules/text/text_generation/ernie_gen_poetry/module.py deleted file mode 100644 index e7d3914c5bbe8336839075ba91b11d558c0a84f9..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/ernie_gen_poetry/module.py +++ /dev/null @@ -1,203 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import ast -import json - -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.module.module import runnable -from paddlehub.module.nlp_module import DataFormatError -from paddlehub.common.logger import logger -from paddlehub.module.module import moduleinfo, serving - -import argparse -import os -import numpy as np - -import paddle.fluid.dygraph as D - -from ernie_gen_poetry.model.tokenizing_ernie import ErnieTokenizer -from ernie_gen_poetry.model.decode import beam_search_infilling -from ernie_gen_poetry.model.modeling_ernie_gen import ErnieModelForGeneration - - -@moduleinfo( - name="ernie_gen_poetry", - version="1.0.2", - summary= - "ERNIE-GEN is a multi-flow language generation framework for both pre-training and fine-tuning. 
This module has fine-tuned for poetry generation task.", - author="baidu-nlp", - author_email="", - type="nlp/text_generation", -) -class ErnieGen(hub.NLPPredictionModule): - def _initialize(self): - """ - initialize with the necessary elements - """ - assets_path = os.path.join(self.directory, "assets") - gen_checkpoint_path = os.path.join(assets_path, "ernie_gen_poetry") - ernie_cfg_path = os.path.join(assets_path, 'ernie_config.json') - with open(ernie_cfg_path, encoding='utf8') as ernie_cfg_file: - ernie_cfg = dict(json.loads(ernie_cfg_file.read())) - ernie_vocab_path = os.path.join(assets_path, 'vocab.txt') - with open(ernie_vocab_path, encoding='utf8') as ernie_vocab_file: - ernie_vocab = { - j.strip().split('\t')[0]: i - for i, j in enumerate(ernie_vocab_file.readlines()) - } - - with fluid.dygraph.guard(fluid.CPUPlace()): - with fluid.unique_name.guard(): - self.model = ErnieModelForGeneration(ernie_cfg) - finetuned_states, _ = D.load_dygraph(gen_checkpoint_path) - self.model.set_dict(finetuned_states) - - self.tokenizer = ErnieTokenizer(ernie_vocab) - self.rev_dict = {v: k for k, v in self.tokenizer.vocab.items()} - self.rev_dict[self.tokenizer.pad_id] = '' # replace [PAD] - self.rev_dict[self.tokenizer.unk_id] = '' # replace [PAD] - self.rev_lookup = np.vectorize(lambda i: self.rev_dict[i]) - - @serving - def generate(self, texts, use_gpu=False, beam_width=5): - """ - Get the continuation of the input poetry. - - Args: - texts(list): the front part of a poetry. - use_gpu(bool): whether use gpu to predict or not - beam_width(int): the beam search width. - - Returns: - results(list): the poetry continuations. - """ - if texts and isinstance(texts, list) and all(texts) and all( - [isinstance(text, str) for text in texts]): - predicted_data = texts - else: - raise ValueError( - "The input texts should be a list with nonempty string elements." 
- ) - for i, text in enumerate(texts): - if ',' not in text or '。' not in text: - logger.warning( - "The input text: %s does not contain ',' or '。', so it is not a complete verse and may result in unexpected output" - % text) - else: - front, rear = text[:-1].split(',') - if len(front) != len(rear): - logger.warning( - "The input text: %s is not an antithetical parallelism (its two halves differ in length), which may result in unexpected output" - % text) - - for char in text: - if not '\u4e00' <= char <= '\u9fff' and char not in [',', '。']: - logger.warning( - "The input text: %s contains characters other than Chinese characters, ',' or '。', which may result in unexpected output" - % text) - break - - if use_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ: - use_gpu = False - logger.warning( - "use_gpu has been set False as you didn't set the environment variable CUDA_VISIBLE_DEVICES while using use_gpu=True" - ) - if use_gpu: - place = fluid.CUDAPlace(0) - else: - place = fluid.CPUPlace() - - with fluid.dygraph.guard(place): - self.model.eval() - results = [] - for text in predicted_data: - sample_results = [] - ids, sids = self.tokenizer.encode(text) - src_ids = D.to_variable(np.expand_dims(ids, 0)) - src_sids = D.to_variable(np.expand_dims(sids, 0)) - output_ids = beam_search_infilling( - self.model, - src_ids, - src_sids, - eos_id=self.tokenizer.sep_id, - sos_id=self.tokenizer.cls_id, - attn_id=self.tokenizer.vocab['[MASK]'], - max_decode_len=80, - max_encode_len=20, - beam_width=beam_width, - tgt_type_id=1) - output_str = self.rev_lookup(output_ids[0].numpy()) - - for ostr in output_str.tolist(): - if '[SEP]' in ostr: - ostr = ostr[:ostr.index('[SEP]')] - sample_results.append("".join(ostr)) - results.append(sample_results) - return results - - def add_module_config_arg(self): - """ - Add the command config options - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU for prediction") - - self.arg_config_group.add_argument( - '--beam_width', type=int, default=5, help="the beam search width") - - @runnable - def run_cmd(self, argvs): - """ - Run as a command - """ - self.parser = argparse.ArgumentParser( - description='Run the %s module.' % self.name, - prog='hub run %s' % self.name, - usage='%(prog)s', - add_help=True) - - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, optional.") - - self.add_module_config_arg() - self.add_module_input_arg() - - args = self.parser.parse_args(argvs) - - try: - input_data = self.check_input_data(args) - except (DataFormatError, RuntimeError): - self.parser.print_help() - return None - - results = self.generate( - texts=input_data, use_gpu=args.use_gpu, beam_width=args.beam_width) - - return results - - -if __name__ == "__main__": - module = ErnieGen() - for result in module.generate(['昔年旅南服,始识王荆州。', '高名出汉阴,禅阁跨香岑。'], - beam_width=5): - print(result) diff --git a/hub_module/modules/text/text_generation/ernie_tiny_couplet/module.py b/hub_module/modules/text/text_generation/ernie_tiny_couplet/module.py deleted file mode 100644 index 53d5b6c8aa9d21ac1a344b9b17843381ca571d18..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/ernie_tiny_couplet/module.py +++ /dev/null @@ -1,144 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import ast -import argparse - -import paddlehub as hub -from paddlehub.module.module import moduleinfo, serving, runnable -from paddlehub.module.nlp_module import DataFormatError - - -@moduleinfo( - name="ernie_tiny_couplet", - version="1.0.0", - summary="couplet generation model fine-tuned with ernie_tiny module", - author="paddlehub", - author_email="", - type="nlp/text_generation", -) -class ErnieTinyCouplet(hub.NLPPredictionModule): - def _initialize(self, use_gpu=False): - # Load Paddlehub ERNIE Tiny pretrained model - self.module = hub.Module(name="ernie_tiny") - inputs, outputs, program = self.module.context( - trainable=True, max_seq_len=128) - - # Download dataset and get its label list and label num - # If you just want labels information, you can omit its tokenizer parameter to avoid preprocessing the train set. - dataset = hub.dataset.Couplet() - self.label_list = dataset.get_labels() - - # Setup RunConfig for PaddleHub Fine-tune API - config = hub.RunConfig( - use_data_parallel=False, - use_cuda=use_gpu, - batch_size=1, - checkpoint_dir=os.path.join(self.directory, "assets", "ckpt"), - strategy=hub.AdamWeightDecayStrategy()) - - # Construct transfer learning network - # Use "pooled_output" for classification tasks on an entire sentence. - # Use "sequence_output" for token-level output. - pooled_output = outputs["pooled_output"] - sequence_output = outputs["sequence_output"] - - # Define a classfication fine-tune task by PaddleHub's API - self.gen_task = hub.TextGenerationTask( - feature=pooled_output, - token_feature=sequence_output, - max_seq_len=128, - num_classes=dataset.num_labels, - config=config, - metrics_choices=["bleu"]) - - def generate(self, texts): - # Add 0x02 between characters to match the format of training data, - # otherwise the length of prediction results will not match the input string - # if the input string contains non-Chinese characters. - formatted_text_a = list(map("\002".join, texts)) - - # Use the appropriate tokenizer to preprocess the data - # For ernie_tiny, it use BertTokenizer too. - tokenizer = hub.BertTokenizer(vocab_file=self.module.get_vocab_path()) - encoded_data = [ - tokenizer.encode(text=text, max_seq_len=128) - for text in formatted_text_a - ] - results = self.gen_task.predict( - data=encoded_data, - label_list=self.label_list, - accelerate_mode=False) - results = [["".join(sample_result) for sample_result in sample_results] - for sample_results in results] - return results - - def add_module_config_arg(self): - """ - Add the command config options - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU for prediction") - - @runnable - def run_cmd(self, argvs): - """ - Run as a command - """ - self.parser = argparse.ArgumentParser( - description='Run the %s module.' 
% self.name, - prog='hub run %s' % self.name, - usage='%(prog)s', - add_help=True) - - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - - self.add_module_config_arg() - self.add_module_input_arg() - - args = self.parser.parse_args(argvs) - - try: - input_data = self.check_input_data(args) - except (DataFormatError, RuntimeError): - self.parser.print_help() - return None - - results = self.generate(texts=input_data) - - return results - - @serving - def serving_method(self, texts): - """ - Run as a service. - """ - results = self.generate(texts) - return results - - -if __name__ == '__main__': - module = ErnieTinyCouplet() - results = module.generate(["风吹云乱天垂泪", "若有经心风过耳"]) - for result in results: - print(result) diff --git a/hub_module/modules/text/text_generation/plato2_en_base/models/__init__.py b/hub_module/modules/text/text_generation/plato2_en_base/models/__init__.py deleted file mode 100644 index 980118807fc1c3bbfae3c0ec3749bc2071f5f7d1..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_base/models/__init__.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Define model.""" - -from plato2_en_base.models.model_base import Model - -MODEL_REGISTRY = {} - -__all__ = [ - "MODEL_REGISTRY", "register_model", "create_model", "add_cmdline_args" -] - - -def register_model(name): - """ - Register a new model class. - """ - - def __wrapped__(cls): - if name in MODEL_REGISTRY: - raise ValueError(f"Cannot register duplicate model ({name})") - if not issubclass(cls, Model): - raise ValueError( - f"Model ({name}: {cls.__name__}) must extend Model") - MODEL_REGISTRY[name] = cls - return cls - - return __wrapped__ - - -def create_model(args, place) -> Model: - """ - Create a model. - """ - return MODEL_REGISTRY[args.model](args, place) - - -def add_cmdline_args(parser): - """ Add cmdline argument of Model. """ - group = parser.add_argument_group("Model") - - # Model - group.add_argument("--model", type=str, required=True) - - # Config - group.add_argument("--config_path", type=str, required=True) - - # Model related.
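- # `parse_known_args` is used here only to peek at `--model` so the concrete
- # model class can be resolved; that class then registers its own argument
- # group on the same parser. A typical call (sketch; argv must already
- # contain --model and --config_path, since both are required above):
- #
- #     parser = argparse.ArgumentParser()
- #     add_cmdline_args(parser)
- #     args = parser.parse_args()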
- args, _ = parser.parse_known_args() - if args.model not in MODEL_REGISTRY: - raise ValueError(f"Unknown model type: {args.model}") - MODEL_REGISTRY[args.model].add_cmdline_args(parser) - return group - - -import plato2_en_base.models.nsp_model -import plato2_en_base.models.plato diff --git a/hub_module/modules/text/text_generation/plato2_en_base/models/generator.py b/hub_module/modules/text/text_generation/plato2_en_base/models/generator.py deleted file mode 100644 index 9d43a25bb7e34474857c32fb77d387d8573dc41a..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_base/models/generator.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Generator class""" - -import numpy as np -import paddle.fluid.layers as layers - -from plato2_en_base.utils.args import str2bool - - -class Generator(object): - """ - Generator class - - Use generator in inference phase. - """ - - @classmethod - def add_cmdline_args(cls, parser): - """Add cmdline argurments.""" - group = parser.add_argument_group("Generator") - group.add_argument("--min_dec_len", type=int, default=1) - group.add_argument("--max_dec_len", type=int, default=64) - - group.add_argument( - "--decoding_strategy", - type=str, - default="topk_sampling", - choices=["beam_search", "topk_sampling", "topp_sampling"]) - group.add_argument("--temperature", type=float, default=1.) - group.add_argument("--ignore_unk", type=str2bool, default=True) - - # multi sampling - group.add_argument("--num_samples", type=int, default=None) - - # top-k sampling - group.add_argument("--topk", type=int, default=10) - - # top-p sampling - group.add_argument("--topp", type=float, default=0.9) - - # beam search - group.add_argument("--beam_size", type=int, default=10) - group.add_argument("--length_average", type=str2bool, default=True) - group.add_argument("--length_penalty", type=float, default=0.0) - - return group - - def __init__(self, args): - self.min_dec_len = args.min_dec_len - self.max_dec_len = args.max_dec_len - self.eos_id = args.eos_id - self.unk_id = args.unk_id - self.mask_id = args.mask_id - self.vocab_size = args.vocab_size - - # model related - - # basic settings - self.decoding_strategy = args.decoding_strategy - self.ignore_unk = args.ignore_unk - self.continuous_position = args.continuous_position - self.temperature = args.temperature - - # reranking - self.num_samples = args.num_samples - - # top-k sampling - self.topk = args.topk - - # top-p sampling - self.topp = args.topp - - # beam search - self.beam_size = args.beam_size - self.length_penalty = args.length_penalty - self.length_average = args.length_average - return - - def inference(self, model, inputs, outputs): - """ - Run inference. - - Args: - inputs(dict): Its key is input name(str) and its value is a Variable. - model(object): A generate model. Need to implement `_generation_network` and `_calc_logits`. 
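- outputs(dict): Its key is output name(str) and its value is a Variable; not referenced by the decoding loop below.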
- - Returns: - dict(str:Variable): Its key is output name(str) and its value is a Variable. - """ - # prepare while loop - max_len = layers.fill_constant( - shape=[1], dtype="int64", value=self.max_dec_len, force_cpu=True) - min_len = layers.fill_constant( - shape=[1], dtype="int64", value=self.min_dec_len, force_cpu=True) - step_idx = layers.fill_constant( - shape=[1], dtype="int64", value=0, force_cpu=True) - - ids = layers.array_write( - layers.reshape(inputs["tgt_ids"], (-1, 1)), step_idx) - pos_biases = layers.array_write( - layers.reshape(inputs["tgt_pos"], (-1, 1)), step_idx) - scores = layers.array_write(inputs["init_score"], step_idx) - tgt_generation_mask = layers.array_write(inputs["tgt_generation_mask"], - step_idx) - parent_idx = inputs["parent_idx"] - - if self.decoding_strategy == "beam_search": - beam_size = self.beam_size - else: - beam_size = 1 - - eos_penalty = np.zeros(self.vocab_size, dtype="float32") - eos_penalty[self.eos_id] = -1e9 - eos_penalty = layers.assign(eos_penalty) - - token_penalty = np.zeros(self.vocab_size, dtype="float32") - token_penalty[self.unk_id] = -1e9 - if self.mask_id >= 0: - token_penalty[self.mask_id] = -1e9 - token_penalty = layers.assign(token_penalty) - - # start while loop - cond = layers.less_than(x=step_idx, y=max_len) - while_op = layers.While(cond) - with while_op.block(): - pre_ids = layers.array_read(array=ids, i=step_idx) - pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True) - pre_scores = layers.array_read(array=scores, i=step_idx) - pos_bias = layers.array_read(array=pos_biases, i=step_idx) - pos_bias = layers.gather(input=pos_bias, index=parent_idx) - - tmp_tgt_generation_mask = layers.array_read( - tgt_generation_mask, i=step_idx) - dtype = tmp_tgt_generation_mask.dtype - - append_mask = layers.fill_constant_batch_size_like( - input=pre_ids, value=1.0, shape=[-1, 1, 1], dtype=dtype) - tmp_tgt_generation_mask = layers.concat( - [tmp_tgt_generation_mask, append_mask], axis=2) - pre_mask = tmp_tgt_generation_mask = layers.gather( - input=tmp_tgt_generation_mask, index=parent_idx) - - pre_sent = layers.fill_constant_batch_size_like( - input=pre_mask, value=1, shape=[-1, 1, 1], dtype=pre_ids.dtype) - - if self.continuous_position: - pre_pos = layers.elementwise_mul( - x=layers.fill_constant_batch_size_like( - input=pre_mask, - value=1, - shape=[-1, 1, 1], - dtype=pre_ids.dtype), - y=step_idx, - axis=0) + pos_bias - else: - pre_pos = layers.elementwise_mul( - x=layers.fill_constant_batch_size_like( - input=pre_mask, - value=1, - shape=[-1, 1, 1], - dtype=pre_ids.dtype), - y=step_idx, - axis=0) - - dec_out, _ = model._generation_network( - token_ids=pre_ids, - type_ids=pre_sent, - pos_ids=pre_pos, - generation_mask=tmp_tgt_generation_mask, - gather_idx=parent_idx) - logits = model._calc_logits(dec_out) - - # ignore unk and mask token - if self.ignore_unk: - logits = layers.elementwise_add(logits, token_penalty, axis=1) - - # min dec length - min_len_cond = layers.less_than(x=step_idx, y=min_len) - - def min_len_penalty(): - """Plus minimum length penalty.""" - return layers.elementwise_add(logits, eos_penalty, axis=1) - - def no_penalty(): - """No penalty.""" - return logits - - logits = layers.case([(min_len_cond, min_len_penalty)], - default=no_penalty) - - # get probs - probs = layers.softmax(logits / self.temperature) - - if self.decoding_strategy == "beam_search": - topk_scores, topk_indices = layers.topk( - input=probs, k=beam_size) - else: - if self.decoding_strategy.startswith("sampling"): - sampling_ids = 
layers.sampling_id(probs, dtype="int") - elif self.decoding_strategy.startswith("topk_sampling"): - topk_probs, _ = layers.topk(input=probs, k=self.topk) - ge_cond = layers.cast( - layers.greater_equal( - probs, layers.unsqueeze(topk_probs[:, -1], [1])), - "float32") - old_probs = probs - probs = probs * ge_cond / layers.reduce_sum( - topk_probs, dim=-1, keep_dim=True) - sampling_ids = layers.sampling_id(probs, dtype="int") - probs = old_probs - elif self.decoding_strategy.startswith("topp_sampling"): - sorted_probs, sorted_idx = layers.argsort( - probs, descending=True) - cum_sorted_probs = layers.cumsum( - sorted_probs, axis=1, exclusive=True) - lt_cond = layers.cast( - layers.less_than( - cum_sorted_probs, - layers.fill_constant_batch_size_like( - cum_sorted_probs, cum_sorted_probs.shape, - cum_sorted_probs.dtype, self.topp)), "float32") - old_probs = probs - candidate_probs = sorted_probs * lt_cond - probs = candidate_probs / layers.reduce_sum( - candidate_probs, dim=-1, keep_dim=True) - sampling_ids = layers.sampling_id(probs, dtype="int") - sampling_ids = layers.index_sample( - sorted_idx, layers.unsqueeze(sampling_ids, [1])) - sampling_ids = layers.squeeze(sampling_ids, [1]) - probs = old_probs - else: - raise ValueError(self.decoding_strategy) - - sampling_scores = layers.one_hot( - layers.unsqueeze(sampling_ids, [1]), probs.shape[1]) - sampling_scores = sampling_scores * probs - ( - 1 - sampling_scores) * 1e3 - topk_scores, topk_indices = layers.topk( - input=sampling_scores, k=1) - - pre_len = layers.cast(step_idx, "float32") - layers.increment(x=step_idx, value=1.0, in_place=True) - cur_len = layers.cast(step_idx, "float32") - - # update scores - if self.length_average: - accu_scores = layers.elementwise_add( - x=layers.log(topk_scores), y=pre_scores * pre_len, - axis=0) / cur_len - elif self.length_penalty > 0: - pre_lp = layers.pow((5 + pre_len) / 6, self.length_penalty) - cur_lp = layers.pow((5 + cur_len) / 6, self.length_penalty) - accu_scores = layers.elementwise_add( - x=layers.log(topk_scores), y=pre_scores * pre_lp, - axis=0) / cur_lp - else: - accu_scores = layers.elementwise_add( - x=layers.log(topk_scores), y=pre_scores, axis=0) - topk_indices = layers.lod_reset(topk_indices, pre_ids) - accu_scores = layers.lod_reset(accu_scores, pre_ids) - selected_ids, selected_scores, gather_idx = layers.beam_search( - pre_ids=pre_ids, - pre_scores=pre_scores, - ids=topk_indices, - scores=accu_scores, - beam_size=beam_size, - end_id=self.eos_id, - return_parent_idx=True) - - layers.array_write(selected_ids, i=step_idx, array=ids) - layers.array_write(selected_scores, i=step_idx, array=scores) - layers.array_write(pre_mask, i=step_idx, array=tgt_generation_mask) - layers.array_write(pos_bias, i=step_idx, array=pos_biases) - - layers.assign(gather_idx, parent_idx) - - length_cond = layers.less_than(x=step_idx, y=max_len) - finish_cond = layers.logical_not(layers.is_empty(x=selected_ids)) - layers.logical_and(x=length_cond, y=finish_cond, out=cond) - - finished_ids, finished_scores = layers.beam_search_decode( - ids, scores, beam_size=beam_size, end_id=self.eos_id) - - predictions = { - "finished_ids": finished_ids, - "finished_scores": finished_scores, - "token_ids": inputs["token_ids"], - "data_id": inputs["data_id"] - } - return predictions diff --git a/hub_module/modules/text/text_generation/plato2_en_base/models/model_base.py b/hub_module/modules/text/text_generation/plato2_en_base/models/model_base.py deleted file mode 100644 index 
fa505803dc3350fc6d5fb938919cf0d85abddcb8..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_base/models/model_base.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Model base.""" - -from abc import abstractmethod, ABC - -import paddle.fluid as fluid -from paddle.fluid.incubate.fleet.collective import fleet, DistributedStrategy -import paddle.fluid.incubate.fleet.base.role_maker as role_maker -import paddle.fluid.layers as layers - -from plato2_en_base.models.optimizer import AdamW -from plato2_en_base.utils import init_pretraining_params, init_checkpoint, to_lodtensor -from plato2_en_base.utils.args import str2bool - - -class Model(ABC): - """ - Basic model wrapper for paddle. - """ - - @classmethod - def add_cmdline_args(cls, parser): - """Add cmdline argurments.""" - group = parser.add_argument_group("Model") - # Init checkpoint - group.add_argument("--init_checkpoint", type=str, default="") - group.add_argument("--init_pretraining_params", type=str, default="") - - # Optimizer - group.add_argument( - "-lr", - "--learning_rate", - type=float, - default=1e-5, - help="The learning rate for optimizer.") - group.add_argument( - "--warmup_steps", type=int, default=0, help="The warmup steps.") - group.add_argument( - "--weight_decay", - type=float, - default=0.0, - help="The weight decay for optimizer.") - group.add_argument( - "--max_grad_norm", - type=float, - default=.1, - help="The maximum norm of gradient.") - - group.add_argument("--use_recompute", type=str2bool, default=False) - group.add_argument("--use_amp", type=str2bool, default=False) - group.add_argument("--amp_loss_scaling", type=float, default=12800) - return group - - def __init__(self, args, place): - self.place = place - self.exe = fluid.Executor(place) - - self.init_checkpoint = args.init_checkpoint - self.init_pretraining_params = args.init_pretraining_params - - self.learning_rate = args.learning_rate - self.warmup_steps = args.warmup_steps - self.weight_decay = args.weight_decay - self.max_grad_norm = args.max_grad_norm - - self.is_distributed = args.is_distributed - self.use_recompute = args.use_recompute - self.use_amp = args.use_amp - self.amp_loss_scaling = args.amp_loss_scaling - self.run_infer = args.get("run_infer", False) - self.batch_size = args.get("batch_size", 1) - self._build_programs() - return - - def _build_programs(self): - """ - Build programs. - - Build train_program, eval_program and inference_program. Only use in static graph mode. 
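- In infer mode only `infer_program` is built and cloned with
- `clone(for_test=True)`; otherwise a train program (wrapped by fleet's
- distributed strategy when `is_distributed` is set) and a test-cloned
- eval program are built.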
- """ - if self.run_infer: - self.startup_program = fluid.Program() - # build infer program - self.infer_program = fluid.Program() - with fluid.program_guard(self.infer_program, self.startup_program): - with fluid.unique_name.guard(): - self.infer_feed_dict = inputs = self._get_feed_dict( - is_infer=True) - outputs = self.forward(inputs, is_infer=True) - predictions = self.infer(inputs, outputs) - self.infer_fetch_dict = predictions - self.infer_program = self.infer_program.clone(for_test=True) - - self.program = self.infer_program - else: - if self.is_distributed: - exec_strategy = fluid.ExecutionStrategy() - exec_strategy.use_experimental_executor = True - exec_strategy.num_threads = 4 - exec_strategy.num_iteration_per_drop_scope = 1 - - dist_strategy = DistributedStrategy() - dist_strategy.exec_strategy = exec_strategy - dist_strategy.nccl_comm_num = 1 - dist_strategy.fuse_all_reduce_ops = True - if self.use_recompute: - dist_strategy.forward_recompute = True - dist_strategy.enable_sequential_execution = True - if self.use_amp: - dist_strategy.use_amp = True - dist_strategy.amp_loss_scaling = self.amp_loss_scaling - self.dist_strategy = dist_strategy - - self.startup_program = fluid.Program() - # build train program - self.train_program = fluid.Program() - with fluid.program_guard(self.train_program, self.startup_program): - with fluid.unique_name.guard(): - self.feed_dict = inputs = self._get_feed_dict() - outputs = self.forward(inputs) - if self.is_distributed and self.use_recompute: - self.dist_strategy.recompute_checkpoints = outputs[ - "checkpoints"] - metrics, statistics = self.get_metrics_and_statistics( - inputs, outputs) - - # build eval program - self.eval_program = self.train_program.clone(for_test=True) - self.eval_fetch_dict = {**metrics, **statistics} - - scheduled_lr = self.optimize(metrics) - metrics["scheduled_lr"] = scheduled_lr - self.train_fetch_dict = metrics - - self.program = self.train_program - if self.is_distributed: - self.train_program = fleet.main_program - - self.exe.run(self.startup_program) - if self.init_pretraining_params != "": - init_pretraining_params(self.exe, self.init_pretraining_params, - self.program) - elif self.init_checkpoint != "": - init_checkpoint(self.exe, self.init_checkpoint, self.program) - return - - def load(self, model_dir, is_checkpoint=False): - """ - Load persistables or parameters. - """ - # TODO: support dygraph. - if is_checkpoint: - init_checkpoint(self.exe, model_dir, self.program) - else: - init_pretraining_params(self.exe, model_dir, self.program) - return - - def save(self, model_dir, is_checkpoint=False): - """ - Save persistables or parameters. - """ - # TODO: support dygraph. - if is_checkpoint: - fluid.io.save_persistables(self.exe, model_dir, self.program) - else: - fluid.io.save_params(self.exe, model_dir, self.program) - return - - @abstractmethod - def _get_feed_dict(self, is_infer=False): - """ - Return input feed list. - """ - pass - - def _get_feed(self, inputs, is_infer=False): - """ - Convert `inputs` into model's feed data format. - """ - if isinstance(inputs, list): - # return list direclty which is used in `get_data_loader`. - return inputs - for k in inputs: - if isinstance(inputs[k], list): - inputs[k] = to_lodtensor(inputs[k], self.place) - return inputs - - def get_data_loader(self, generator=None, is_infer=False): - """ - Return DataLoader. - - If generator is not `None`, the data loader set it as the batch generator. - """ - # TODO: support dygraph. 
- if is_infer: - feed_name_list, feed_list = zip(*self.infer_feed_dict.items()) - else: - feed_name_list, feed_list = zip(*self.feed_dict.items()) - loader = fluid.io.DataLoader.from_generator( - feed_list=feed_list, - capacity=64, - use_double_buffer=True, - iterable=True) - if generator is not None: - - def __wrapper__(): - for batch in generator(): - batch = self._get_feed(batch) - batch = [ - batch[name] for name in feed_name_list if name in batch - ] - yield batch - - loader.set_batch_generator(__wrapper__, self.place) - return loader - - @abstractmethod - def forward(self, inputs, is_infer=False): - """ - Run model main forward. - """ - pass - - @abstractmethod - def get_metrics_and_statistics(self, inputs, outputs): - """ - Get metrics and statistics. - """ - pass - - @abstractmethod - def infer(self, inputs, outputs): - """ - Run model inference. - """ - pass - - def optimize(self, metrics): - """ - Optimize the model by metrics(mainly `metrics["loss"]`). - """ - # TODO: support dygraph - if self.warmup_steps > 0: - scheduled_lr = layers.learning_rate_scheduler.noam_decay( - 1 / (self.warmup_steps * (self.learning_rate**2)), - self.warmup_steps) - else: - scheduled_lr = layers.create_global_var( - name=fluid.unique_name.generate("learning_rate"), - shape=[1], - value=self.learning_rate, - dtype="float32", - persistable=True) - grad_clip = fluid.clip.GradientClipByGlobalNorm(self.max_grad_norm) - - self.optimizer = AdamW( - learning_rate=scheduled_lr, - grad_clip=grad_clip, - weight_decay=self.weight_decay) - - if self.is_distributed: - self.optimizer = fleet.distributed_optimizer( - self.optimizer, strategy=self.dist_strategy) - - self.optimizer.minimize(metrics["loss"]) - return scheduled_lr - - def _execute(self, program, feed, fetch_dict, **kwargs): - """ - Execute program. - """ - fetch_list = [var.name for var in fetch_dict.values()] - fetch_vars = self.exe.run(program, feed, fetch_list, **kwargs) - return dict(zip(fetch_dict.keys(), fetch_vars)) - - def train_step(self, inputs): - """ - Run one training step. - """ - # TODO: support dygraph. - return self._execute( - self.train_program, - self._get_feed(inputs), - self.train_fetch_dict, - use_program_cache=True) - - def eval_step(self, inputs): - """ - Run one evaluation step. - """ - # TODO: support dygraph. - return self._execute(self.eval_program, self._get_feed(inputs), - self.eval_fetch_dict) - - def infer_step(self, inputs): - """ - Run one inference step. - """ - # TODO: support dygraph. - return self._execute(self.infer_program, - self._get_feed(inputs, is_infer=True), - self.infer_fetch_dict) - - def save_inference_model(self, inference_model_path): - """ - Save the inference model. - """ - feed_list = [var.name for var in self.infer_feed_dict.values()] - fetch_list = list(self.infer_fetch_dict.values()) - - fluid.io.save_inference_model(inference_model_path, feed_list, - fetch_list, self.exe, self.infer_program) diff --git a/hub_module/modules/text/text_generation/plato2_en_base/models/nsp_model.py b/hub_module/modules/text/text_generation/plato2_en_base/models/nsp_model.py deleted file mode 100644 index 93f9c4bb648ec73ae21c1088a64f140ac26d47c6..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_base/models/nsp_model.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NSP model.""" - -import paddle.fluid as fluid -import paddle.fluid.layers as layers - -from . import register_model -from .model_base import Model -from .unified_transformer import UnifiedTransformer - - -@register_model("NSPModel") -class NSPModel(UnifiedTransformer): - """NSP model.""" - - def _get_feed_dict(self, is_infer=False): - """ - Get the feed list of the model. - - Args: - is_infer(bool): True if running inference. - - Returns: - list(Variable): The feed list. - list(str): The name of each Variable in feed list. - """ - feed_dict = {} - feed_dict["token_ids"] = layers.data( - name="token_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") - feed_dict["type_ids"] = layers.data( - name="type_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") - feed_dict["pos_ids"] = layers.data( - name="pos_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") - - feed_dict["attention_mask"] = layers.data( - name="attention_mask", - shape=[-1, self.max_seq_len, self.max_seq_len], - dtype=self.dtype) - feed_dict["label_pos"] = layers.data( - name="label_pos", shape=[-1, 1], dtype="int64") - - if not is_infer: - feed_dict["label"] = layers.data( - name="label", shape=[-1, 1], dtype="int64") - feed_dict["tgt_label"] = layers.data( - name="tgt_ids", shape=[-1, 1], dtype="int64") - feed_dict["tgt_pos"] = layers.data( - name="tgt_pos", shape=[-1, 1], dtype="int64") - - feed_dict["data_id"] = layers.data( - name="data_id", shape=[-1, 1], dtype="int64") - return feed_dict - - def _get_feed(self, inputs, is_infer=False): - return Model._get_feed(self, inputs, is_infer) - - def forward(self, inputs, is_infer=False): - outputs = {} - self.generation_caches = None - outputs["enc_out"], self.checkpoints = self._generation_network( - token_ids=inputs["token_ids"], - type_ids=inputs["type_ids"], - pos_ids=inputs["pos_ids"], - generation_mask=inputs["attention_mask"]) - return outputs - - def _get_metrics(self, inputs, outputs): - metrics = {} - fc_out = self._calc_logits(outputs["enc_out"], inputs["tgt_pos"]) - lm_loss = layers.softmax_with_cross_entropy( - logits=fc_out, label=inputs["tgt_pos"]) - need_cal = layers.not_equal( - inputs["tgt_label"], - layers.fill_constant(shape=[1], dtype="int64", value=1)) - need_cal = layers.cast(need_cal, self.dtype) - mean_lm_loss = layers.reduce_sum( - lm_loss * need_cal) / (layers.reduce_sum(need_cal) + 1e-10) - - pooled_out = self._get_pooled_output(outputs["enc_out"], - inputs["label_pos"]) - nsp_fc_out = layers.fc( - input=pooled_out, - size=2, - param_attr=fluid.ParamAttr( - name="next_sent_fc.w_0", initializer=self.param_initializer), - bias_attr="next_sent_fc.b_0") - nsp_loss, nsp_softmax = layers.softmax_with_cross_entropy( - logits=nsp_fc_out, label=inputs["label"], return_softmax=True) - - nsp_acc = layers.accuracy(nsp_softmax, inputs["label"]) - mean_nsp_loss = layers.mean(nsp_loss) - - metrics["loss"] = mean_lm_loss + mean_nsp_loss - metrics["lm_loss"] = mean_lm_loss - metrics["nsp_loss"] = mean_nsp_loss - metrics["nsp_acc"] = nsp_acc - return metrics - - def infer(self, inputs, outputs): - pooled_out = 
self._get_pooled_output(outputs["enc_out"], - inputs["label_pos"]) - nsp_fc_out = layers.fc( - input=pooled_out, - size=2, - param_attr=fluid.ParamAttr( - name="next_sent_fc.w_0", initializer=self.param_initializer), - bias_attr="next_sent_fc.b_0") - scores = layers.softmax(nsp_fc_out) - predictions = {"scores": scores, "data_id": inputs["data_id"]} - return predictions - - def infer_step(self, inputs): - return Model.infer_step(self, inputs) diff --git a/hub_module/modules/text/text_generation/plato2_en_base/models/optimizer.py b/hub_module/modules/text/text_generation/plato2_en_base/models/optimizer.py deleted file mode 100644 index 38a9cd36fb490134427e67fa2909dbae48ed9f8c..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_base/models/optimizer.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Optimizer.""" - -import re - -import paddle.fluid as fluid -import paddle.fluid.layers as layers - - -class AdamW(fluid.optimizer.AdamOptimizer): - """AdamW object for dygraph""" - - def __init__(self, *args, **kwargs): - weight_decay = kwargs.pop('weight_decay', None) - var_name_to_exclude = kwargs.pop( - 'var_name_to_exclude', '.*layer_norm_scale|.*layer_norm_bias|.*b_0') - super(AdamW, self).__init__(*args, **kwargs) - self.wd = weight_decay - self.pat = re.compile(var_name_to_exclude) - - def apply_optimize(self, loss, startup_program, params_grads): - """Update params with weight decay.""" - super(AdamW, self).apply_optimize(loss, startup_program, params_grads) - for p, g in params_grads: - if not self.pat.match(p.name): - layers.assign(p * (1. - self.wd * self._learning_rate), p) diff --git a/hub_module/modules/text/text_generation/plato2_en_base/models/plato.py b/hub_module/modules/text/text_generation/plato2_en_base/models/plato.py deleted file mode 100644 index 7365f7fbb6fbf3e755047026d198eb671f0b30e7..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_base/models/plato.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Plato model.""" - -import numpy as np -import paddle.fluid as fluid -import paddle.fluid.layers as layers - -from . 
import register_model -from .model_base import Model -from .unified_transformer import UnifiedTransformer -from .transformer_block import encoder, pre_process_layer -from plato2_en_base.utils import repeat_array_or_tensor -from plato2_en_base.utils.args import str2bool -from .generator import Generator - - -@register_model("Plato") -class Plato(UnifiedTransformer): - """Plato model.""" - - @classmethod - def add_cmdline_args(cls, parser): - """Add cmdline argurments.""" - group = UnifiedTransformer.add_cmdline_args(parser) - group.add_argument("--use_bow", type=str2bool, default=True) - group.add_argument("--use_entropy", type=str2bool, default=False) - return group - - def __init__(self, args, place): - # latent related - self.mask_id = args.mask_id - self.latent_type_size = args.latent_type_size - self.latent_emb_name = "latent_embedding" - self.use_bow = args.use_bow - self.use_entropy = args.use_entropy - - super(Plato, self).__init__(args, place) - - def _get_feed_dict(self, is_infer=False): - feed_dict = {} - feed_dict["token_ids"] = layers.data( - name="token_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") - feed_dict["type_ids"] = layers.data( - name="type_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") - feed_dict["pos_ids"] = layers.data( - name="pos_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") - - if not is_infer: - feed_dict["recognition_mask"] = layers.data( - name="recognition_mask", - shape=[-1, self.max_seq_len + 1, self.max_seq_len + 1], - dtype=self.dtype) - feed_dict["generation_mask"] = layers.data( - name="generation_mask", - shape=[-1, self.max_seq_len + 1, self.max_seq_len + 1], - dtype=self.dtype) - - if is_infer: - feed_dict["tgt_ids"] = layers.data( - name="tgt_ids", - shape=[-1, self.max_seq_len, 1], - dtype="int64", - lod_level=2) - feed_dict["tgt_pos"] = layers.data( - name="tgt_pos", - shape=[-1, self.max_seq_len, 1], - dtype="int64", - lod_level=2) - feed_dict["init_score"] = layers.data( - name="init_score", shape=[-1, 1], dtype="float32", lod_level=1) - feed_dict["parent_idx"] = layers.data( - name="parent_idx", shape=[-1], dtype="int64") - - feed_dict["tgt_generation_mask"] = layers.data( - name="tgt_generation_mask", - shape=[-1, 1, self.max_seq_len + 1], - dtype="float32") - feed_dict["latent_id"] = layers.data( - name="latent_id", shape=[-1, 1], dtype="int64") - else: - feed_dict["tgt_label"] = layers.data( - name="tgt_label", shape=[-1, 1], dtype="int64") - feed_dict["tgt_pos"] = layers.data( - name="tgt_pos", shape=[-1, 1], dtype="int64") - - if self.use_bow: - feed_dict["bow_label"] = layers.data( - name="bow_label", shape=[-1, 1], dtype="int64") - feed_dict["bow_pos"] = layers.data( - name="bow_pos", shape=[-1, 1], dtype="int64") - - feed_dict["data_id"] = layers.data( - name="data_id", shape=[-1, 1], dtype="int64") - return feed_dict - - def _recognition_network(self, token_ids, type_ids, pos_ids, - recognition_mask): - mask_id = layers.fill_constant_batch_size_like( - input=token_ids, - shape=[-1, 1, 1], - value=self.mask_id, - dtype="int64") - mask_emb = layers.embedding( - input=mask_id, - size=[self.vocab_size, self.emb_size], - dtype=self.dtype, - param_attr=fluid.ParamAttr( - name=self.token_emb_name, initializer=self.param_initializer)) - emb_out, n_head_self_attn_mask = self._gen_input( - token_ids, type_ids, pos_ids, recognition_mask, aux_emb=mask_emb) - - recognition_out, checkpoints = self._encode(emb_out, - n_head_self_attn_mask) - - recognition_feat = layers.slice( - input=recognition_out, axes=[1], starts=[0], 
ends=[1]) - recognition_feat = layers.fc( - input=recognition_feat, - size=self.hidden_size, - act="tanh", - param_attr=fluid.ParamAttr( - name="recognition_fc.w_0", initializer=self.param_initializer), - bias_attr="recognition_fc.b_0") - logits = layers.fc( - input=recognition_feat, - size=self.latent_type_size, - param_attr=fluid.ParamAttr( - name=self.latent_emb_name, initializer=self.param_initializer), - bias_attr="recognition_bias") - return logits, checkpoints - - def _gumbel_softmax(self, logits, tau=0.67, eps=1e-10): - u = layers.uniform_random_batch_size_like( - logits, shape=[-1, self.latent_type_size], min=0.0, max=1.0) - u.stop_gradient = True - gumbel = 0.0 - layers.log(eps - layers.log(u + eps)) - y = logits + gumbel - return layers.softmax(y / tau) - - def forward(self, inputs, is_infer=False): - """ - Run model main forward. - """ - outputs = {} - if is_infer: - self.generation_caches = [{ - "k": - layers.fill_constant_batch_size_like( - input=inputs["token_ids"], - shape=[-1, 0, self.d_key * self.n_head], - dtype=self.dtype, - value=0), - "v": - layers.fill_constant_batch_size_like( - input=inputs["token_ids"], - shape=[-1, 0, self.d_value * self.n_head], - dtype=self.dtype, - value=0), - } for i in range(self.n_layer)] - else: - self.generation_caches = None - - latent_embeddings = layers.create_parameter( - shape=[self.emb_size, self.latent_type_size], - dtype=self.dtype, - attr=fluid.ParamAttr( - name=self.latent_emb_name, initializer=self.param_initializer)) - - if is_infer: - latent_id = inputs["latent_id"] - weights = layers.one_hot(latent_id, self.latent_type_size) - else: - logits, recognition_checkpoints = self._recognition_network( - token_ids=inputs["token_ids"], - type_ids=inputs["type_ids"], - pos_ids=inputs["pos_ids"], - recognition_mask=inputs["recognition_mask"], - ) - outputs["post_probs"] = layers.softmax(logits) - weights = self._gumbel_softmax(logits) - outputs["checkpoints"] = recognition_checkpoints - - latent_emb = layers.matmul( - x=weights, y=latent_embeddings, transpose_y=True) - outputs["enc_out"], generation_checkpoints = self._generation_network( - token_ids=inputs["token_ids"], - type_ids=inputs["type_ids"], - pos_ids=inputs["pos_ids"], - generation_mask=inputs["generation_mask"], - aux_emb=layers.unsqueeze(latent_emb, axes=[1]), - gather_idx=inputs.get("parent_idx", None), - ) - - if not is_infer: - outputs["checkpoints"].extend(generation_checkpoints) - return outputs - - def _calc_bow_logits(self, enc_out, checkpoints, bow_pos): - """Get the logits of generation.""" - bow_feat = layers.slice(input=enc_out, axes=[1], starts=[0], ends=[1]) - bow_feat = layers.reshape(x=bow_feat, shape=[-1, self.hidden_size]) - bow_pos = layers.cast(x=bow_pos, dtype="int32") - bow_feat = layers.gather(input=bow_feat, index=bow_pos) - - bow_trans_feat = layers.fc( - input=bow_feat, - size=self.emb_size, - act=self.hidden_act, - param_attr=fluid.ParamAttr( - name="bow_trans_fc.w_0", initializer=self.param_initializer), - bias_attr=fluid.ParamAttr(name="bow_trans_fc.b_0")) - - bow_trans_feat = pre_process_layer( - bow_trans_feat, self.post_cls_cmd, name="bow_trans") - - checkpoints.append(bow_trans_feat) - - if self.weight_sharing: - fc_out = layers.matmul( - x=bow_trans_feat, - y=fluid.default_main_program().global_block().var( - self.token_emb_name), - transpose_y=True) - if self.cls_bias: - fc_out += layers.create_parameter( - shape=[self.vocab_size], - dtype=self.dtype, - attr=fluid.ParamAttr(name="bow_out_fc.b_0"), - is_bias=True) - else: - 
bow_out_bias_attr = fluid.ParamAttr( - name="bow_out_fc.b_0") if self.cls_bias else False - fc_out = layers.fc( - input=bow_trans_feat, - size=self.vocab_size, - param_attr=fluid.ParamAttr( - name="bow_out_fc.w_0", initializer=self.param_initializer), - bias_attr=bow_out_bias_attr) - return fc_out - - def _get_metrics(self, inputs, outputs): - metrics = super(Plato, self)._get_metrics(inputs, outputs) - - if self.use_bow: - fc_out = self._calc_bow_logits( - outputs["enc_out"], outputs["checkpoints"], inputs["bow_pos"]) - bow_loss = layers.softmax_with_cross_entropy( - logits=fc_out, label=inputs["bow_label"]) - mean_bow_loss = layers.mean(bow_loss) - metrics["token_bow_loss"] = mean_bow_loss - metrics["loss"] = metrics["loss"] + mean_bow_loss - - entropy_loss = layers.reduce_sum( - outputs["post_probs"] * layers.log(outputs["post_probs"]), dim=1) - mean_entropy_loss = layers.mean(entropy_loss) - metrics["entropy_loss"] = mean_entropy_loss - if self.use_entropy: - metrics["loss"] = metrics["loss"] + mean_entropy_loss - return metrics - - def infer_step(self, inputs): - """ - Run one inference step. - """ - if self.do_generation: - batch_size = len(inputs["data_id"]) - new_bsz = batch_size * self.latent_type_size - inputs = { - name: repeat_array_or_tensor(array_or_tensor, self.place, - self.latent_type_size) - for name, array_or_tensor in inputs.items() - } - # Add latent_id - inputs["latent_id"] = np.array([ - i for i in range(self.latent_type_size) - for _ in range(batch_size) - ], - dtype="int64").reshape([-1, 1]) - - return super(Plato, self).infer_step(inputs) - else: - return self._execute(self.infer_program, - self._get_feed(inputs, is_infer=True), - self.infer_fetch_dict) diff --git a/hub_module/modules/text/text_generation/plato2_en_base/models/transformer_block.py b/hub_module/modules/text/text_generation/plato2_en_base/models/transformer_block.py deleted file mode 100644 index 3d98a8a15b14092a4807d66760058635760e4cde..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_base/models/transformer_block.py +++ /dev/null @@ -1,392 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Transformer block.""" - -from functools import partial - -import paddle.fluid as fluid -import paddle.fluid.layers as layers - - -def multi_head_attention(queries, - keys, - values, - attn_bias, - d_key, - d_value, - d_model, - n_head=1, - dropout_rate=0., - cache=None, - gather_idx=None, - store=False, - param_initializer=None, - name="multi_head_att"): - """ - Multi-Head Attention. Note that attn_bias is added to the logit before - computing softmax activiation to mask certain selected positions so that - they will not considered in attention weights. 
- """ - keys = queries if keys is None else keys - values = keys if values is None else values - - if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): - raise ValueError( - "Inputs: quries, keys and values should all be 3-D tensors.") - - def __compute_qkv(queries, keys, values, n_head, d_key, d_value): - """ - Add linear projection to queries, keys, and values. - """ - q = layers.fc( - input=queries, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + "_query_fc.w_0", initializer=param_initializer), - bias_attr=name + "_query_fc.b_0") - k = layers.fc( - input=keys, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + "_key_fc.w_0", initializer=param_initializer), - bias_attr=name + "_key_fc.b_0") - v = layers.fc( - input=values, - size=d_value * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + "_value_fc.w_0", initializer=param_initializer), - bias_attr=name + "_value_fc.b_0") - return q, k, v - - def __split_heads(x, n_head): - """ - Reshape the last dimension of inpunt tensor x so that it becomes two - dimensions and then transpose. Specifically, input a tensor with shape - [bs, max_sequence_length, n_head * hidden_dim] then output a tensor - with shape [bs, n_head, max_sequence_length, hidden_dim]. - """ - hidden_size = x.shape[-1] - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - reshaped = layers.reshape( - x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) - - # permuate the dimensions into: - # [batch_size, n_head, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) - - def __combine_heads(x): - """ - Transpose and then reshape the last two dimensions of inpunt tensor x - so that it becomes one dimension, which is reverse to __split_heads. - """ - if len(x.shape) == 3: return x - if len(x.shape) != 4: - raise ValueError("Input(x) should be a 4-D Tensor.") - - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - return layers.reshape( - x=trans_x, - shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], - inplace=True) - - def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): - """ - Scaled Dot-Product Attention - """ - scaled_q = layers.scale(x=q, scale=d_key**-0.5) - product = layers.matmul(x=scaled_q, y=k, transpose_y=True) - if attn_bias: - product += attn_bias - weights = layers.softmax(product, use_cudnn=True) - if dropout_rate: - weights = layers.dropout( - weights, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.matmul(weights, v) - return out - - q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) - - if cache is not None: # use cache and concat time steps - # Since the inplace reshape in __split_heads changes the shape of k and - # v, which is the cache input for next time step, reshape the cache - # input from the previous time step first. 
- cache_k, cache_v = cache["k"], cache["v"] - select_k = layers.gather(cache_k, index=gather_idx) - select_v = layers.gather(cache_v, index=gather_idx) - select_k = layers.reshape(select_k, shape=[0, 0, d_key * n_head]) - select_v = layers.reshape(select_v, shape=[0, 0, d_value * n_head]) - if store: - k = layers.concat([select_k, k], axis=1) - v = layers.concat([select_v, v], axis=1) - layers.assign(k, cache["k"]) - layers.assign(v, cache["v"]) - else: - #k = select_k - #v = select_v - tmp_k = layers.concat([select_k, k[:, :1]], axis=1) - tmp_v = layers.concat([select_v, v[:, :1]], axis=1) - layers.assign(tmp_k, cache["k"]) - layers.assign(tmp_v, cache["v"]) - k = layers.concat([select_k, k], axis=1) - v = layers.concat([select_v, v], axis=1) - - q = __split_heads(q, n_head) - k = __split_heads(k, n_head) - v = __split_heads(v, n_head) - - ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, - dropout_rate) - - out = __combine_heads(ctx_multiheads) - - # Project back to the model size. - proj_out = layers.fc( - input=out, - size=d_model, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + "_output_fc.w_0", initializer=param_initializer), - bias_attr=name + "_output_fc.b_0") - return proj_out - - -def positionwise_feed_forward(x, - d_inner_hid, - d_hid, - dropout_rate, - hidden_act, - param_initializer=None, - name="ffn"): - """ - Position-wise Feed-Forward Networks. - This module consists of two linear transformations with a ReLU activation - in between, which is applied to each position separately and identically. - """ - hidden = layers.fc( - input=x, - size=d_inner_hid, - num_flatten_dims=2, - act=hidden_act, - param_attr=fluid.ParamAttr( - name=name + "_fc_0.w_0", initializer=param_initializer), - bias_attr=name + "_fc_0.b_0") - if dropout_rate: - hidden = layers.dropout( - hidden, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.fc( - input=hidden, - size=d_hid, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + "_fc_1.w_0", initializer=param_initializer), - bias_attr=name + "_fc_1.b_0") - return out - - -def pre_post_process_layer(prev_out, - out, - process_cmd, - dropout_rate=0., - epsilon=1e-5, - name=""): - """ - Add residual connection, layer normalization and droput to the out tensor - optionally according to the value of process_cmd. - This will be used before or after multi-head attention and position-wise - feed-forward networks. 
- """ - for cmd in process_cmd: - if cmd == "a": # add residual connection - out = out + prev_out if prev_out else out - elif cmd == "n": # add layer normalization - out = layers.layer_norm( - out, - begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.ParamAttr( - name=name + "_layer_norm_scale", - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - name=name + "_layer_norm_bias", - initializer=fluid.initializer.Constant(0.)), - epsilon=epsilon) - elif cmd == "d": # add dropout - if dropout_rate: - out = layers.dropout( - out, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - return out - - -pre_process_layer = partial(pre_post_process_layer, None) -post_process_layer = pre_post_process_layer - - -def encoder_layer(input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name="", - epsilon=1e-5, - cache=None, - gather_idx=None, - store=False): - """ - The encoder layers that can be stacked to form a deep encoder. - This module consits of a multi-head (self) attention followed by - position-wise feed-forward networks and both the two components companied - with the pre_process_layer / post_process_layer to add residual connection, - layer normalization and droput. - """ - attn_output = multi_head_attention( - pre_process_layer( - input, - preprocess_cmd, - prepostprocess_dropout, - epsilon=epsilon, - name=name + "_pre_att"), - None, - None, - attn_bias, - d_key, - d_value, - d_model, - n_head, - attention_dropout, - param_initializer=param_initializer, - name=name + "_multi_head_att", - cache=cache, - gather_idx=gather_idx, - store=store) - attn_output = post_process_layer( - input, - attn_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + "_post_att", - epsilon=epsilon) - ffd_output = positionwise_feed_forward( - pre_process_layer( - attn_output, - preprocess_cmd, - prepostprocess_dropout, - epsilon=epsilon, - name=name + "_pre_ffn"), - d_inner_hid, - d_model, - relu_dropout, - hidden_act, - param_initializer=param_initializer, - name=name + "_ffn") - ffd_output = post_process_layer( - attn_output, - ffd_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + "_post_ffn", - epsilon=epsilon) - return ffd_output, [attn_output, ffd_output] - - -def encoder(enc_input, - attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name="", - epsilon=1e-5, - n_layer_per_block=1, - param_share="normal", - caches=None, - gather_idx=None, - store=False): - """ - The encoder is composed of a stack of identical layers returned by calling - encoder_layer. 
- """ - checkpoints = [] - names = [] - if param_share == "inner_share": - for _ in range(n_layer // n_layer_per_block): - for i in range(n_layer_per_block): - names.append(name + "_layer_" + str(i)) - else: - for i in range(n_layer // n_layer_per_block): - for _ in range(n_layer_per_block): - names.append(name + "_layer_" + str(i)) - - for i in range(n_layer): - enc_output, cps = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - epsilon=epsilon, - name=names[i], - cache=caches[i] if caches is not None else None, - gather_idx=gather_idx, - store=store) - checkpoints.extend(cps) - enc_input = enc_output - enc_output = pre_process_layer( - enc_output, - preprocess_cmd, - prepostprocess_dropout, - name="post_encoder", - epsilon=epsilon) - - return enc_output, checkpoints diff --git a/hub_module/modules/text/text_generation/plato2_en_base/models/unified_transformer.py b/hub_module/modules/text/text_generation/plato2_en_base/models/unified_transformer.py deleted file mode 100644 index cca4ab9bac1d1a894a30f5ea82f69a5a5468c93c..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_base/models/unified_transformer.py +++ /dev/null @@ -1,457 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Unified Transformer model.""" - -import numpy as np -import paddle.fluid as fluid -import paddle.fluid.layers as layers - -from . 
import register_model -from .model_base import Model -from .transformer_block import encoder, pre_process_layer -from plato2_en_base.utils.args import str2bool -from plato2_en_base.utils import repeat_array_or_tensor, slice_array_or_tensor -from .generator import Generator - - -@register_model("UnifiedTransformer") -class UnifiedTransformer(Model): - """Unified Transformer""" - - @classmethod - def add_cmdline_args(cls, parser): - """Add cmdline argurments.""" - group = Model.add_cmdline_args(parser) - group.add_argument("--max_seq_len", type=int, default=256) - group.add_argument("--weight_sharing", type=str2bool, default=True) - group.add_argument("--mem_efficient", type=str2bool, default=False) - - Generator.add_cmdline_args(parser) - return group - - def __init__(self, args, place): - self.max_seq_len = args.max_seq_len - - self.emb_size = args.emb_size or args.hidden_size - self.hidden_size = args.hidden_size - - self.n_layer = args.num_hidden_layers - self.n_head = args.num_attention_heads - self.d_key = args.get("key_size", self.hidden_size // self.n_head) - self.d_value = args.get("value_size", self.hidden_size // self.n_head) - self.inner_hidden_size = args.get("inner_hidden_size", - self.hidden_size * 4) - - self.vocab_size = args.vocab_size - self.max_position_seq_len = args.max_position_embeddings - self.type_size = args.type_vocab_size - self.token_emb_name = "word_embedding" - self.type_emb_name = "sent_embedding" - self.pos_emb_name = "pos_embedding" - - self.epsilon = args.epsilon or 1e-5 - self.n_layer_per_block = args.n_layer_per_block or 1 - self.pre_encoder_cmd = args.get("pre_encoder_cmd", "nd") - self.preprocess_cmd = args.get("preprocess_cmd", "") - self.postprocess_cmd = args.get("postprocess_cmd", "dan") - self.post_cls_cmd = args.get("post_cls_cmd", "n") - self.cls_bias = args.get("cls_bias", True) - if self.hidden_size != self.emb_size: - self.emb_mapping_in = True - else: - self.emb_mapping_in = args.get("emb_mapping_in", False) - - self.hidden_act = args.hidden_act - self.prepostprocess_dropout = args.hidden_dropout_prob - self.attention_dropout = args.attention_probs_dropout_prob - self.weight_sharing = args.weight_sharing - - self.mem_efficient = args.mem_efficient - - self.dtype = "float32" - - # Initialize all weigths by truncated normal initializer, and all biases - # will be initialized by constant zero by default. 
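As a hedged illustration of the initialization policy described in the comment above: weights are drawn from a truncated normal with standard deviation `initializer_range`. The exact truncation bound used by `fluid.initializer.TruncatedNormal` is an assumption here; the sketch resamples beyond two standard deviations, the common convention:

```python
import numpy as np

def truncated_normal(shape, stddev=0.02, rng=None):
    """Normal(0, stddev) with draws outside 2*stddev resampled (assumed bound)."""
    rng = rng or np.random.default_rng(0)
    w = rng.normal(0.0, stddev, size=shape)
    out_of_range = np.abs(w) > 2 * stddev
    while out_of_range.any():
        w[out_of_range] = rng.normal(0.0, stddev, size=int(out_of_range.sum()))
        out_of_range = np.abs(w) > 2 * stddev
    return w

w = truncated_normal((4, 4))
assert np.all(np.abs(w) <= 2 * 0.02)
```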
- self.param_initializer = fluid.initializer.TruncatedNormal( - scale=args.initializer_range) - - # task-related - self.generator = Generator(args) - self.do_generation = args.do_generation - - super(UnifiedTransformer, self).__init__(args, place) - - def _gen_input(self, token_ids, type_ids, pos_ids, input_mask, - aux_emb=None): - token_emb_out = layers.embedding( - input=token_ids, - size=[self.vocab_size, self.emb_size], - dtype=self.dtype, - param_attr=fluid.ParamAttr( - name=self.token_emb_name, initializer=self.param_initializer)) - type_emb_out = layers.embedding( - input=type_ids, - size=[self.type_size, self.emb_size], - dtype=self.dtype, - param_attr=fluid.ParamAttr( - name=self.type_emb_name, initializer=self.param_initializer)) - pos_emb_out = layers.embedding( - input=pos_ids, - size=[self.max_position_seq_len, self.emb_size], - dtype=self.dtype, - param_attr=fluid.ParamAttr( - name=self.pos_emb_name, initializer=self.param_initializer)) - emb_out = token_emb_out + type_emb_out + pos_emb_out - - # auxiliary memory embeddings - if aux_emb is not None: - emb_out = layers.concat([aux_emb, emb_out], axis=1) - - # post process of embedding - emb_out = pre_process_layer( - emb_out, - self.pre_encoder_cmd, - self.prepostprocess_dropout, - name="pre_encoder", - epsilon=self.epsilon) - if self.emb_mapping_in: - emb_out = layers.fc( - input=emb_out, - num_flatten_dims=2, - size=self.hidden_size, - param_attr=fluid.ParamAttr( - name="emb_hidden_mapping", - initializer=self.param_initializer), - bias_attr="emb_hidden_mapping_bias") - - # generate n-head self-attention mask - self_attn_mask = input_mask - self_attn_mask = layers.scale( - x=self_attn_mask, scale=1e4, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = layers.stack( - x=[self_attn_mask] * self.n_head, axis=1) - n_head_self_attn_mask.stop_gradient = True - - return emb_out, n_head_self_attn_mask - - def _get_pooled_output(self, enc_out, pos): - enc_out = layers.reshape(x=enc_out, shape=[-1, self.hidden_size]) - pos = layers.cast(x=pos, dtype="int32") - feat = layers.gather(input=enc_out, index=pos) - - pooled_out = layers.fc( - input=feat, - size=self.hidden_size, - act="tanh", - param_attr=fluid.ParamAttr( - name="pooled_fc.w_0", initializer=self.param_initializer), - bias_attr="pooled_fc.b_0") - return pooled_out - - def _generation_network(self, - token_ids, - type_ids, - pos_ids, - generation_mask, - aux_emb=None, - gather_idx=None): - emb_out, n_head_self_attn_mask = self._gen_input( - token_ids, type_ids, pos_ids, generation_mask, aux_emb=aux_emb) - return self._encode( - emb_out, - n_head_self_attn_mask, - self.generation_caches, - gather_idx=gather_idx) - - def _encode(self, - emb_out, - n_head_self_attn_mask, - caches=None, - gather_idx=None): - return encoder( - enc_input=emb_out, - attn_bias=n_head_self_attn_mask, - n_layer=self.n_layer, - n_head=self.n_head, - d_key=self.d_key, - d_value=self.d_value, - d_model=self.hidden_size, - d_inner_hid=self.inner_hidden_size, - prepostprocess_dropout=self.prepostprocess_dropout, - attention_dropout=self.attention_dropout, - relu_dropout=0, - hidden_act=self.hidden_act, - preprocess_cmd=self.preprocess_cmd, - postprocess_cmd=self.postprocess_cmd, - param_initializer=self.param_initializer, - epsilon=self.epsilon, - n_layer_per_block=self.n_layer_per_block, - name="encoder", - caches=caches, - gather_idx=gather_idx, - store=caches is not None) - - def _gumbel_softmax(self, logits, tau=0.67, eps=1e-10): - u = layers.uniform_random_batch_size_like( - logits, shape=[-1, 
self.latent_type_size], min=0.0, max=1.0) - u.stop_gradient = True - gumbel = 0.0 - layers.log(eps - layers.log(u + eps)) - y = logits + gumbel - return layers.softmax(y / tau) - - def _get_feed_dict(self, is_infer=False): - """ - Get the feed list of the model. - - Args: - is_infer(bool): True if running inference. - - Returns: - list(Variable): The feed list. - list(str): The name of each Variable in feed list. - """ - feed_dict = {} - feed_dict["token_ids"] = layers.data( - name="token_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") - feed_dict["type_ids"] = layers.data( - name="type_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") - feed_dict["pos_ids"] = layers.data( - name="pos_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") - - feed_dict["generation_mask"] = layers.data( - name="generation_mask", - shape=[-1, self.max_seq_len, self.max_seq_len], - dtype=self.dtype) - - if is_infer: - feed_dict["tgt_ids"] = layers.data( - name="tgt_ids", - shape=[-1, self.max_seq_len, 1], - dtype="int64", - lod_level=2) - feed_dict["tgt_pos"] = layers.data( - name="tgt_pos", - shape=[-1, self.max_seq_len, 1], - dtype="int64", - lod_level=2) - feed_dict["init_score"] = layers.data( - name="init_score", shape=[-1, 1], dtype="float32", lod_level=1) - feed_dict["parent_idx"] = layers.data( - name="parent_idx", shape=[-1], dtype="int64") - - feed_dict["tgt_generation_mask"] = layers.data( - name="tgt_generation_mask", - shape=[-1, 1, self.max_seq_len], - dtype="float32") - else: - feed_dict["tgt_label"] = layers.data( - name="tgt_label", shape=[-1, 1], dtype="int64") - feed_dict["tgt_pos"] = layers.data( - name="tgt_pos", shape=[-1, 1], dtype="int64") - - feed_dict["data_id"] = layers.data( - name="data_id", shape=[-1, 1], dtype="int64") - return feed_dict - - def forward(self, inputs, is_infer=False): - """ - Run model main forward. 
- """ - outputs = {} - if is_infer: - self.generation_caches = [{ - "k": - layers.fill_constant_batch_size_like( - input=inputs["token_ids"], - shape=[-1, 0, self.d_key * self.n_head], - dtype=self.dtype, - value=0), - "v": - layers.fill_constant_batch_size_like( - input=inputs["token_ids"], - shape=[-1, 0, self.d_value * self.n_head], - dtype=self.dtype, - value=0), - } for i in range(self.n_layer)] - else: - self.generation_caches = None - - outputs["enc_out"], generation_checkpoints = self._generation_network( - token_ids=inputs["token_ids"], - type_ids=inputs["type_ids"], - pos_ids=inputs["pos_ids"], - generation_mask=inputs["generation_mask"], - gather_idx=inputs.get("parent_idx", None)) - - if not is_infer: - outputs["checkpoints"] = generation_checkpoints - return outputs - - def _calc_logits(self, enc_out, checkpoints=None, seq_pos=None): - """Get the logits of generation.""" - enc_out = layers.reshape(x=enc_out, shape=[-1, self.hidden_size]) - if seq_pos is not None: - seq_pos = layers.cast(x=seq_pos, dtype="int32") - seq_feat = layers.gather(input=enc_out, index=seq_pos) - else: - seq_feat = enc_out - - seq_trans_feat = layers.fc( - input=seq_feat, - size=self.emb_size, - act=self.hidden_act, - param_attr=fluid.ParamAttr( - name="mask_lm_trans_fc.w_0", - initializer=self.param_initializer), - bias_attr=fluid.ParamAttr(name="mask_lm_trans_fc.b_0")) - - seq_trans_feat = pre_process_layer( - seq_trans_feat, self.post_cls_cmd, name="mask_lm_trans") - - if checkpoints is not None: - checkpoints.append(seq_trans_feat) - - if self.weight_sharing: - fc_out = layers.matmul( - x=seq_trans_feat, - y=fluid.default_main_program().global_block().var( - self.token_emb_name), - transpose_y=True) - if self.cls_bias: - fc_out += layers.create_parameter( - shape=[self.vocab_size], - dtype=self.dtype, - attr=fluid.ParamAttr(name="mask_lm_out_fc.b_0"), - is_bias=True) - else: - seq_out_bias_attr = fluid.ParamAttr( - name="mask_lm_out_fc.b_0") if self.cls_bias else False - fc_out = layers.fc( - input=seq_trans_feat, - size=self.vocab_size, - param_attr=fluid.ParamAttr( - name="mask_lm_out_fc.w_0", - initializer=self.param_initializer), - bias_attr=seq_out_bias_attr) - return fc_out - - def _get_metrics(self, inputs, outputs): - metrics = {} - - fc_out = self._calc_logits(outputs["enc_out"], outputs["checkpoints"], - inputs["tgt_pos"]) - tgt_lm_loss = layers.softmax_with_cross_entropy( - logits=fc_out, label=inputs["tgt_label"]) - mean_tgt_lm_loss = layers.mean(tgt_lm_loss) - loss = mean_tgt_lm_loss - metrics["token_lm_loss"] = mean_tgt_lm_loss - - metrics["loss"] = loss - return metrics - - def _get_statistics(self, inputs, outputs): - statistics = {} - if "tgt_label" in inputs: - statistics["tokens_num"] = layers.reduce_sum( - layers.fill_constant_batch_size_like( - input=inputs["tgt_label"], - value=1.0, - shape=[-1], - dtype="int64")) - statistics["batch_size"] = layers.reduce_sum( - layers.fill_constant_batch_size_like( - input=inputs["token_ids"], value=1.0, shape=[-1], - dtype="int64")) - return statistics - - def get_metrics_and_statistics(self, inputs, outputs): - """ - Get metrics and statistics. - """ - metrics = self._get_metrics(inputs, outputs) - statistics = self._get_statistics(inputs, outputs) - return metrics, statistics - - def infer(self, inputs, outputs): - """ - Run model inference. - """ - if self.do_generation: - return self.generator.inference(self, inputs, outputs) - else: - raise NotImplementedError - - def _run_generation(self, inputs): - """ - Run generation. 
- """ - batch_size = len(inputs["data_id"]) - inputs["parent_idx"] = np.array(range(batch_size), dtype="int64") - outputs = self._execute( - self.infer_program, - self._get_feed(inputs, is_infer=True), - self.infer_fetch_dict, - return_numpy=False) - - predictions = [] - data_id_list = np.array(outputs["data_id"]).reshape(-1).tolist() - token_ids_list = np.array(outputs["token_ids"]).squeeze(2).tolist() - seq_ids = outputs["finished_ids"] - seq_ids_np = np.array(outputs["finished_ids"]) - seq_scores_np = np.array(outputs["finished_scores"]) - for i, (data_id, token_ids) in enumerate( - zip(data_id_list, token_ids_list)): - start = seq_ids.lod()[0][i] - end = seq_ids.lod()[0][i + 1] - for j in range(start, end): - sub_start = seq_ids.lod()[1][j] - sub_end = seq_ids.lod()[1][j + 1] - info = {} - info["data_id"] = data_id - info["decode_score"] = float(seq_scores_np[sub_end - 1]) - info["context_token_ids"] = token_ids - info["response_token_ids"] = seq_ids_np[sub_start: - sub_end].tolist() - predictions.append(info) - return predictions - - def infer_step(self, inputs): - """ - Run one inference step. - """ - if self.do_generation: - if self.generator.num_samples: - inputs = { - name: repeat_array_or_tensor(array_or_tensor, self.place, - self.generator.num_samples) - for name, array_or_tensor in inputs.items() - } - - if self.mem_efficient: - predictions = [] - for idx in range(0, len(inputs["data_id"]), self.batch_size): - part_inputs = { - name: slice_array_or_tensor(array_or_tensor, self.place, - idx, idx + self.batch_size) - for name, array_or_tensor in inputs.items() - } - part_outputs = self._run_generation(part_inputs) - predictions.extend(part_outputs) - else: - predictions = self._run_generation(inputs) - return predictions - else: - return self._execute(self.infer_program, - self._get_feed(inputs, is_infer=True), - self.infer_fetch_dict) diff --git a/hub_module/modules/text/text_generation/plato2_en_base/module.py b/hub_module/modules/text/text_generation/plato2_en_base/module.py deleted file mode 100644 index 5faaa26d9871a943b591b61b7581487f466602c6..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_base/module.py +++ /dev/null @@ -1,194 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import ast -import os -import json -import sys -import argparse -import contextlib -from collections import namedtuple - -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.module.module import runnable -from paddlehub.module.nlp_module import DataFormatError -from paddlehub.common.logger import logger -from paddlehub.module.module import moduleinfo, serving - -import plato2_en_base.models as plato_models -from plato2_en_base.tasks.dialog_generation import DialogGeneration -from plato2_en_base.utils import check_cuda, Timer -from plato2_en_base.utils.args import parse_args - - -@moduleinfo( - name="plato2_en_base", - version="1.0.0", - summary= - "A novel pre-training model for dialogue generation, incorporated with latent discrete variables for one-to-many relationship modeling.", - author="baidu-nlp", - author_email="", - type="nlp/text_generation", -) -class Plato(hub.NLPPredictionModule): - def _initialize(self): - """ - initialize with the necessary elements - """ - if "CUDA_VISIBLE_DEVICES" not in os.environ: - raise RuntimeError( - "The module only support GPU. Please set the environment variable CUDA_VISIBLE_DEVICES." - ) - - args = self.setup_args() - self.task = DialogGeneration(args) - self.model = plato_models.create_model(args, fluid.CUDAPlace(0)) - self.Example = namedtuple("Example", ["src", "data_id"]) - self._interactive_mode = False - - def setup_args(self): - """ - Setup arguments. - """ - assets_path = os.path.join(self.directory, "assets") - vocab_path = os.path.join(assets_path, "vocab.txt") - init_pretraining_params = os.path.join(assets_path, "24L", "Plato") - spm_model_file = os.path.join(assets_path, "spm.model") - nsp_inference_model_path = os.path.join(assets_path, "24L", "NSP") - config_path = os.path.join(assets_path, "24L.json") - - # ArgumentParser.parse_args use argv[1:], it will drop the first one arg, so the first one in sys.argv should be "" - sys.argv = [ - "", "--model", "Plato", "--vocab_path", - "%s" % vocab_path, "--do_lower_case", "False", - "--init_pretraining_params", - "%s" % init_pretraining_params, "--spm_model_file", - "%s" % spm_model_file, "--nsp_inference_model_path", - "%s" % nsp_inference_model_path, "--ranking_score", "nsp_score", - "--do_generation", "True", "--batch_size", "1", "--config_path", - "%s" % config_path - ] - - parser = argparse.ArgumentParser() - plato_models.add_cmdline_args(parser) - DialogGeneration.add_cmdline_args(parser) - args = parse_args(parser) - - args.load(args.config_path, "Model") - args.run_infer = True # only build infer program - - return args - - @serving - def generate(self, texts): - """ - Get the robot responses of the input texts. - - Args: - texts(list or str): If not in the interactive mode, texts should be a list in which every element is the chat context separated with '\t'. - Otherwise, texts shoule be one sentence. The module can get the context automatically. - - Returns: - results(list): the robot responses. - """ - if not texts: - return [] - if self._interactive_mode: - if isinstance(texts, str): - self.context.append(texts.strip()) - texts = [" [SEP] ".join(self.context[-self.max_turn:])] - else: - raise ValueError( - "In the interactive mode, the input data should be a string." - ) - elif not isinstance(texts, list): - raise ValueError( - "If not in the interactive mode, the input data should be a list." 
- ) - - bot_responses = [] - for i, text in enumerate(texts): - example = self.Example(src=text.replace("\t", " [SEP] "), data_id=i) - record = self.task.reader._convert_example_to_record( - example, is_infer=True) - data = self.task.reader._pad_batch_records([record], is_infer=True) - pred = self.task.infer_step(self.model, data)[0] # batch_size is 1 - bot_response = pred["response"] # ignore data_id and score - bot_responses.append(bot_response) - - if self._interactive_mode: - self.context.append(bot_responses[0].strip()) - return bot_responses - - @contextlib.contextmanager - def interactive_mode(self, max_turn=6): - """ - Enter the interactive mode. - - Args: - max_turn(int): the max dialogue turns. max_turn = 1 means the robot can only remember the last one utterance you have said. - """ - self._interactive_mode = True - self.max_turn = max_turn - self.context = [] - yield - self.context = [] - self._interactive_mode = False - - @runnable - def run_cmd(self, argvs): - """ - Run as a command - """ - self.parser = argparse.ArgumentParser( - description='Run the %s module.' % self.name, - prog='hub run %s' % self.name, - usage='%(prog)s', - add_help=True) - - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, optional.") - - self.add_module_input_arg() - - args = self.parser.parse_args(argvs) - - try: - input_data = self.check_input_data(args) - except DataFormatError and RuntimeError: - self.parser.print_help() - return None - - results = self.generate(texts=input_data) - - return results - - -if __name__ == "__main__": - module = Plato() - for result in module.generate([ - "Hello", - "Hello\thi, nice to meet you, my name is tom\tso your name is tom?" - ]): - print(result) - with module.interactive_mode(max_turn=3): - while True: - human_utterance = input() - robot_utterance = module.generate(human_utterance) - print("Robot: %s" % robot_utterance[0]) diff --git a/hub_module/modules/text/text_generation/plato2_en_base/readers/dialog_reader.py b/hub_module/modules/text/text_generation/plato2_en_base/readers/dialog_reader.py deleted file mode 100644 index 4d1bd0af832f2f21e7dcadb526576a42b9907d5c..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_base/readers/dialog_reader.py +++ /dev/null @@ -1,496 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
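The interactive mode implemented in `generate` above keeps a rolling context list and joins the last `max_turn` utterances with `" [SEP] "` to form the model input. A minimal sketch of that context handling, with `build_interactive_input` as a hypothetical name:

```python
def build_interactive_input(context, new_utterance, max_turn=6):
    """Append the new utterance and join the most recent max_turn turns."""
    context.append(new_utterance.strip())
    return " [SEP] ".join(context[-max_turn:])

history = []
assert build_interactive_input(history, "Hello") == "Hello"
assert build_interactive_input(history, "Hi there!") == "Hello [SEP] Hi there!"
```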
-"""Dialogue Reader.""" - -import csv -from collections import namedtuple -from contextlib import contextmanager -import gzip -import os - -import numpy as np -import paddle.fluid as fluid -from paddle.fluid.incubate.fleet.collective import fleet - -from plato2_en_base.utils import pad_batch_data -from plato2_en_base.utils.args import str2bool -from plato2_en_base.utils.masking import mask -import plato2_en_base.utils.tokenization as tokenization - - -class DialogReader(object): - """The implement of DialogReader.""" - - @classmethod - def add_cmdline_args(cls, parser): - """Add cmdline argurments.""" - group = parser.add_argument_group("Reader") - group.add_argument("--max_src_len", type=int, default=128) - group.add_argument("--max_tgt_len", type=int, default=128) - group.add_argument( - "--truncate_first_turn", type=str2bool, default=False) - group.add_argument( - "--file_format", - type=str, - default="file", - choices=["file", "filelist"]) - group.add_argument( - "--data_format", - type=str, - default="raw", - choices=["raw", "tokenized", "numerical"]) - group.add_argument("--in_tokens", type=str2bool, default=False) - group.add_argument("--batch_size", type=int, default=16) - group.add_argument("--continuous_position", type=str2bool, default=True) - group.add_argument("--random_seed", type=int, default=11) - group.add_argument("--sort_pool_size", type=int, default=2**16) - - group = parser.add_argument_group("Tokenizer") - group.add_argument( - "--tokenizer", type=str, default="SentencePieceTokenizer") - args, _ = parser.parse_known_args() - tokenizer_cls = getattr(tokenization, args.tokenizer) - tokenizer_cls.add_cmdline_args(parser) - return group - - def __init__(self, args): - tokenizer_cls = getattr(tokenization, args.tokenizer) - self.tokenizer = tokenizer_cls(args) - self.vocab = self.tokenizer.vocab - self.pad_id = args.pad_id = self.vocab["[PAD]"] - self.bos_id = args.bos_id = self.vocab["[CLS]"] - self.eos_id = args.eos_id = self.vocab["[SEP]"] - self.unk_id = args.unk_id = self.vocab["[UNK]"] - self.mask_id = args.mask_id = self.vocab["[MASK]"] - self.vocab_size = args.get("vocab_size", 0) - self.max_src_len = args.max_src_len - self.max_tgt_len = args.max_tgt_len - self.truncate_first_turn = args.truncate_first_turn - self.file_format = args.file_format - self.data_format = args.data_format - self.in_tokens = args.in_tokens - self.batch_size = args.batch_size - self.continuous_position = args.continuous_position - self.sort_pool_size = args.sort_pool_size - - # random_seed must be set for data slicing when using multi-gpu - self.global_rng = np.random.RandomState(args.random_seed) - - # training progress - self.current_example = 0 - self.current_epoch = 0 - self.num_examples = 0 - - # model related - - self.fields = ["token_ids", "type_ids", "pos_ids"] - self.num_numerical_fields = len(self.fields) - self.fields += ["tgt_start_idx", "data_id"] - self.sort_key = lambda record: [len(record.token_ids)] - - self.Record = namedtuple( - "Record", self.fields, defaults=(None, ) * len(self.fields)) - - self.features = {} - return - - def get_train_progress(self): - """Gets progress for training phase.""" - return self.current_epoch, self.current_file_index, self.total_file - - def _convert_example_to_record(self, example, is_infer): - # process src - src_token_ids = [] - src_pos_ids = [] - - # tokenize src - s_token_ids_list = [] - for s in example.src.split("[SEP]"): - s = tokenization.convert_to_unicode(s).strip() - - if self.data_format == "tokenized": - s_tokens = s.split(" 
") - else: - s_tokens = self.tokenizer.tokenize(s) - - s_token_ids = self.tokenizer.convert_tokens_to_ids(s_tokens) + [ - self.eos_id - ] - s_token_ids_list.append(s_token_ids) - - # trim src - idx = len(s_token_ids_list) - 1 - total_token_num = 1 - while idx >= 0: - total_token_num += len(s_token_ids_list[idx]) - if total_token_num > self.max_src_len: - if self.truncate_first_turn and idx == 0: - truncated_ids = s_token_ids_list[idx][:self.max_src_len - - total_token_num] - if len(truncated_ids) > 1: - s_token_ids_list[idx] = truncated_ids[:-1] + [ - self.eos_id - ] - idx -= 1 - break - idx -= 1 - - for i, s_token_ids in enumerate(s_token_ids_list[idx + 1:], idx + 1): - src_token_ids += s_token_ids - src_pos_ids += list(range(1, len(s_token_ids) + 1)) - - src_token_ids = [self.bos_id] + src_token_ids - src_type_ids = [0] * len(src_token_ids) - src_pos_ids = [0] + src_pos_ids - assert len(src_token_ids) == len(src_type_ids) == len(src_pos_ids), \ - "not len(src_token_ids) == len(src_type_ids) == len(src_pos_ids)" - - token_ids = src_token_ids - type_ids = src_type_ids - pos_ids = src_pos_ids - tgt_start_idx = len(token_ids) - - if not is_infer: - # process tgt - # tokenize tgt - tgt = tokenization.convert_to_unicode(example.tgt).strip() - if self.data_format == "tokenized": - tgt_tokens = tgt.split(" ") - else: - tgt_tokens = self.tokenizer.tokenize(tgt) - - tgt_token_ids = self.tokenizer.convert_tokens_to_ids(tgt_tokens) - tgt_token_ids.append(self.eos_id) - - # trim tgt - if len(tgt_token_ids) > self.max_tgt_len - 1: - tgt_token_ids = tgt_token_ids[:self.max_tgt_len - 1] - - tgt_token_ids = [self.bos_id] + tgt_token_ids - tgt_type_ids = [1] * len(tgt_token_ids) - tgt_pos_ids = list(range(1, len(tgt_token_ids) + 1)) - assert len(tgt_token_ids) == len(tgt_type_ids) == len(tgt_pos_ids), \ - "not len(tgt_token_ids) == len(tgt_type_ids) == len(tgt_pos_ids)" - - token_ids += tgt_token_ids - type_ids += tgt_type_ids - pos_ids += tgt_pos_ids - - assert len(token_ids) == len(type_ids) == len(pos_ids), \ - "not len(token_ids) == len(type_ids) == len(pos_ids)" - - if self.continuous_position: - src_pos_ids = list(range(len(src_token_ids))) - if not is_infer: - tgt_pos_ids = list(range(len(tgt_token_ids))) - pos_ids = list(range(len(token_ids))) - - field_values = { - "token_ids": src_token_ids, - "type_ids": src_type_ids, - "pos_ids": src_pos_ids - } - field_values["tgt_start_idx"] = tgt_start_idx - field_values["data_id"] = example.data_id - - record = self.Record(**field_values) - return record - - def _read_tsv(self, fp, phase, is_infer, delimiter="\t", quotechar=None): - """Reads a tab separated value file.""" - csv.field_size_limit(2**20) - reader = csv.reader(fp, delimiter=delimiter, quotechar=quotechar) - headers = next(reader) - headers.append("data_id") - Example = namedtuple("Example", headers) - - for i, line in enumerate(reader): - example = Example(*line, data_id=i) - if is_infer or phase.endswith("test"): - self.features[phase][i] = example - record = self._convert_example_to_record(example, is_infer) - yield record - - def _read_numerical_file(self, fp, delimiter=";"): - for i, line in enumerate(fp): - cols = tokenization.convert_to_unicode(line).strip().split( - delimiter) - cols = list(map(lambda x: list(map(int, x.split(" "))), cols)) - if len(cols) > self.num_numerical_fields: - cols = cols[:self.num_numerical_fields] - tgt_start_idx = cols[0].index(self.bos_id, 1) - record = self.Record(*cols, tgt_start_idx=tgt_start_idx, data_id=i) - yield record - - def _read_file(self, 
input_file, phase, is_infer): - def __wrapper__(): - with open_file(input_file) as fp: - if self.data_format == "numerical": - records = self._read_numerical_file(fp) - else: - records = self._read_tsv(fp, phase, is_infer) - for record in records: - yield record - - return __wrapper__ - - def _read_files(self, filelist, phase, is_infer, shuffle_files): - input_files = open(filelist).readlines() - - def __wrapper__(): - if shuffle_files: - self.global_rng.shuffle(input_files) - - if phase == "train": - self.total_file = len(input_files) - for file_index, input_file in enumerate(input_files, 1): - if phase == "train": - self.current_file_index = file_index - self.current_file = input_file - file_reader = self._read_file(input_file.strip(), phase, - is_infer) - for record in file_reader(): - yield record - - return __wrapper__ - - def _batch_reader(self, - reader, - phase=None, - is_infer=False, - sort_pool_size=2**16): - """Construct a batch reader.""" - - def update_max_lens(max_lens, record): - """Update max_lens.""" - if max_lens is None: - return self.sort_key(record) - else: - return [ - max(max_len, l) - for max_len, l in zip(max_lens, self.sort_key(record)) - ] - - def get_batch(reader): - """Generate batches from reader.""" - batch, max_lens = [], None - for record in reader(): - if record is None: - yield batch - batch, max_lens = [], None - continue - - self.current_example += 1 - max_lens = update_max_lens(max_lens, record) - if self.in_tokens: - to_append = ( - len(batch) + 1) * sum(max_lens) <= self.batch_size - else: - to_append = len(batch) < self.batch_size - if to_append: - batch.append(record) - else: - yield batch - batch, max_lens = [record], self.sort_key(record) - - if len(batch) > 0: - yield batch - - def get_sorted_batch(pool): - """Generate sorted batches from pool.""" - pool = sorted(pool, key=self.sort_key) - batches = [] - batch, max_lens = [], None - for record in pool: - self.current_example += 1 - max_lens = update_max_lens(max_lens, record) - if self.in_tokens: - to_append = ( - len(batch) + 1) * sum(max_lens) <= self.batch_size - else: - to_append = len(batch) < self.batch_size - if to_append: - batch.append(record) - else: - batches.append(batch) - batch, max_lens = [record], self.sort_key(record) - - if len(batch) > 0: - batches.append(batch) - self.global_rng.shuffle(batches) - - for batch in batches: - yield batch - - def __wrapper__(): - if sort_pool_size > 0: - pool = [] - for record in reader(): - pool.append(record) - if len(pool) == sort_pool_size: - for batch in get_sorted_batch(pool): - yield batch - pool = [] - if len(pool) > 0: - for batch in get_sorted_batch(pool): - yield batch - else: - for batch in get_batch(reader): - yield batch - - return __wrapper__ - - def _distributed_batch_reader(self, - batch_reader, - num_part, - part_id, - is_test=False): - def __wrapper__(): - batches = [] - for batch in batch_reader(): - batches.append(batch) - if len(batches) == num_part: - yield batches[part_id] - batches = [] - if is_test and 0 <= part_id < len(batches): - yield batches[part_id] - return - - return __wrapper__ - - def data_generator(self, - input_file=None, - reader=None, - num_epochs=1, - num_part=1, - part_id=0, - phase=None, - is_infer=False): - """Data generator.""" - - def __wrapper__(): - if is_infer or phase.endswith("test"): - self.features[phase] = {} - - nonlocal reader - if reader is None: - if self.file_format == "filelist": - reader = self._read_files(input_file, phase, is_infer, - not phase.endswith("test")) - else: - if phase == 
"train": - self.total_file = 1 - self.current_file_index = 1 - self.current_file = input_file - reader = self._read_file(input_file, phase, is_infer) - - batch_reader = self._batch_reader( - reader, - phase, - is_infer, - sort_pool_size=self.sort_pool_size if not is_infer else 0) - if phase == "train": - batch_reader = self._distributed_batch_reader( - batch_reader, num_part, part_id) - elif phase.startswith("distributed"): - batch_reader = self._distributed_batch_reader( - batch_reader, num_part, part_id, is_test=True) - - for epoch_index in range(num_epochs): - if phase == "train": - self.current_example = 0 - self.current_epoch = epoch_index + 1 - for batch in batch_reader(): - yield self._pad_batch_records(batch, is_infer) - - return __wrapper__ - - def _gen_self_attn_mask(self, - batch_token_ids, - batch_tgt_start_idx=None, - is_unidirectional=True, - shift_len=0): - max_len = max(map(len, batch_token_ids)) - input_mask_data = np.zeros((len(batch_token_ids), max_len + shift_len, - max_len + shift_len)) - if is_unidirectional: - for index, mask_data in enumerate(input_mask_data): - start = 0 if batch_tgt_start_idx is None else batch_tgt_start_idx[ - index] - end = len(batch_token_ids[index]) - mask_data[:end + shift_len, :start + shift_len] = 1.0 - # Generate the lower triangular matrix using the slice of matrix - b = np.tril(np.ones([end - start, end - start]), 0) - mask_data[start + shift_len:end + shift_len, start + - shift_len:end + shift_len] = b - else: - for index, token_ids in enumerate(batch_token_ids): - input_mask_data[index, :len(token_ids) + - shift_len, :len(token_ids) + shift_len] = 1.0 - return input_mask_data.astype("float32") - - def _pad_batch_records(self, batch_records, is_infer): - """ - Padding batch records and construct model's inputs. 
- """ - batch_size = len(batch_records) - batch = {} - batch_token_ids = [record.token_ids for record in batch_records] - batch_type_ids = [record.type_ids for record in batch_records] - batch_pos_ids = [record.pos_ids for record in batch_records] - batch["token_ids"] = pad_batch_data(batch_token_ids, pad_id=self.pad_id) - batch["type_ids"] = pad_batch_data(batch_type_ids, pad_id=self.pad_id) - batch["pos_ids"] = pad_batch_data(batch_pos_ids, pad_id=self.pad_id) - - batch_tgt_start_idx = [record.tgt_start_idx for record in batch_records] - batch["generation_mask"] = self._gen_self_attn_mask( - batch_token_ids, batch_tgt_start_idx=batch_tgt_start_idx) - - if is_infer: - tgt_ids = np.array( - [[[self.bos_id]]] * len(batch_token_ids), dtype="int64") - if self.continuous_position: - tgt_pos = np.array(batch_tgt_start_idx, dtype="int64") - else: - tgt_pos = np.zeros_like(batch_tgt_start_idx, dtype="int64") - tgt_pos = tgt_pos.reshape(-1, 1, 1) - batch["init_score"] = np.zeros_like( - tgt_ids, dtype="float32").reshape(-1, 1).tolist() - batch["tgt_ids"] = tgt_ids.tolist() - batch["tgt_pos"] = tgt_pos.tolist() - - batch["tgt_generation_mask"] = batch[ - "generation_mask"][:, 0:1, :].astype("float32") - else: - batch["tgt_label"], batch["tgt_pos"] = mask( - batch_tokens=batch_token_ids, - vocab_size=self.vocab_size, - sent_b_starts=batch_tgt_start_idx, - is_unidirectional=True) - - batch_data_id = [record.data_id for record in batch_records] - batch["data_id"] = np.array(batch_data_id).astype("int64").reshape( - [-1, 1]) - return batch - - -@contextmanager -def open_file(filename): - """Open file.""" - if filename.endswith(".gz"): - fp = gzip.open(filename, "rt") - else: - fp = open(filename) - yield fp - fp.close() diff --git a/hub_module/modules/text/text_generation/plato2_en_base/readers/nsp_reader.py b/hub_module/modules/text/text_generation/plato2_en_base/readers/nsp_reader.py deleted file mode 100644 index 381e15f3eae241f2520e63557bafd9c1211c891b..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_base/readers/nsp_reader.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""NSP Reader.""" - -from collections import namedtuple - -import numpy as np - -from plato2_en_base.readers.dialog_reader import DialogReader -from plato2_en_base.utils import pad_batch_data -from plato2_en_base.utils.args import str2bool -from plato2_en_base.utils.masking import mask - - -class NSPReader(DialogReader): - """NSP Reader.""" - - @classmethod - def add_cmdline_args(cls, parser): - """Add cmdline argurments.""" - group = DialogReader.add_cmdline_args(parser) - group.add_argument( - "--attention_style", - type=str, - default="bidirectional", - choices=["bidirectional", "unidirectional"]) - group.add_argument( - "--mix_negative_sample", type=str2bool, default=False) - return group - - def __init__(self, args): - super(NSPReader, self).__init__(args) - self.fields.append("label") - self.Record = namedtuple( - "Record", self.fields, defaults=(None, ) * len(self.fields)) - - self.attention_style = args.attention_style - self.mix_negative_sample = args.mix_negative_sample - return - - def _convert_example_to_record(self, example, is_infer): - record = super(NSPReader, self)._convert_example_to_record( - example, False) - if "label" in example._fields: - record = record._replace(label=int(example.label)) - return record - - def _mix_negative_sample(self, reader, neg_pool_size=2**16): - def gen_from_pool(pool): - num_samples = len(pool) - if num_samples == 1: - # only one sample: it is impossible to generate negative sample - yield pool[0]._replace(label=1) - return - self.global_rng.shuffle(pool) - for i in range(num_samples): - pool[i] = pool[i]._replace(label=1) - j = (i + 1) % num_samples - idx_i = pool[i].tgt_start_idx - idx_j = pool[j].tgt_start_idx - field_values = {} - field_values["token_ids"] = pool[i].token_ids[:idx_i] + pool[ - j].token_ids[idx_j:] - field_values["type_ids"] = pool[i].type_ids[:idx_i] + pool[ - j].type_ids[idx_j:] - field_values["pos_ids"] = list( - range(len(field_values["token_ids"]))) - neg_record = self.Record( - **field_values, tgt_start_idx=idx_i, data_id=-1, label=0) - pool.append(neg_record) - assert len(neg_record.token_ids) <= self.max_seq_len - self.global_rng.shuffle(pool) - for record in pool: - yield record - - def __wrapper__(): - pool = [] - for record in reader(): - pool.append(record) - if len(pool) == neg_pool_size: - for record in gen_from_pool(pool): - yield record - pool = [] - if len(pool) > 0: - for record in gen_from_pool(pool): - yield record - - return __wrapper__ - - def _batch_reader(self, - reader, - phase=None, - is_infer=False, - sort_pool_size=2**16): - if self.mix_negative_sample: - reader = self._mix_negative_sample(reader) - return super(NSPReader, self)._batch_reader( - reader, - phase=phase, - is_infer=is_infer, - sort_pool_size=sort_pool_size) - - def _pad_batch_records(self, batch_records, is_infer): - """ - Padding batch records and construct model's inputs. 
- """ - batch = {} - batch_token_ids = [record.token_ids for record in batch_records] - batch_type_ids = [record.type_ids for record in batch_records] - batch_pos_ids = [record.pos_ids for record in batch_records] - batch_tgt_start_idx = [record.tgt_start_idx for record in batch_records] - batch_label = [record.label for record in batch_records] - - if self.attention_style == "unidirectional": - batch["token_ids"] = pad_batch_data( - batch_token_ids, pad_id=self.pad_id) - batch["type_ids"] = pad_batch_data( - batch_type_ids, pad_id=self.pad_id) - batch["pos_ids"] = pad_batch_data(batch_pos_ids, pad_id=self.pad_id) - tgt_label, tgt_pos, label_pos = mask( - batch_tokens=batch_token_ids, - vocab_size=self.vocab_size, - bos_id=self.bos_id, - sent_b_starts=batch_tgt_start_idx, - labels=batch_label, - is_unidirectional=True) - attention_mask = self._gen_self_attn_mask(batch_token_ids, - batch_tgt_start_idx) - else: - batch_mask_token_ids, tgt_label, tgt_pos, label_pos = mask( - batch_tokens=batch_token_ids, - vocab_size=self.vocab_size, - bos_id=self.bos_id, - eos_id=self.eos_id, - mask_id=self.mask_id, - sent_b_starts=batch_tgt_start_idx, - labels=batch_label, - is_unidirectional=False) - if not is_infer: - batch_token_ids = batch_mask_token_ids - batch["token_ids"] = pad_batch_data( - batch_token_ids, pad_id=self.pad_id) - batch["type_ids"] = pad_batch_data( - batch_type_ids, pad_id=self.pad_id) - batch["pos_ids"] = pad_batch_data(batch_pos_ids, pad_id=self.pad_id) - attention_mask = self._gen_self_attn_mask( - batch_token_ids, is_unidirectional=False) - - batch["attention_mask"] = attention_mask - batch["label_pos"] = label_pos - - if not is_infer: - batch_label = np.array(batch_label).astype("int64").reshape([-1, 1]) - batch["label"] = batch_label - batch["tgt_label"] = tgt_label - batch["tgt_pos"] = tgt_pos - - batch_data_id = [record.data_id for record in batch_records] - batch["data_id"] = np.array(batch_data_id).astype("int64").reshape( - [-1, 1]) - return batch diff --git a/hub_module/modules/text/text_generation/plato2_en_base/readers/plato_reader.py b/hub_module/modules/text/text_generation/plato2_en_base/readers/plato_reader.py deleted file mode 100644 index e04c857c28675afea2c6bce8026977b0e053f4a2..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_base/readers/plato_reader.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Plato Reader.""" - -import numpy as np - -from plato2_en_base.readers.dialog_reader import DialogReader -from plato2_en_base.utils import pad_batch_data -from plato2_en_base.utils.masking import mask - - -class PlatoReader(DialogReader): - """The implement of PlatoReader""" - - def __init__(self, args): - super(PlatoReader, self).__init__(args) - self.latent_type_size = args.latent_type_size - self.use_bow = args.use_bow - - def _pad_batch_records(self, batch_records, is_infer): - """ - Padding batch records and construct model's inputs. - """ - batch = {} - batch_token_ids = [record.token_ids for record in batch_records] - batch_type_ids = [record.type_ids for record in batch_records] - batch_pos_ids = [record.pos_ids for record in batch_records] - - batch_tgt_start_idx = [record.tgt_start_idx for record in batch_records] - - batch_size = len(batch_token_ids) - - # padding - batch["token_ids"] = pad_batch_data(batch_token_ids, pad_id=self.pad_id) - batch["type_ids"] = pad_batch_data(batch_type_ids, pad_id=self.pad_id) - batch["pos_ids"] = pad_batch_data(batch_pos_ids, pad_id=self.pad_id) - - batch["generation_mask"] = self._gen_self_attn_mask( - batch_token_ids, - batch_tgt_start_idx=batch_tgt_start_idx, - is_unidirectional=True, - shift_len=1) - if not is_infer: - batch["recognition_mask"] = self._gen_self_attn_mask( - batch_token_ids, is_unidirectional=False, shift_len=1) - - if is_infer: - tgt_ids = np.array([[[self.bos_id]]] * batch_size, dtype="int64") - if self.continuous_position: - tgt_pos = np.array(batch_tgt_start_idx, dtype="int64") - else: - tgt_pos = np.zeros_like(batch_tgt_start_idx, dtype="int64") - tgt_pos = tgt_pos.reshape(-1, 1, 1) - batch["init_score"] = np.zeros_like( - tgt_ids, dtype="float32").reshape(-1, 1).tolist() - batch["tgt_ids"] = tgt_ids.tolist() - batch["tgt_pos"] = tgt_pos.tolist() - batch["parent_idx"] = np.array(range(batch_size), dtype="int32") - - batch["tgt_generation_mask"] = batch[ - "generation_mask"][:, 0:1, :].astype("float32") - else: - mask_return_list = mask( - batch_tokens=batch_token_ids, - vocab_size=self.vocab_size, - sent_b_starts=batch_tgt_start_idx, - is_unidirectional=True, - use_latent=True, - use_bow=self.use_bow) - batch["tgt_label"] = mask_return_list[0] - batch["tgt_pos"] = mask_return_list[1] - if self.use_bow: - batch["bow_label"] = mask_return_list[2] - batch["bow_pos"] = mask_return_list[3] - - batch_data_id = [record.data_id for record in batch_records] - batch["data_id"] = np.array(batch_data_id).astype("int64").reshape( - [-1, 1]) - return batch diff --git a/hub_module/modules/text/text_generation/plato2_en_base/tasks/dialog_generation.py b/hub_module/modules/text/text_generation/plato2_en_base/tasks/dialog_generation.py deleted file mode 100644 index dca017008fce32a78624ca655b4b05d66585010e..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_base/tasks/dialog_generation.py +++ /dev/null @@ -1,307 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Dialogue generation task."""
-
-from collections import defaultdict
-import math
-
-from plato2_en_base.readers.dialog_reader import DialogReader
-from plato2_en_base.readers.plato_reader import PlatoReader
-from plato2_en_base.tasks import register_task
-from plato2_en_base.tasks.task_base import Task
-from plato2_en_base.utils.args import str2bool
-from plato2_en_base.utils.inference import create_predictor
-
-
-def post_process_context(token_ids, reader, merge=True):
-    """Post-process the context sequence."""
-    context = []
-    utt = []
-    for tok_id in token_ids[1:]:
-        if tok_id == reader.eos_id:
-            utt = reader.tokenizer.convert_ids_to_tokens(utt)
-            if merge:
-                utt = reader.tokenizer.merge_subword(utt)
-            context.append(utt)
-            utt = []
-        else:
-            utt.append(tok_id)
-    return context
-
-
-def post_process_response(token_ids, reader, merge=True):
-    """
-    Post-process the decoded sequence. Truncate at the first <eos>
-    and remove the leading <bos> token.
-    """
-    eos_pos = len(token_ids)
-    for i, tok_id in enumerate(token_ids):
-        if tok_id == reader.eos_id:
-            eos_pos = i
-            break
-    token_ids = token_ids[1:eos_pos]
-    response = reader.tokenizer.convert_ids_to_tokens(token_ids)
-    if merge:
-        response = reader.tokenizer.merge_subword(response)
-    return token_ids, response
-
-
-def get_cross_turn_repetition(context, pred_tokens, eos_idx, is_cn=False):
-    """Get cross-turn repetition: 1.0 if any tri-gram of the prediction
-    already appears in a context utterance, else 0.0."""
-    if len(pred_tokens) == 0:
-        return 1.0
-    if is_cn:
-        context = ["".join(utt) for utt in context]
-        pred_tokens = "".join(pred_tokens)
-
-    pred_tri_grams = set()
-    for i in range(len(pred_tokens) - 2):
-        tri_gram = tuple(pred_tokens[i:i + 3])
-        pred_tri_grams.add(tri_gram)
-    for utt in context:
-        for i in range(len(utt) - 2):
-            tri_gram = tuple(utt[i:i + 3])
-            if tri_gram in pred_tri_grams:
-                return 1.0
-    return 0.0
-
-
-def get_in_turn_repetition(pred, is_cn=False):
-    """Get in-turn repetition: 1.0 if the prediction repeats one of its own
-    tri-grams, else 0.0."""
-    if len(pred) == 0:
-        return 1.0
-    if isinstance(pred[0], str):
-        pred = [tok.lower() for tok in pred]
-        if is_cn:
-            pred = "".join(pred)
-    tri_grams = set()
-    for i in range(len(pred) - 2):
-        tri_gram = tuple(pred[i:i + 3])
-        if tri_gram in tri_grams:
-            return 1.0
-        tri_grams.add(tri_gram)
-    return 0.0
-
-
-def get_nsp_score_batch(nsp_predictor, predictions):
-    """
-    Get NSP scores of a batch.
- """ - import argparse - from collections import namedtuple - - from plato2_en_base.readers.nsp_reader import NSPReader - from plato2_en_base.utils.args import parse_args - from plato2_en_base.tasks.next_sentence_prediction import NextSentencePrediction - - parser = argparse.ArgumentParser() - NextSentencePrediction.add_cmdline_args(parser) - parser.add_argument("--num_samples", type=int, default=None) - parser.add_argument("--config_path", type=str, required=True) - parser.add_argument("--mem_efficient", type=str2bool, default=False) - - args = parse_args(parser, allow_unknown=True) - args.load(args.config_path) - if not args.mem_efficient: - if args.num_samples: - args.batch_size *= args.num_samples - if args.latent_type_size: - args.batch_size *= args.latent_type_size - args.tokenized_input = True - reader = NSPReader(args) - - def __reader__(): - headers = ["src", "tgt", "data_id"] - - Example = namedtuple("Example", headers) - - for i, info in enumerate(predictions): - context = post_process_context( - info["context_token_ids"], reader, merge=False) - context_tokenized_input = " [SEP] ".join( - " ".join(utt) for utt in context) - _, response = post_process_response( - info["response_token_ids"], reader, merge=False) - response_tokenized_input = " ".join(response) - example = Example( - src=context_tokenized_input, - tgt=response_tokenized_input, - data_id=i) - record = reader._convert_example_to_record(example, is_infer=True) - yield record - return - - generator = reader.data_generator( - reader=__reader__, - is_infer=True, - phase="test", - ) - - steps = 0 - for data in generator(): - outputs = nsp_predictor(data) - for probs, data_id in zip(outputs[0], outputs[-1]): - data_id = data_id[0] - info = predictions[data_id] - info["nsp_score"] = float(probs[1]) - - return - - -@register_task("DialogGeneration") -class DialogGeneration(Task): - """ - Define dialogue response generation. - """ - - @classmethod - def add_cmdline_args(cls, parser): - """Add cmdline argurments.""" - group = parser.add_argument_group("Task") - group.add_argument("--do_generation", type=str2bool, default=False) - group.add_argument("--is_cn", type=str2bool, default=False) - - group.add_argument("--nsp_inference_model_path", type=str, default=None) - group.add_argument( - "--nsp_attention_style", type=str, default="bidirectional") - - group.add_argument("--ranking_score", type=str, default="decode_score") - - args, _ = parser.parse_known_args() - if args.model == "Plato": - PlatoReader.add_cmdline_args(parser) - else: - DialogReader.add_cmdline_args(parser) - return group - - def __init__(self, args): - super(DialogGeneration, self).__init__(args) - self.do_generation = args.do_generation - self.is_cn = args.is_cn - if args.model == "Plato": - self.reader = PlatoReader(args) - else: - self.reader = DialogReader(args) - - if args.nsp_inference_model_path: - self.nsp_predictor = create_predictor(args.nsp_inference_model_path, - args.is_distributed) - self.nsp_attention_style = args.nsp_attention_style - else: - self.nsp_predictor = None - - self.ranking_score = args.ranking_score - self.max_dec_len = args.max_dec_len - return - - def _post_process_generation_output(self, predictions): - """ - Post process generation output. - - Calculate repetion, reranking. 
- """ - for info in predictions: - tokens = post_process_context(info["context_token_ids"], - self.reader) - pred_token_ids, pred_tokens = post_process_response( - info["response_token_ids"], self.reader) - info["context"] = " [SEP] ".join(" ".join(u) for u in tokens) - info["response"] = " ".join(pred_tokens) - info["num_token"] = len(pred_token_ids) - info["cross_turn_repetition"] = get_cross_turn_repetition( - tokens, pred_tokens, self.reader.eos_id, self.is_cn) - info["in_turn_repetition"] = max( - get_in_turn_repetition(pred_tokens, self.is_cn), - get_in_turn_repetition(pred_token_ids)) - if self.nsp_predictor is not None: - get_nsp_score_batch(self.nsp_predictor, predictions) - - group = defaultdict(list) - for info in predictions: - group[info["data_id"]].append(info) - - predictions = [] - for data_id in group: - infos = group[data_id] - for info in infos: - info["score"] = info[self.ranking_score] - if self.max_dec_len is not None and info[ - "num_token"] >= self.max_dec_len: # not ending - info["score"] -= 1e3 - elif info["cross_turn_repetition"] > 0: - info["score"] -= 1e3 - elif info["in_turn_repetition"] > 0: - info["score"] -= 1e3 - infos = sorted(infos, key=lambda info: -info["score"]) - pred = infos[0] - keep_attr = ["data_id", "score", "response"] - pred = {k: pred[k] for k in keep_attr} - predictions.append(pred) - return predictions - - def _post_process_scoring_output(self, predictions): - raise NotImplementedError - - def _post_process_infer_output(self, predictions): - if self.do_generation: - return self._post_process_generation_output(predictions) - else: - return self._post_process_scoring_output(predictions) - - def merge_mertrics_and_statistics(self, outputs, part_outputs): - """ - Merge two evaulation output. - """ - if outputs is None: - return part_outputs - - if part_outputs is None: - return outputs - - batch_size = outputs.pop("batch_size") - tokens_num = outputs.pop("tokens_num") - part_batch_size = part_outputs.pop("batch_size") - part_tokens_num = part_outputs.pop("tokens_num") - - new_outputs = { - "batch_size": batch_size + part_batch_size, - "tokens_num": tokens_num + part_tokens_num - } - for k in outputs: - if k.startswith("token_"): - new_outputs[k] = (outputs[k] * tokens_num + part_outputs[k] * - part_tokens_num) / new_outputs["tokens_num"] - else: - new_outputs[k] = (outputs[k] * batch_size + part_outputs[k] * - part_batch_size) / new_outputs["batch_size"] - return new_outputs - - def get_metrics(self, outputs): - """ - Get metrics. - """ - if outputs is None: - raise ValueError("metrics is None") - outputs = dict(outputs) - outputs.pop("batch_size", None) - outputs.pop("tokens_num", None) - metrics = {} - for k in outputs: - if k.startswith("token_"): - metrics[k[6:]] = outputs[k] - else: - metrics[k] = outputs[k] - if k == "token_lm_loss": - metrics["ppl"] = math.exp(outputs[k]) - return metrics diff --git a/hub_module/modules/text/text_generation/plato2_en_base/tasks/next_sentence_prediction.py b/hub_module/modules/text/text_generation/plato2_en_base/tasks/next_sentence_prediction.py deleted file mode 100644 index 3708ba0ea91c7079bcbaf8dd87cffe3b72edf3dd..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_base/tasks/next_sentence_prediction.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Next sentence prediction task.""" - -from plato2_en_base.readers.nsp_reader import NSPReader -from plato2_en_base.tasks import register_task -from plato2_en_base.tasks.task_base import Task -from plato2_en_base.utils.args import str2bool - - -@register_task("NextSentencePrediction") -class NextSentencePrediction(Task): - """ - Define dialogue response generation. - """ - - @classmethod - def add_cmdline_args(cls, parser): - """Add cmdline argurments.""" - group = NSPReader.add_cmdline_args(parser) - return group - - def __init__(self, args): - super(NextSentencePrediction, self).__init__(args) - self.reader = NSPReader(args) - return - - def _post_process_infer_output(self, predictions): - predictions = [{ - "data_id": data_id.tolist()[0], - "score": score.tolist()[1] - } for data_id, score in zip(predictions["data_id"], - predictions["scores"])] - return predictions diff --git a/hub_module/modules/text/text_generation/plato2_en_base/tasks/task_base.py b/hub_module/modules/text/text_generation/plato2_en_base/tasks/task_base.py deleted file mode 100644 index a7acf7558f7d275773704cb9e5bb4c708fd12273..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_base/tasks/task_base.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Task base.""" - -from abc import abstractmethod, ABC - -from plato2_en_base.models.model_base import Model - - -class Task(ABC): - """ - Basic task. - """ - - def __init__(self, args): - return - - def train_step(self, model: Model, inputs): - """Run one training step.""" - outputs = model.train_step(inputs) - outputs = {k: v.tolist()[0] for k, v in outputs.items()} - return outputs - - def eval_step(self, model: Model, inputs): - """Run one evaluation step""" - outputs = model.eval_step(inputs) - outputs = {k: v.tolist()[0] for k, v in outputs.items()} - return outputs - - def infer_step(self, model: Model, inputs): - """Run one inference step.""" - predictions = model.infer_step(inputs) - outputs = self._post_process_infer_output(predictions) - return outputs - - def _post_process_infer_output(self, predictions): - """ - Post-process inference output. - """ - return predictions - - def merge_mertrics_and_statistics(self, outputs, part_outputs): - """ - Merge metrics and statistics. 
- """ - if outputs is None: - return part_outputs - - if part_outputs is None: - return outputs - - batch_size = outputs.pop("batch_size") - part_batch_size = part_outputs.pop("batch_size") - - new_outputs = { - "batch_size": batch_size + part_batch_size, - } - for k in outputs: - new_outputs[k] = (outputs[k] * batch_size + part_outputs[k] * - part_batch_size) / new_outputs["batch_size"] - return new_outputs - - def get_metrics(self, outputs): - """ - Get metrics. - """ - if outputs is None: - raise ValueError("metrics is None") - outputs = dict(outputs) - # pop statistics - outputs.pop("batch_size", None) - return outputs - - def get_data_loader(self, model, *args, is_infer=False, **kwargs): - generator = self.reader.data_generator( - *args, is_infer=is_infer, **kwargs) - return model.get_data_loader(generator, is_infer) diff --git a/hub_module/modules/text/text_generation/plato2_en_base/utils/__init__.py b/hub_module/modules/text/text_generation/plato2_en_base/utils/__init__.py deleted file mode 100644 index 12da7e947a821727e8ff3ee6c683c84f64583281..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_base/utils/__init__.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Utils.""" - -from itertools import chain -import os -import time -import sys - -import numpy as np -import paddle.fluid as fluid - - -def to_lodtensor(data, place): - """Convert data to LoDTensor.""" - if place is None: - return data - lengths = [] - while isinstance(data[0], list): - lengths.append(list(map(len, data))) - data = [x for xs in data for x in xs] - if isinstance(data[0], float): - data = np.array(data, dtype="float32") - else: - data = np.array(data, dtype="int64") - data_tensor = fluid.LoDTensor() - data_tensor.set(data, place) - data_tensor.set_recursive_sequence_lengths(lengths) - return data_tensor - - -def pad_batch_data(insts, pad_id=0): - """Pad the instances to the max sequence length in batch. 
""" - max_len = max(map(len, insts)) - inst_data = np.array( - [list(inst) + [pad_id] * (max_len - len(inst)) for inst in insts]) - return inst_data.astype("int64").reshape([-1, max_len, 1]) - - -def convert_lodtensor_to_list(tensor): - data = np.array(tensor) - recursive_sequence_lengths = tensor.recursive_sequence_lengths() - recursive_sequence_lengths.reverse() - for i, lengths in enumerate(recursive_sequence_lengths): - shift = 0 - new_data = [] - for j, l in enumerate(lengths): - new_data.append(data[shift:shift + l]) - shift += l - data = new_data - return data - - -def concatenate_lodtensors(tensors, place): - """Concatenate LoD tensors.""" - data = [] - recursive_sequence_lengths = [] - for tensor in tensors: - data.append(np.array(tensor)) - recursive_sequence_lengths.append(tensor.recursive_sequence_lengths()) - data = np.concatenate(data, axis=0) - recursive_sequence_lengths = [ - sum(lens, []) for lens in zip(*recursive_sequence_lengths) - ] - data_tensor = fluid.LoDTensor() - data_tensor.set(data, place) - data_tensor.set_recursive_sequence_lengths(recursive_sequence_lengths) - assert data_tensor.has_valid_recursive_sequence_lengths() - return data_tensor - - -def repeat_array_or_tensor(array_or_tensor, place, times): - """Repeate numpy array or LoD tensor.""" - if isinstance(array_or_tensor, fluid.LoDTensor): - data = [np.array(array_or_tensor)] * times - recursive_sequence_lengths = [ - array_or_tensor.recursive_sequence_lengths() - ] * times - data = np.concatenate(data, axis=0) - recursive_sequence_lengths = [ - sum(lens, []) for lens in zip(*recursive_sequence_lengths) - ] - data_tensor = fluid.LoDTensor() - data_tensor.set(data, place) - data_tensor.set_recursive_sequence_lengths(recursive_sequence_lengths) - assert data_tensor.has_valid_recursive_sequence_lengths() - return data_tensor - elif isinstance(array_or_tensor, list): - return list(chain(*([array_or_tensor] * times))) - else: - return np.concatenate([array_or_tensor] * times, axis=0) - - -def slice_array_or_tensor(array_or_tensor, place, begin, end): - """Repeate numpy array or LoD tensor.""" - if isinstance(array_or_tensor, fluid.LoDTensor): - data = convert_lodtensor_to_list(array_or_tensor) - data = data[begin:end] - return to_lodtensor(data, place) - else: - return array_or_tensor[begin:end] - - -def init_checkpoint(exe, init_checkpoint_path, main_program): - """Initialize from checkpoint.""" - assert os.path.exists( - init_checkpoint_path), "[%s] cann't be found." % init_checkpoint_path - - def existed_persitables(var): - """Whether var is a persistables.""" - if not fluid.io.is_persistable(var): - return False - return os.path.exists(os.path.join(init_checkpoint_path, var.name)) - - fluid.io.load_vars( - exe, - init_checkpoint_path, - main_program=main_program, - predicate=existed_persitables) - print(f"Load model from {init_checkpoint_path}") - - -def init_pretraining_params(exe, pretraining_params_path, main_program): - """Only initialize parameters.""" - assert os.path.exists(pretraining_params_path - ), "[%s] cann't be found." 
% pretraining_params_path - - def existed_params(var): - """Whether var is a parameter.""" - if not isinstance(var, fluid.framework.Parameter): - return False - return os.path.exists(os.path.join(pretraining_params_path, var.name)) - - fluid.io.load_vars( - exe, - pretraining_params_path, - main_program=main_program, - predicate=existed_params) - print(f"Load pretraining parameters from {pretraining_params_path}.") - - return - - -class Timer(object): - def __init__(self): - self._pass_time = 0 - self._start_time = None - return - - def start(self): - self._start_time = time.time() - - def pause(self): - self._pass_time += time.time() - self._start_time - self._start_time = None - - def reset(self): - self._pass_time = 0 - - @property - def pass_time(self): - if self._start_time is None: - return self._pass_time - else: - return self._pass_time + time.time() - self._start_time - - -ERROR_MESSAGE = "\nYou can not set use_cuda = True in the model because you are using paddlepaddle-cpu.\n \ - Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_cuda = False to run models on CPU.\n" - - -def check_cuda(use_cuda, err=ERROR_MESSAGE): - """Check CUDA.""" - try: - if use_cuda and not fluid.is_compiled_with_cuda(): - print(err) - sys.exit(1) - except Exception as e: - pass diff --git a/hub_module/modules/text/text_generation/plato2_en_base/utils/inference.py b/hub_module/modules/text/text_generation/plato2_en_base/utils/inference.py deleted file mode 100644 index f9b01d3e30163519c815311dd54034e74d8e4947..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_base/utils/inference.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Inference utils.""" - -import os - -import paddle.fluid as fluid - - -def create_predictor(inference_model_path, is_distributed=False): - """Create predictor.""" - if is_distributed: - dev_count = fluid.core.get_cuda_device_count() - gpu_id = int(os.getenv("FLAGS_selected_gpus")) - else: - dev_count = 1 - gpu_id = 0 - - place = fluid.CUDAPlace(gpu_id) - exe = fluid.Executor(place) - - scope = fluid.Scope() - with fluid.scope_guard(scope): - inference_prog, feed_target_names, fetch_targets = fluid.io.load_inference_model( - inference_model_path, exe) - - def __predict__(inputs): - with fluid.scope_guard(scope): - outputs = exe.run( - inference_prog, - feed=inputs, - fetch_list=fetch_targets, - return_numpy=True) - return outputs - - return __predict__ diff --git a/hub_module/modules/text/text_generation/plato2_en_base/utils/masking.py b/hub_module/modules/text/text_generation/plato2_en_base/utils/masking.py deleted file mode 100644 index 1f497d4658197de62f3e5309d8696fca6d9f817b..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_base/utils/masking.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
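As a quick sanity check of the `pad_batch_data` helper above: it pads every instance to the batch maximum and appends a trailing unit axis to match fluid's `[-1, 1]` data layout. A runnable restatement with a usage example:

```python
import numpy as np


def pad_batch_data(insts, pad_id=0):
    """Pad each instance to the batch max length (same logic as above)."""
    max_len = max(map(len, insts))
    data = np.array(
        [list(inst) + [pad_id] * (max_len - len(inst)) for inst in insts])
    return data.astype("int64").reshape([-1, max_len, 1])


if __name__ == "__main__":
    padded = pad_batch_data([[5, 6, 7], [8, 9]], pad_id=0)
    print(padded.shape)     # (2, 3, 1): trailing unit axis for fluid feeds
    print(padded[:, :, 0])  # [[5 6 7], [8 9 0]]
```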
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Reader utils.""" - -import numpy as np - -import plato2_en_base.utils - - -def mask(batch_tokens, - vocab_size, - bos_id=1, - eos_id=2, - mask_id=3, - sent_b_starts=None, - labels=None, - is_unidirectional=False, - use_latent=False, - use_bow=False): - """ - Add mask for batch_tokens, return out, mask_label, mask_pos; - Note: mask_pos responding the batch_tokens after padded; - """ - batch_tokens = np.copy(batch_tokens) - max_len = max(map(len, batch_tokens)) - mask_label = [] - mask_pos = [] - if labels is not None: - label_pos = [] - - if is_unidirectional: - # unidirectional language model - if use_latent: - max_len += 1 - shift_len = 1 - else: - shift_len = 0 - for sent_index, sent in enumerate(batch_tokens): - sent_b_index = sent_b_starts[ - sent_index] if sent_b_starts is not None else 0 - need_cal = True - if labels is not None: - label_pos.append(sent_index * max_len + len(sent) - 1 + - shift_len) - if labels[sent_index] == 0: - need_cal = False - mask_label.extend(sent[sent_b_index + 1:]) - mask_pos.extend([ - sent_index * max_len + i + shift_len - for i in range(sent_b_index, - len(sent) - 1) - ]) - mask_label = np.array(mask_label).astype("int64").reshape([-1, 1]) - mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1]) - return_list = [mask_label, mask_pos] - - # latent related (bow label and pos) - if use_latent and use_bow: - bow_label = [] - bow_pos = [] - for sent_index, sent in enumerate(batch_tokens): - sent_b_index = sent_b_starts[ - sent_index] if sent_b_starts is not None else 0 - - def __filter__(tok_id): - # TODO: exclude [EOS] from bow loss - return True - - bow_pos.extend([ - sent_index for i in range(sent_b_index + 1, len(sent)) - if __filter__(sent[i]) - ]) - bow_label.extend([ - sent[i] for i in range(sent_b_index + 1, len(sent)) - if __filter__(sent[i]) - ]) - bow_label = np.array(bow_label).astype("int64").reshape([-1, 1]) - bow_pos = np.array(bow_pos).astype("int64").reshape([-1, 1]) - return_list += [bow_label, bow_pos] - else: - # bidirectional mask language model - total_token_num = sum(map(len, batch_tokens)) - prob_mask = np.random.rand(total_token_num) - # TODO: fix replace_ids, include [UNK] - replace_ids = np.random.randint( - 3, high=vocab_size, size=total_token_num) - prob_index = 0 - for sent_index, sent in enumerate(batch_tokens): - # add pair label position - if labels is not None: - label_pos.append(sent_index * max_len) - - # add mask label and position - for token_index, token in enumerate(sent): - if token == eos_id or token == bos_id: - continue - prob = prob_mask[prob_index + token_index] - if prob > 0.15: - continue - elif 0.03 < prob <= 0.15: - # mask - mask_label.append(sent[token_index]) - sent[token_index] = mask_id - mask_pos.append(sent_index * max_len + token_index) - elif 0.015 < prob <= 0.03: - # random replace - mask_label.append(sent[token_index]) - sent[token_index] = replace_ids[prob_index + token_index] - mask_pos.append(sent_index * max_len + token_index) - else: - # 
keep the original token - mask_label.append(sent[token_index]) - mask_pos.append(sent_index * max_len + token_index) - - prob_index += len(sent) - - mask_label = np.array(mask_label).astype("int64").reshape([-1, 1]) - mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1]) - return_list = [batch_tokens, mask_label, mask_pos] - - if labels is not None: - label_pos = np.array(label_pos).astype("int64").reshape([-1, 1]) - assert len(labels) == len(label_pos) - return_list.append(label_pos) - return return_list diff --git a/hub_module/modules/text/text_generation/plato2_en_large/models/__init__.py b/hub_module/modules/text/text_generation/plato2_en_large/models/__init__.py deleted file mode 100644 index 0b8bf8bc4ec234ef89a9d7314ea161a3876839f3..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_large/models/__init__.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Define model.""" - -from plato2_en_large.models.model_base import Model - -MODEL_REGISTRY = {} - -__all__ = [ - "MODEL_REGISTRY", "register_model", "create_model", "add_cmdline_args" -] - - -def register_model(name): - """ - Register a new model class. - """ - - def __wrapped__(cls): - if name in MODEL_REGISTRY: - raise ValueError(f"Cannot register duplicate model ({name})") - if not issubclass(cls, Model): - raise ValueError( - f"Model ({name}: {cls.__name__}) must extend Model") - MODEL_REGISTRY[name] = cls - return cls - - return __wrapped__ - - -def create_model(args, place) -> Model: - """ - Create a model. - """ - return MODEL_REGISTRY[args.model](args, place) - - -def add_cmdline_args(parser): - """ Add cmdline argument of Model. """ - group = parser.add_argument_group("Model") - - # Model - group.add_argument("--model", type=str, required=True) - - # Config - group.add_argument("--config_path", type=str, required=True) - - # Model related. - args, _ = parser.parse_known_args() - if args.model not in MODEL_REGISTRY: - raise ValueError(f"Unknown model type: {args.model}") - MODEL_REGISTRY[args.model].add_cmdline_args(parser) - return group - - -import plato2_en_large.models.nsp_model -import plato2_en_large.models.plato diff --git a/hub_module/modules/text/text_generation/plato2_en_large/models/generator.py b/hub_module/modules/text/text_generation/plato2_en_large/models/generator.py deleted file mode 100644 index fdd213853e5b54b146b2eef162d214ee9c9e95fb..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_large/models/generator.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
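The bidirectional branch of `mask()` above selects roughly 15% of tokens for prediction and splits them into three corruption bands: a probability draw in (0.03, 0.15] replaces the token with the mask id (~12% of tokens), (0.015, 0.03] substitutes a random vocabulary id (~1.5%), and draws at or below 0.015 keep the token unchanged but still predict it (~1.5%). A standalone NumPy sketch of those bands, per sentence rather than per batch and without the padded-position bookkeeping (`sent_index * max_len + i`) that the real function performs:

```python
import numpy as np


def apply_mlm_noise(sent, vocab_size, mask_id=3, special_ids=(1, 2), rng=None):
    """Corrupt one sentence with the 12% / 1.5% / 1.5% masking bands above.

    Returns the corrupted tokens plus (label, position) pairs for the ~15%
    of tokens selected for prediction. Default ids assume bos=1, eos=2,
    mask=3 as in the surrounding code.
    """
    rng = rng or np.random.default_rng()
    sent = list(sent)
    labels, positions = [], []
    probs = rng.random(len(sent))
    replacements = rng.integers(3, vocab_size, size=len(sent))
    for i, (tok, p) in enumerate(zip(sent, probs)):
        if tok in special_ids or p > 0.15:
            continue  # special token, or outside the 15% selection band
        labels.append(tok)
        positions.append(i)
        if p > 0.03:
            sent[i] = mask_id               # ~12%: replace with [MASK]
        elif p > 0.015:
            sent[i] = int(replacements[i])  # ~1.5%: random vocabulary token
        # else ~1.5%: keep the original token, but still predict it
    return sent, labels, positions


if __name__ == "__main__":
    corrupted, labels, pos = apply_mlm_noise([1, 10, 11, 12, 13, 2], vocab_size=100)
    print(corrupted, labels, pos)
```

The random-replacement and keep-as-is bands follow the usual BERT-style recipe: they stop the encoder from relying on the literal presence of `[MASK]` at prediction positions.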
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Generator class""" - -import numpy as np -import paddle.fluid.layers as layers - -from plato2_en_large.utils.args import str2bool - - -class Generator(object): - """ - Generator class - - Use generator in inference phase. - """ - - @classmethod - def add_cmdline_args(cls, parser): - """Add cmdline argurments.""" - group = parser.add_argument_group("Generator") - group.add_argument("--min_dec_len", type=int, default=1) - group.add_argument("--max_dec_len", type=int, default=64) - - group.add_argument( - "--decoding_strategy", - type=str, - default="topk_sampling", - choices=["beam_search", "topk_sampling", "topp_sampling"]) - group.add_argument("--temperature", type=float, default=1.) - group.add_argument("--ignore_unk", type=str2bool, default=True) - - # multi sampling - group.add_argument("--num_samples", type=int, default=None) - - # top-k sampling - group.add_argument("--topk", type=int, default=10) - - # top-p sampling - group.add_argument("--topp", type=float, default=0.9) - - # beam search - group.add_argument("--beam_size", type=int, default=10) - group.add_argument("--length_average", type=str2bool, default=True) - group.add_argument("--length_penalty", type=float, default=0.0) - - return group - - def __init__(self, args): - self.min_dec_len = args.min_dec_len - self.max_dec_len = args.max_dec_len - self.eos_id = args.eos_id - self.unk_id = args.unk_id - self.mask_id = args.mask_id - self.vocab_size = args.vocab_size - - # model related - - # basic settings - self.decoding_strategy = args.decoding_strategy - self.ignore_unk = args.ignore_unk - self.continuous_position = args.continuous_position - self.temperature = args.temperature - - # reranking - self.num_samples = args.num_samples - - # top-k sampling - self.topk = args.topk - - # top-p sampling - self.topp = args.topp - - # beam search - self.beam_size = args.beam_size - self.length_penalty = args.length_penalty - self.length_average = args.length_average - return - - def inference(self, model, inputs, outputs): - """ - Run inference. - - Args: - inputs(dict): Its key is input name(str) and its value is a Variable. - model(object): A generate model. Need to implement `_generation_network` and `_calc_logits`. - - Returns: - dict(str:Variable): Its key is output name(str) and its value is a Variable. 
- """ - # prepare while loop - max_len = layers.fill_constant( - shape=[1], dtype="int64", value=self.max_dec_len, force_cpu=True) - min_len = layers.fill_constant( - shape=[1], dtype="int64", value=self.min_dec_len, force_cpu=True) - step_idx = layers.fill_constant( - shape=[1], dtype="int64", value=0, force_cpu=True) - - ids = layers.array_write( - layers.reshape(inputs["tgt_ids"], (-1, 1)), step_idx) - pos_biases = layers.array_write( - layers.reshape(inputs["tgt_pos"], (-1, 1)), step_idx) - scores = layers.array_write(inputs["init_score"], step_idx) - tgt_generation_mask = layers.array_write(inputs["tgt_generation_mask"], - step_idx) - parent_idx = inputs["parent_idx"] - - if self.decoding_strategy == "beam_search": - beam_size = self.beam_size - else: - beam_size = 1 - - eos_penalty = np.zeros(self.vocab_size, dtype="float32") - eos_penalty[self.eos_id] = -1e9 - eos_penalty = layers.assign(eos_penalty) - - token_penalty = np.zeros(self.vocab_size, dtype="float32") - token_penalty[self.unk_id] = -1e9 - if self.mask_id >= 0: - token_penalty[self.mask_id] = -1e9 - token_penalty = layers.assign(token_penalty) - - # start while loop - cond = layers.less_than(x=step_idx, y=max_len) - while_op = layers.While(cond) - with while_op.block(): - pre_ids = layers.array_read(array=ids, i=step_idx) - pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True) - pre_scores = layers.array_read(array=scores, i=step_idx) - pos_bias = layers.array_read(array=pos_biases, i=step_idx) - pos_bias = layers.gather(input=pos_bias, index=parent_idx) - - tmp_tgt_generation_mask = layers.array_read( - tgt_generation_mask, i=step_idx) - dtype = tmp_tgt_generation_mask.dtype - - append_mask = layers.fill_constant_batch_size_like( - input=pre_ids, value=1.0, shape=[-1, 1, 1], dtype=dtype) - tmp_tgt_generation_mask = layers.concat( - [tmp_tgt_generation_mask, append_mask], axis=2) - pre_mask = tmp_tgt_generation_mask = layers.gather( - input=tmp_tgt_generation_mask, index=parent_idx) - - pre_sent = layers.fill_constant_batch_size_like( - input=pre_mask, value=1, shape=[-1, 1, 1], dtype=pre_ids.dtype) - - if self.continuous_position: - pre_pos = layers.elementwise_mul( - x=layers.fill_constant_batch_size_like( - input=pre_mask, - value=1, - shape=[-1, 1, 1], - dtype=pre_ids.dtype), - y=step_idx, - axis=0) + pos_bias - else: - pre_pos = layers.elementwise_mul( - x=layers.fill_constant_batch_size_like( - input=pre_mask, - value=1, - shape=[-1, 1, 1], - dtype=pre_ids.dtype), - y=step_idx, - axis=0) - - dec_out, _ = model._generation_network( - token_ids=pre_ids, - type_ids=pre_sent, - pos_ids=pre_pos, - generation_mask=tmp_tgt_generation_mask, - gather_idx=parent_idx) - logits = model._calc_logits(dec_out) - - # ignore unk and mask token - if self.ignore_unk: - logits = layers.elementwise_add(logits, token_penalty, axis=1) - - # min dec length - min_len_cond = layers.less_than(x=step_idx, y=min_len) - - def min_len_penalty(): - """Plus minimum length penalty.""" - return layers.elementwise_add(logits, eos_penalty, axis=1) - - def no_penalty(): - """No penalty.""" - return logits - - logits = layers.case([(min_len_cond, min_len_penalty)], - default=no_penalty) - - # get probs - probs = layers.softmax(logits / self.temperature) - - if self.decoding_strategy == "beam_search": - topk_scores, topk_indices = layers.topk( - input=probs, k=beam_size) - else: - if self.decoding_strategy.startswith("sampling"): - sampling_ids = layers.sampling_id(probs, dtype="int") - elif self.decoding_strategy.startswith("topk_sampling"): - 
topk_probs, _ = layers.topk(input=probs, k=self.topk) - ge_cond = layers.cast( - layers.greater_equal( - probs, layers.unsqueeze(topk_probs[:, -1], [1])), - "float32") - old_probs = probs - probs = probs * ge_cond / layers.reduce_sum( - topk_probs, dim=-1, keep_dim=True) - sampling_ids = layers.sampling_id(probs, dtype="int") - probs = old_probs - elif self.decoding_strategy.startswith("topp_sampling"): - sorted_probs, sorted_idx = layers.argsort( - probs, descending=True) - cum_sorted_probs = layers.cumsum( - sorted_probs, axis=1, exclusive=True) - lt_cond = layers.cast( - layers.less_than( - cum_sorted_probs, - layers.fill_constant_batch_size_like( - cum_sorted_probs, cum_sorted_probs.shape, - cum_sorted_probs.dtype, self.topp)), "float32") - old_probs = probs - candidate_probs = sorted_probs * lt_cond - probs = candidate_probs / layers.reduce_sum( - candidate_probs, dim=-1, keep_dim=True) - sampling_ids = layers.sampling_id(probs, dtype="int") - sampling_ids = layers.index_sample( - sorted_idx, layers.unsqueeze(sampling_ids, [1])) - sampling_ids = layers.squeeze(sampling_ids, [1]) - probs = old_probs - else: - raise ValueError(self.decoding_strategy) - - sampling_scores = layers.one_hot( - layers.unsqueeze(sampling_ids, [1]), probs.shape[1]) - sampling_scores = sampling_scores * probs - ( - 1 - sampling_scores) * 1e3 - topk_scores, topk_indices = layers.topk( - input=sampling_scores, k=1) - - pre_len = layers.cast(step_idx, "float32") - layers.increment(x=step_idx, value=1.0, in_place=True) - cur_len = layers.cast(step_idx, "float32") - - # update scores - if self.length_average: - accu_scores = layers.elementwise_add( - x=layers.log(topk_scores), y=pre_scores * pre_len, - axis=0) / cur_len - elif self.length_penalty > 0: - pre_lp = layers.pow((5 + pre_len) / 6, self.length_penalty) - cur_lp = layers.pow((5 + cur_len) / 6, self.length_penalty) - accu_scores = layers.elementwise_add( - x=layers.log(topk_scores), y=pre_scores * pre_lp, - axis=0) / cur_lp - else: - accu_scores = layers.elementwise_add( - x=layers.log(topk_scores), y=pre_scores, axis=0) - topk_indices = layers.lod_reset(topk_indices, pre_ids) - accu_scores = layers.lod_reset(accu_scores, pre_ids) - selected_ids, selected_scores, gather_idx = layers.beam_search( - pre_ids=pre_ids, - pre_scores=pre_scores, - ids=topk_indices, - scores=accu_scores, - beam_size=beam_size, - end_id=self.eos_id, - return_parent_idx=True) - - layers.array_write(selected_ids, i=step_idx, array=ids) - layers.array_write(selected_scores, i=step_idx, array=scores) - layers.array_write(pre_mask, i=step_idx, array=tgt_generation_mask) - layers.array_write(pos_bias, i=step_idx, array=pos_biases) - - layers.assign(gather_idx, parent_idx) - - length_cond = layers.less_than(x=step_idx, y=max_len) - finish_cond = layers.logical_not(layers.is_empty(x=selected_ids)) - layers.logical_and(x=length_cond, y=finish_cond, out=cond) - - finished_ids, finished_scores = layers.beam_search_decode( - ids, scores, beam_size=beam_size, end_id=self.eos_id) - - predictions = { - "finished_ids": finished_ids, - "finished_scores": finished_scores, - "token_ids": inputs["token_ids"], - "data_id": inputs["data_id"] - } - return predictions diff --git a/hub_module/modules/text/text_generation/plato2_en_large/models/model_base.py b/hub_module/modules/text/text_generation/plato2_en_large/models/model_base.py deleted file mode 100644 index 2bd66f4980cef44f183e5a42be99d9efa3acabc4..0000000000000000000000000000000000000000 --- 
a/hub_module/modules/text/text_generation/plato2_en_large/models/model_base.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Model base.""" - -from abc import abstractmethod, ABC - -import paddle.fluid as fluid -from paddle.fluid.incubate.fleet.collective import fleet, DistributedStrategy -import paddle.fluid.incubate.fleet.base.role_maker as role_maker -import paddle.fluid.layers as layers - -from plato2_en_large.models.optimizer import AdamW -from plato2_en_large.utils import init_pretraining_params, init_checkpoint, to_lodtensor -from plato2_en_large.utils.args import str2bool - - -class Model(ABC): - """ - Basic model wrapper for paddle. - """ - - @classmethod - def add_cmdline_args(cls, parser): - """Add cmdline argurments.""" - group = parser.add_argument_group("Model") - # Init checkpoint - group.add_argument("--init_checkpoint", type=str, default="") - group.add_argument("--init_pretraining_params", type=str, default="") - - # Optimizer - group.add_argument( - "-lr", - "--learning_rate", - type=float, - default=1e-5, - help="The learning rate for optimizer.") - group.add_argument( - "--warmup_steps", type=int, default=0, help="The warmup steps.") - group.add_argument( - "--weight_decay", - type=float, - default=0.0, - help="The weight decay for optimizer.") - group.add_argument( - "--max_grad_norm", - type=float, - default=.1, - help="The maximum norm of gradient.") - - group.add_argument("--use_recompute", type=str2bool, default=False) - group.add_argument("--use_amp", type=str2bool, default=False) - group.add_argument("--amp_loss_scaling", type=float, default=12800) - return group - - def __init__(self, args, place): - self.place = place - self.exe = fluid.Executor(place) - - self.init_checkpoint = args.init_checkpoint - self.init_pretraining_params = args.init_pretraining_params - - self.learning_rate = args.learning_rate - self.warmup_steps = args.warmup_steps - self.weight_decay = args.weight_decay - self.max_grad_norm = args.max_grad_norm - - self.is_distributed = args.is_distributed - self.use_recompute = args.use_recompute - self.use_amp = args.use_amp - self.amp_loss_scaling = args.amp_loss_scaling - self.run_infer = args.get("run_infer", False) - self.batch_size = args.get("batch_size", 1) - self._build_programs() - return - - def _build_programs(self): - """ - Build programs. - - Build train_program, eval_program and inference_program. Only use in static graph mode. 
- """ - if self.run_infer: - self.startup_program = fluid.Program() - # build infer program - self.infer_program = fluid.Program() - with fluid.program_guard(self.infer_program, self.startup_program): - with fluid.unique_name.guard(): - self.infer_feed_dict = inputs = self._get_feed_dict( - is_infer=True) - outputs = self.forward(inputs, is_infer=True) - predictions = self.infer(inputs, outputs) - self.infer_fetch_dict = predictions - self.infer_program = self.infer_program.clone(for_test=True) - - self.program = self.infer_program - else: - if self.is_distributed: - exec_strategy = fluid.ExecutionStrategy() - exec_strategy.use_experimental_executor = True - exec_strategy.num_threads = 4 - exec_strategy.num_iteration_per_drop_scope = 1 - - dist_strategy = DistributedStrategy() - dist_strategy.exec_strategy = exec_strategy - dist_strategy.nccl_comm_num = 1 - dist_strategy.fuse_all_reduce_ops = True - if self.use_recompute: - dist_strategy.forward_recompute = True - dist_strategy.enable_sequential_execution = True - if self.use_amp: - dist_strategy.use_amp = True - dist_strategy.amp_loss_scaling = self.amp_loss_scaling - self.dist_strategy = dist_strategy - - self.startup_program = fluid.Program() - # build train program - self.train_program = fluid.Program() - with fluid.program_guard(self.train_program, self.startup_program): - with fluid.unique_name.guard(): - self.feed_dict = inputs = self._get_feed_dict() - outputs = self.forward(inputs) - if self.is_distributed and self.use_recompute: - self.dist_strategy.recompute_checkpoints = outputs[ - "checkpoints"] - metrics, statistics = self.get_metrics_and_statistics( - inputs, outputs) - - # build eval program - self.eval_program = self.train_program.clone(for_test=True) - self.eval_fetch_dict = {**metrics, **statistics} - - scheduled_lr = self.optimize(metrics) - metrics["scheduled_lr"] = scheduled_lr - self.train_fetch_dict = metrics - - self.program = self.train_program - if self.is_distributed: - self.train_program = fleet.main_program - - self.exe.run(self.startup_program) - if self.init_pretraining_params != "": - init_pretraining_params(self.exe, self.init_pretraining_params, - self.program) - elif self.init_checkpoint != "": - init_checkpoint(self.exe, self.init_checkpoint, self.program) - return - - def load(self, model_dir, is_checkpoint=False): - """ - Load persistables or parameters. - """ - # TODO: support dygraph. - if is_checkpoint: - init_checkpoint(self.exe, model_dir, self.program) - else: - init_pretraining_params(self.exe, model_dir, self.program) - return - - def save(self, model_dir, is_checkpoint=False): - """ - Save persistables or parameters. - """ - # TODO: support dygraph. - if is_checkpoint: - fluid.io.save_persistables(self.exe, model_dir, self.program) - else: - fluid.io.save_params(self.exe, model_dir, self.program) - return - - @abstractmethod - def _get_feed_dict(self, is_infer=False): - """ - Return input feed list. - """ - pass - - def _get_feed(self, inputs, is_infer=False): - """ - Convert `inputs` into model's feed data format. - """ - if isinstance(inputs, list): - # return list direclty which is used in `get_data_loader`. - return inputs - for k in inputs: - if isinstance(inputs[k], list): - inputs[k] = to_lodtensor(inputs[k], self.place) - return inputs - - def get_data_loader(self, generator=None, is_infer=False): - """ - Return DataLoader. - - If generator is not `None`, the data loader set it as the batch generator. - """ - # TODO: support dygraph. 
- if is_infer: - feed_name_list, feed_list = zip(*self.infer_feed_dict.items()) - else: - feed_name_list, feed_list = zip(*self.feed_dict.items()) - loader = fluid.io.DataLoader.from_generator( - feed_list=feed_list, - capacity=64, - use_double_buffer=True, - iterable=True) - if generator is not None: - - def __wrapper__(): - for batch in generator(): - batch = self._get_feed(batch) - batch = [ - batch[name] for name in feed_name_list if name in batch - ] - yield batch - - loader.set_batch_generator(__wrapper__, self.place) - return loader - - @abstractmethod - def forward(self, inputs, is_infer=False): - """ - Run model main forward. - """ - pass - - @abstractmethod - def get_metrics_and_statistics(self, inputs, outputs): - """ - Get metrics and statistics. - """ - pass - - @abstractmethod - def infer(self, inputs, outputs): - """ - Run model inference. - """ - pass - - def optimize(self, metrics): - """ - Optimize the model by metrics(mainly `metrics["loss"]`). - """ - # TODO: support dygraph - if self.warmup_steps > 0: - scheduled_lr = layers.learning_rate_scheduler.noam_decay( - 1 / (self.warmup_steps * (self.learning_rate**2)), - self.warmup_steps) - else: - scheduled_lr = layers.create_global_var( - name=fluid.unique_name.generate("learning_rate"), - shape=[1], - value=self.learning_rate, - dtype="float32", - persistable=True) - grad_clip = fluid.clip.GradientClipByGlobalNorm(self.max_grad_norm) - - self.optimizer = AdamW( - learning_rate=scheduled_lr, - grad_clip=grad_clip, - weight_decay=self.weight_decay) - - if self.is_distributed: - self.optimizer = fleet.distributed_optimizer( - self.optimizer, strategy=self.dist_strategy) - - self.optimizer.minimize(metrics["loss"]) - return scheduled_lr - - def _execute(self, program, feed, fetch_dict, **kwargs): - """ - Execute program. - """ - fetch_list = [var.name for var in fetch_dict.values()] - fetch_vars = self.exe.run(program, feed, fetch_list, **kwargs) - return dict(zip(fetch_dict.keys(), fetch_vars)) - - def train_step(self, inputs): - """ - Run one training step. - """ - # TODO: support dygraph. - return self._execute( - self.train_program, - self._get_feed(inputs), - self.train_fetch_dict, - use_program_cache=True) - - def eval_step(self, inputs): - """ - Run one evaluation step. - """ - # TODO: support dygraph. - return self._execute(self.eval_program, self._get_feed(inputs), - self.eval_fetch_dict) - - def infer_step(self, inputs): - """ - Run one inference step. - """ - # TODO: support dygraph. - return self._execute(self.infer_program, - self._get_feed(inputs, is_infer=True), - self.infer_fetch_dict) - - def save_inference_model(self, inference_model_path): - """ - Save the inference model. - """ - feed_list = [var.name for var in self.infer_feed_dict.values()] - fetch_list = list(self.infer_fetch_dict.values()) - - fluid.io.save_inference_model(inference_model_path, feed_list, - fetch_list, self.exe, self.infer_program) diff --git a/hub_module/modules/text/text_generation/plato2_en_large/models/nsp_model.py b/hub_module/modules/text/text_generation/plato2_en_large/models/nsp_model.py deleted file mode 100644 index 93f9c4bb648ec73ae21c1088a64f140ac26d47c6..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_large/models/nsp_model.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
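One detail of `optimize()` above deserves a note: `noam_decay` is invoked with `d_model = 1 / (warmup_steps * learning_rate**2)`, which makes the schedule ramp linearly up to exactly `learning_rate` at step `warmup_steps` and then decay as `1/sqrt(step)`. A small sketch to verify the peak (the closed form below follows the standard Noam formula that `noam_decay` implements):

```python
def noam_lr(step, base_lr, warmup_steps):
    """Noam schedule as configured above: with d_model chosen as
    1 / (warmup_steps * base_lr**2), the rate ramps linearly to
    `base_lr` at `warmup_steps`, then decays like 1/sqrt(step)."""
    d_model = 1.0 / (warmup_steps * base_lr ** 2)
    step = max(step, 1)
    return d_model ** -0.5 * min(step ** -0.5, step * warmup_steps ** -1.5)


if __name__ == "__main__":
    for s in (1, 100, 200, 400, 1600):
        # With warmup_steps=200 the rate peaks at exactly 1e-5 at s=200.
        print(s, noam_lr(s, base_lr=1e-5, warmup_steps=200))
```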
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""NSP model.""" - -import paddle.fluid as fluid -import paddle.fluid.layers as layers - -from . import register_model -from .model_base import Model -from .unified_transformer import UnifiedTransformer - - -@register_model("NSPModel") -class NSPModel(UnifiedTransformer): - """NSP model.""" - - def _get_feed_dict(self, is_infer=False): - """ - Get the feed list of the model. - - Args: - is_infer(bool): True if running inference. - - Returns: - list(Variable): The feed list. - list(str): The name of each Variable in feed list. - """ - feed_dict = {} - feed_dict["token_ids"] = layers.data( - name="token_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") - feed_dict["type_ids"] = layers.data( - name="type_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") - feed_dict["pos_ids"] = layers.data( - name="pos_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") - - feed_dict["attention_mask"] = layers.data( - name="attention_mask", - shape=[-1, self.max_seq_len, self.max_seq_len], - dtype=self.dtype) - feed_dict["label_pos"] = layers.data( - name="label_pos", shape=[-1, 1], dtype="int64") - - if not is_infer: - feed_dict["label"] = layers.data( - name="label", shape=[-1, 1], dtype="int64") - feed_dict["tgt_label"] = layers.data( - name="tgt_ids", shape=[-1, 1], dtype="int64") - feed_dict["tgt_pos"] = layers.data( - name="tgt_pos", shape=[-1, 1], dtype="int64") - - feed_dict["data_id"] = layers.data( - name="data_id", shape=[-1, 1], dtype="int64") - return feed_dict - - def _get_feed(self, inputs, is_infer=False): - return Model._get_feed(self, inputs, is_infer) - - def forward(self, inputs, is_infer=False): - outputs = {} - self.generation_caches = None - outputs["enc_out"], self.checkpoints = self._generation_network( - token_ids=inputs["token_ids"], - type_ids=inputs["type_ids"], - pos_ids=inputs["pos_ids"], - generation_mask=inputs["attention_mask"]) - return outputs - - def _get_metrics(self, inputs, outputs): - metrics = {} - fc_out = self._calc_logits(outputs["enc_out"], inputs["tgt_pos"]) - lm_loss = layers.softmax_with_cross_entropy( - logits=fc_out, label=inputs["tgt_pos"]) - need_cal = layers.not_equal( - inputs["tgt_label"], - layers.fill_constant(shape=[1], dtype="int64", value=1)) - need_cal = layers.cast(need_cal, self.dtype) - mean_lm_loss = layers.reduce_sum( - lm_loss * need_cal) / (layers.reduce_sum(need_cal) + 1e-10) - - pooled_out = self._get_pooled_output(outputs["enc_out"], - inputs["label_pos"]) - nsp_fc_out = layers.fc( - input=pooled_out, - size=2, - param_attr=fluid.ParamAttr( - name="next_sent_fc.w_0", initializer=self.param_initializer), - bias_attr="next_sent_fc.b_0") - nsp_loss, nsp_softmax = layers.softmax_with_cross_entropy( - logits=nsp_fc_out, label=inputs["label"], return_softmax=True) - - nsp_acc = layers.accuracy(nsp_softmax, inputs["label"]) - mean_nsp_loss = layers.mean(nsp_loss) - - metrics["loss"] = mean_lm_loss + mean_nsp_loss - metrics["lm_loss"] = mean_lm_loss - metrics["nsp_loss"] = mean_nsp_loss - metrics["nsp_acc"] = nsp_acc - return metrics - - def infer(self, inputs, outputs): - pooled_out = 
self._get_pooled_output(outputs["enc_out"], - inputs["label_pos"]) - nsp_fc_out = layers.fc( - input=pooled_out, - size=2, - param_attr=fluid.ParamAttr( - name="next_sent_fc.w_0", initializer=self.param_initializer), - bias_attr="next_sent_fc.b_0") - scores = layers.softmax(nsp_fc_out) - predictions = {"scores": scores, "data_id": inputs["data_id"]} - return predictions - - def infer_step(self, inputs): - return Model.infer_step(self, inputs) diff --git a/hub_module/modules/text/text_generation/plato2_en_large/models/optimizer.py b/hub_module/modules/text/text_generation/plato2_en_large/models/optimizer.py deleted file mode 100644 index 38a9cd36fb490134427e67fa2909dbae48ed9f8c..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_large/models/optimizer.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Optimizer.""" - -import re - -import paddle.fluid as fluid -import paddle.fluid.layers as layers - - -class AdamW(fluid.optimizer.AdamOptimizer): - """AdamW object for dygraph""" - - def __init__(self, *args, **kwargs): - weight_decay = kwargs.pop('weight_decay', None) - var_name_to_exclude = kwargs.pop( - 'var_name_to_exclude', '.*layer_norm_scale|.*layer_norm_bias|.*b_0') - super(AdamW, self).__init__(*args, **kwargs) - self.wd = weight_decay - self.pat = re.compile(var_name_to_exclude) - - def apply_optimize(self, loss, startup_program, params_grads): - """Update params with weight decay.""" - super(AdamW, self).apply_optimize(loss, startup_program, params_grads) - for p, g in params_grads: - if not self.pat.match(p.name): - layers.assign(p * (1. - self.wd * self._learning_rate), p) diff --git a/hub_module/modules/text/text_generation/plato2_en_large/models/plato.py b/hub_module/modules/text/text_generation/plato2_en_large/models/plato.py deleted file mode 100644 index 68031b24387d6e814fa76a296d8644e2dabbdb7e..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_large/models/plato.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Plato model.""" - -import numpy as np -import paddle.fluid as fluid -import paddle.fluid.layers as layers - -from . 
diff --git a/hub_module/modules/text/text_generation/plato2_en_large/models/plato.py b/hub_module/modules/text/text_generation/plato2_en_large/models/plato.py
deleted file mode 100644
index 68031b24387d6e814fa76a296d8644e2dabbdb7e..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/text_generation/plato2_en_large/models/plato.py
+++ /dev/null
@@ -1,296 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Plato model."""
-
-import numpy as np
-import paddle.fluid as fluid
-import paddle.fluid.layers as layers
-
-from . import register_model
-from .model_base import Model
-from .unified_transformer import UnifiedTransformer
-from .transformer_block import encoder, pre_process_layer
-from plato2_en_large.utils import repeat_array_or_tensor
-from plato2_en_large.utils.args import str2bool
-from .generator import Generator
-
-
-@register_model("Plato")
-class Plato(UnifiedTransformer):
-    """Plato model."""
-
-    @classmethod
-    def add_cmdline_args(cls, parser):
-        """Add cmdline arguments."""
-        group = UnifiedTransformer.add_cmdline_args(parser)
-        group.add_argument("--use_bow", type=str2bool, default=True)
-        group.add_argument("--use_entropy", type=str2bool, default=False)
-        return group
-
-    def __init__(self, args, place):
-        # latent related
-        self.mask_id = args.mask_id
-        self.latent_type_size = args.latent_type_size
-        self.latent_emb_name = "latent_embedding"
-        self.use_bow = args.use_bow
-        self.use_entropy = args.use_entropy
-
-        super(Plato, self).__init__(args, place)
-
-    def _get_feed_dict(self, is_infer=False):
-        feed_dict = {}
-        feed_dict["token_ids"] = layers.data(
-            name="token_ids", shape=[-1, self.max_seq_len, 1], dtype="int64")
-        feed_dict["type_ids"] = layers.data(
-            name="type_ids", shape=[-1, self.max_seq_len, 1], dtype="int64")
-        feed_dict["pos_ids"] = layers.data(
-            name="pos_ids", shape=[-1, self.max_seq_len, 1], dtype="int64")
-
-        if not is_infer:
-            feed_dict["recognition_mask"] = layers.data(
-                name="recognition_mask",
-                shape=[-1, self.max_seq_len + 1, self.max_seq_len + 1],
-                dtype=self.dtype)
-        feed_dict["generation_mask"] = layers.data(
-            name="generation_mask",
-            shape=[-1, self.max_seq_len + 1, self.max_seq_len + 1],
-            dtype=self.dtype)
-
-        if is_infer:
-            feed_dict["tgt_ids"] = layers.data(
-                name="tgt_ids",
-                shape=[-1, self.max_seq_len, 1],
-                dtype="int64",
-                lod_level=2)
-            feed_dict["tgt_pos"] = layers.data(
-                name="tgt_pos",
-                shape=[-1, self.max_seq_len, 1],
-                dtype="int64",
-                lod_level=2)
-            feed_dict["init_score"] = layers.data(
-                name="init_score", shape=[-1, 1], dtype="float32", lod_level=1)
-            feed_dict["parent_idx"] = layers.data(
-                name="parent_idx", shape=[-1], dtype="int64")
-
-            feed_dict["tgt_generation_mask"] = layers.data(
-                name="tgt_generation_mask",
-                shape=[-1, 1, self.max_seq_len + 1],
-                dtype="float32")
-            feed_dict["latent_id"] = layers.data(
-                name="latent_id", shape=[-1, 1], dtype="int64")
-        else:
-            feed_dict["tgt_label"] = layers.data(
-                name="tgt_label", shape=[-1, 1], dtype="int64")
-            feed_dict["tgt_pos"] = layers.data(
-                name="tgt_pos", shape=[-1, 1], dtype="int64")
-
-            if self.use_bow:
-                feed_dict["bow_label"] = layers.data(
-                    name="bow_label", shape=[-1, 1], dtype="int64")
-                feed_dict["bow_pos"] = layers.data(
-                    name="bow_pos", shape=[-1, 1], dtype="int64")
-
-        feed_dict["data_id"] = layers.data(
-            name="data_id", shape=[-1, 1], dtype="int64")
-        return feed_dict
-
-    def _recognition_network(self, token_ids, type_ids, pos_ids,
-                             recognition_mask):
-        mask_id = layers.fill_constant_batch_size_like(
-            input=token_ids,
-            shape=[-1, 1, 1],
-            value=self.mask_id,
-            dtype="int64")
-        mask_emb = layers.embedding(
-            input=mask_id,
-            size=[self.vocab_size, self.emb_size],
-            dtype=self.dtype,
-            param_attr=fluid.ParamAttr(
-                name=self.token_emb_name, initializer=self.param_initializer))
-        emb_out, n_head_self_attn_mask = self._gen_input(
-            token_ids, type_ids, pos_ids, recognition_mask, aux_emb=mask_emb)
-
-        recognition_out, checkpoints = self._encode(emb_out,
-                                                    n_head_self_attn_mask)
-
-        recognition_feat = layers.slice(
-            input=recognition_out, axes=[1], starts=[0], ends=[1])
-        recognition_feat = layers.fc(
-            input=recognition_feat,
-            size=self.hidden_size,
-            act="tanh",
-            param_attr=fluid.ParamAttr(
-                name="recognition_fc.w_0", initializer=self.param_initializer),
-            bias_attr="recognition_fc.b_0")
-        logits = layers.fc(
-            input=recognition_feat,
-            size=self.latent_type_size,
-            param_attr=fluid.ParamAttr(
-                name=self.latent_emb_name, initializer=self.param_initializer),
-            bias_attr="recognition_bias")
-        return logits, checkpoints
-
-    def _gumbel_softmax(self, logits, tau=0.67, eps=1e-10):
-        u = layers.uniform_random_batch_size_like(
-            logits, shape=[-1, self.latent_type_size], min=0.0, max=1.0)
-        u.stop_gradient = True
-        gumbel = 0.0 - layers.log(eps - layers.log(u + eps))
-        y = logits + gumbel
-        return layers.softmax(y / tau)
-
-    def forward(self, inputs, is_infer=False):
-        """
-        Run the model's main forward pass.
-        """
-        outputs = {}
-        if is_infer:
-            self.generation_caches = [{
-                "k":
-                layers.fill_constant_batch_size_like(
-                    input=inputs["token_ids"],
-                    shape=[-1, 0, self.d_key * self.n_head],
-                    dtype=self.dtype,
-                    value=0),
-                "v":
-                layers.fill_constant_batch_size_like(
-                    input=inputs["token_ids"],
-                    shape=[-1, 0, self.d_value * self.n_head],
-                    dtype=self.dtype,
-                    value=0),
-            } for i in range(self.n_layer)]
-        else:
-            self.generation_caches = None
-
-        latent_embeddings = layers.create_parameter(
-            shape=[self.emb_size, self.latent_type_size],
-            dtype=self.dtype,
-            attr=fluid.ParamAttr(
-                name=self.latent_emb_name, initializer=self.param_initializer))
-
-        if is_infer:
-            latent_id = inputs["latent_id"]
-            weights = layers.one_hot(latent_id, self.latent_type_size)
-        else:
-            logits, recognition_checkpoints = self._recognition_network(
-                token_ids=inputs["token_ids"],
-                type_ids=inputs["type_ids"],
-                pos_ids=inputs["pos_ids"],
-                recognition_mask=inputs["recognition_mask"])
-            outputs["post_probs"] = layers.softmax(logits)
-            weights = self._gumbel_softmax(logits)
-            outputs["checkpoints"] = recognition_checkpoints
-
-        latent_emb = layers.matmul(
-            x=weights, y=latent_embeddings, transpose_y=True)
-        outputs["enc_out"], generation_checkpoints = self._generation_network(
-            token_ids=inputs["token_ids"],
-            type_ids=inputs["type_ids"],
-            pos_ids=inputs["pos_ids"],
-            generation_mask=inputs["generation_mask"],
-            aux_emb=layers.unsqueeze(latent_emb, axes=[1]),
-            gather_idx=inputs.get("parent_idx", None))
-
-        if not is_infer:
-            outputs["checkpoints"].extend(generation_checkpoints)
-        return outputs
-
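`_gumbel_softmax` above draws a differentiable, near-one-hot sample over the latent types during training. A Paddle-free NumPy sketch of the same trick; `tau` and `eps` follow the defaults above, shapes are toy:

```python
import numpy as np

def gumbel_softmax(logits, tau=0.67, eps=1e-10):
    # Gumbel noise g = -log(eps - log(u + eps)), u ~ U(0, 1); eps guards log(0).
    u = np.random.uniform(0.0, 1.0, size=logits.shape)
    gumbel = -np.log(eps - np.log(u + eps))
    y = (logits + gumbel) / tau          # smaller tau -> closer to one-hot
    y = y - y.max(axis=-1, keepdims=True)
    e = np.exp(y)
    return e / e.sum(axis=-1, keepdims=True)

weights = gumbel_softmax(np.random.randn(2, 20))  # e.g. 20 latent types
```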
-    def _calc_bow_logits(self, enc_out, checkpoints, bow_pos):
-        """Get the bag-of-words logits."""
-        bow_feat = layers.slice(input=enc_out, axes=[1], starts=[0], ends=[1])
-        bow_feat = layers.reshape(x=bow_feat, shape=[-1, self.hidden_size])
-        bow_pos = layers.cast(x=bow_pos, dtype="int32")
-        bow_feat = layers.gather(input=bow_feat, index=bow_pos)
-
-        bow_trans_feat = layers.fc(
-            input=bow_feat,
-            size=self.emb_size,
-            act=self.hidden_act,
-            param_attr=fluid.ParamAttr(
-                name="bow_trans_fc.w_0", initializer=self.param_initializer),
-            bias_attr=fluid.ParamAttr(name="bow_trans_fc.b_0"))
-
-        bow_trans_feat = pre_process_layer(
-            bow_trans_feat, self.post_cls_cmd, name="bow_trans")
-
-        checkpoints.append(bow_trans_feat)
-
-        if self.weight_sharing:
-            # Tie the output projection to the token embedding table.
-            fc_out = layers.matmul(
-                x=bow_trans_feat,
-                y=fluid.default_main_program().global_block().var(
-                    self.token_emb_name),
-                transpose_y=True)
-            if self.cls_bias:
-                fc_out += layers.create_parameter(
-                    shape=[self.vocab_size],
-                    dtype=self.dtype,
-                    attr=fluid.ParamAttr(name="bow_out_fc.b_0"),
-                    is_bias=True)
-        else:
-            bow_out_bias_attr = fluid.ParamAttr(
-                name="bow_out_fc.b_0") if self.cls_bias else False
-            fc_out = layers.fc(
-                input=bow_trans_feat,
-                size=self.vocab_size,
-                param_attr=fluid.ParamAttr(
-                    name="bow_out_fc.w_0", initializer=self.param_initializer),
-                bias_attr=bow_out_bias_attr)
-        return fc_out
-
-    def _get_metrics(self, inputs, outputs):
-        metrics = super(Plato, self)._get_metrics(inputs, outputs)
-
-        if self.use_bow:
-            fc_out = self._calc_bow_logits(
-                outputs["enc_out"], outputs["checkpoints"], inputs["bow_pos"])
-            bow_loss = layers.softmax_with_cross_entropy(
-                logits=fc_out, label=inputs["bow_label"])
-            mean_bow_loss = layers.mean(bow_loss)
-            metrics["token_bow_loss"] = mean_bow_loss
-            metrics["loss"] = metrics["loss"] + mean_bow_loss
-
-        # sum(p * log p) is the negative entropy of the posterior, so
-        # minimizing it encourages a flatter latent distribution.
-        entropy_loss = layers.reduce_sum(
-            outputs["post_probs"] * layers.log(outputs["post_probs"]), dim=1)
-        mean_entropy_loss = layers.mean(entropy_loss)
-        metrics["entropy_loss"] = mean_entropy_loss
-        if self.use_entropy:
-            metrics["loss"] = metrics["loss"] + mean_entropy_loss
-        return metrics
-
-    def infer_step(self, inputs):
-        """
-        Run one inference step.
-        """
-        if self.do_generation:
-            batch_size = len(inputs["data_id"])
-            new_bsz = batch_size * self.latent_type_size
-            inputs = {
-                name: repeat_array_or_tensor(array_or_tensor, self.place,
-                                             self.latent_type_size)
-                for name, array_or_tensor in inputs.items()
-            }
-            # Add latent_id
-            inputs["latent_id"] = np.array(
-                [
-                    i for i in range(self.latent_type_size)
-                    for _ in range(batch_size)
-                ],
-                dtype="int64").reshape([-1, 1])
-
-            return super(Plato, self).infer_step(inputs)
-        else:
-            return self._execute(self.infer_program,
-                                 self._get_feed(inputs, is_infer=True),
-                                 self.infer_fetch_dict)
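At inference, `Plato.infer_step` above tiles the batch once per latent type, so every example is decoded under every latent id; a ranker (the NSP model) then picks the best of the K candidates. A toy illustration of the tiling, assuming `repeat_array_or_tensor` tiles the whole batch K times (shapes illustrative):

```python
import numpy as np

batch_size, K = 2, 20                       # K = latent_type_size

data_id = np.array([7, 8])
tiled = np.tile(data_id, K)                 # [7, 8, 7, 8, ...], length K * batch_size
latent_id = np.array([i for i in range(K)
                      for _ in range(batch_size)])  # [0, 0, 1, 1, ..., 19, 19]

# Row j of the tiled batch decodes example tiled[j] under latent type latent_id[j].
assert tiled.shape[0] == latent_id.shape[0] == batch_size * K
```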
- """ - keys = queries if keys is None else keys - values = keys if values is None else values - - if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): - raise ValueError( - "Inputs: quries, keys and values should all be 3-D tensors.") - - def __compute_qkv(queries, keys, values, n_head, d_key, d_value): - """ - Add linear projection to queries, keys, and values. - """ - q = layers.fc( - input=queries, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + "_query_fc.w_0", initializer=param_initializer), - bias_attr=name + "_query_fc.b_0") - k = layers.fc( - input=keys, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + "_key_fc.w_0", initializer=param_initializer), - bias_attr=name + "_key_fc.b_0") - v = layers.fc( - input=values, - size=d_value * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr( - name=name + "_value_fc.w_0", initializer=param_initializer), - bias_attr=name + "_value_fc.b_0") - return q, k, v - - def __split_heads(x, n_head): - """ - Reshape the last dimension of inpunt tensor x so that it becomes two - dimensions and then transpose. Specifically, input a tensor with shape - [bs, max_sequence_length, n_head * hidden_dim] then output a tensor - with shape [bs, n_head, max_sequence_length, hidden_dim]. - """ - hidden_size = x.shape[-1] - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - reshaped = layers.reshape( - x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) - - # permuate the dimensions into: - # [batch_size, n_head, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) - - def __combine_heads(x): - """ - Transpose and then reshape the last two dimensions of inpunt tensor x - so that it becomes one dimension, which is reverse to __split_heads. - """ - if len(x.shape) == 3: return x - if len(x.shape) != 4: - raise ValueError("Input(x) should be a 4-D Tensor.") - - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - return layers.reshape( - x=trans_x, - shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], - inplace=True) - - def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): - """ - Scaled Dot-Product Attention - """ - scaled_q = layers.scale(x=q, scale=d_key**-0.5) - product = layers.matmul(x=scaled_q, y=k, transpose_y=True) - if attn_bias: - product += attn_bias - weights = layers.softmax(product, use_cudnn=True) - if dropout_rate: - weights = layers.dropout( - weights, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - out = layers.matmul(weights, v) - return out - - q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) - - if cache is not None: # use cache and concat time steps - # Since the inplace reshape in __split_heads changes the shape of k and - # v, which is the cache input for next time step, reshape the cache - # input from the previous time step first. 
-        cache_k, cache_v = cache["k"], cache["v"]
-        select_k = layers.gather(cache_k, index=gather_idx)
-        select_v = layers.gather(cache_v, index=gather_idx)
-        select_k = layers.reshape(select_k, shape=[0, 0, d_key * n_head])
-        select_v = layers.reshape(select_v, shape=[0, 0, d_value * n_head])
-        if store:
-            k = layers.concat([select_k, k], axis=1)
-            v = layers.concat([select_v, v], axis=1)
-            layers.assign(k, cache["k"])
-            layers.assign(v, cache["v"])
-        else:
-            #k = select_k
-            #v = select_v
-            tmp_k = layers.concat([select_k, k[:, :1]], axis=1)
-            tmp_v = layers.concat([select_v, v[:, :1]], axis=1)
-            layers.assign(tmp_k, cache["k"])
-            layers.assign(tmp_v, cache["v"])
-            k = layers.concat([select_k, k], axis=1)
-            v = layers.concat([select_v, v], axis=1)
-
-    q = __split_heads(q, n_head)
-    k = __split_heads(k, n_head)
-    v = __split_heads(v, n_head)
-
-    ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key,
-                                                  dropout_rate)
-
-    out = __combine_heads(ctx_multiheads)
-
-    # Project back to the model size.
-    proj_out = layers.fc(
-        input=out,
-        size=d_model,
-        num_flatten_dims=2,
-        param_attr=fluid.ParamAttr(
-            name=name + "_output_fc.w_0", initializer=param_initializer),
-        bias_attr=name + "_output_fc.b_0")
-    return proj_out
-
-
-def positionwise_feed_forward(x,
-                              d_inner_hid,
-                              d_hid,
-                              dropout_rate,
-                              hidden_act,
-                              param_initializer=None,
-                              name="ffn"):
-    """
-    Position-wise Feed-Forward Networks.
-    This module consists of two linear transformations with an activation
-    in between, which is applied to each position separately and identically.
-    """
-    hidden = layers.fc(
-        input=x,
-        size=d_inner_hid,
-        num_flatten_dims=2,
-        act=hidden_act,
-        param_attr=fluid.ParamAttr(
-            name=name + "_fc_0.w_0", initializer=param_initializer),
-        bias_attr=name + "_fc_0.b_0")
-    if dropout_rate:
-        hidden = layers.dropout(
-            hidden,
-            dropout_prob=dropout_rate,
-            dropout_implementation="upscale_in_train",
-            is_test=False)
-    out = layers.fc(
-        input=hidden,
-        size=d_hid,
-        num_flatten_dims=2,
-        param_attr=fluid.ParamAttr(
-            name=name + "_fc_1.w_0", initializer=param_initializer),
-        bias_attr=name + "_fc_1.b_0")
-    return out
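For intuition, `scaled_dot_product_attention` inside `multi_head_attention` above computes softmax(QKᵀ/√d + bias)·V per head. A NumPy sketch with toy shapes, single head and no dropout; the additive bias is 0 for visible positions and a large negative number for masked ones:

```python
import numpy as np

def attention(q, k, v, bias):
    # q, k, v: [seq_len, d]; bias: [seq_len, seq_len] of 0 or -1e4 entries
    scores = q @ k.T / np.sqrt(q.shape[-1]) + bias
    scores = scores - scores.max(axis=-1, keepdims=True)
    w = np.exp(scores)
    w = w / w.sum(axis=-1, keepdims=True)
    return w @ v

d, n = 4, 3
q, k, v = (np.random.randn(n, d) for _ in range(3))
bias = np.triu(np.full((n, n), -1e4), k=1)  # causal: see self and the past only
out = attention(q, k, v, bias)              # [n, d]
```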
- """ - for cmd in process_cmd: - if cmd == "a": # add residual connection - out = out + prev_out if prev_out else out - elif cmd == "n": # add layer normalization - out = layers.layer_norm( - out, - begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.ParamAttr( - name=name + "_layer_norm_scale", - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - name=name + "_layer_norm_bias", - initializer=fluid.initializer.Constant(0.)), - epsilon=epsilon) - elif cmd == "d": # add dropout - if dropout_rate: - out = layers.dropout( - out, - dropout_prob=dropout_rate, - dropout_implementation="upscale_in_train", - is_test=False) - return out - - -pre_process_layer = partial(pre_post_process_layer, None) -post_process_layer = pre_post_process_layer - - -def encoder_layer(input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name="", - epsilon=1e-5, - cache=None, - gather_idx=None, - store=False): - """ - The encoder layers that can be stacked to form a deep encoder. - This module consits of a multi-head (self) attention followed by - position-wise feed-forward networks and both the two components companied - with the pre_process_layer / post_process_layer to add residual connection, - layer normalization and droput. - """ - attn_output = multi_head_attention( - pre_process_layer( - input, - preprocess_cmd, - prepostprocess_dropout, - epsilon=epsilon, - name=name + "_pre_att"), - None, - None, - attn_bias, - d_key, - d_value, - d_model, - n_head, - attention_dropout, - param_initializer=param_initializer, - name=name + "_multi_head_att", - cache=cache, - gather_idx=gather_idx, - store=store) - attn_output = post_process_layer( - input, - attn_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + "_post_att", - epsilon=epsilon) - ffd_output = positionwise_feed_forward( - pre_process_layer( - attn_output, - preprocess_cmd, - prepostprocess_dropout, - epsilon=epsilon, - name=name + "_pre_ffn"), - d_inner_hid, - d_model, - relu_dropout, - hidden_act, - param_initializer=param_initializer, - name=name + "_ffn") - ffd_output = post_process_layer( - attn_output, - ffd_output, - postprocess_cmd, - prepostprocess_dropout, - name=name + "_post_ffn", - epsilon=epsilon) - return ffd_output, [attn_output, ffd_output] - - -def encoder(enc_input, - attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name="", - epsilon=1e-5, - n_layer_per_block=1, - param_share="normal", - caches=None, - gather_idx=None, - store=False): - """ - The encoder is composed of a stack of identical layers returned by calling - encoder_layer. 
- """ - checkpoints = [] - names = [] - if param_share == "inner_share": - for _ in range(n_layer // n_layer_per_block): - for i in range(n_layer_per_block): - names.append(name + "_layer_" + str(i)) - else: - for i in range(n_layer // n_layer_per_block): - for _ in range(n_layer_per_block): - names.append(name + "_layer_" + str(i)) - - for i in range(n_layer): - enc_output, cps = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - epsilon=epsilon, - name=names[i], - cache=caches[i] if caches is not None else None, - gather_idx=gather_idx, - store=store) - checkpoints.extend(cps) - enc_input = enc_output - enc_output = pre_process_layer( - enc_output, - preprocess_cmd, - prepostprocess_dropout, - name="post_encoder", - epsilon=epsilon) - - return enc_output, checkpoints diff --git a/hub_module/modules/text/text_generation/plato2_en_large/models/unified_transformer.py b/hub_module/modules/text/text_generation/plato2_en_large/models/unified_transformer.py deleted file mode 100644 index c324d99f55fae625d0a161920ca05cc5f006a871..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_large/models/unified_transformer.py +++ /dev/null @@ -1,457 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Unified Transformer model.""" - -import numpy as np -import paddle.fluid as fluid -import paddle.fluid.layers as layers - -from . 
import register_model
-from .model_base import Model
-from .transformer_block import encoder, pre_process_layer
-from plato2_en_large.utils.args import str2bool
-from plato2_en_large.utils import repeat_array_or_tensor, slice_array_or_tensor
-from .generator import Generator
-
-
-@register_model("UnifiedTransformer")
-class UnifiedTransformer(Model):
-    """Unified Transformer"""
-
-    @classmethod
-    def add_cmdline_args(cls, parser):
-        """Add cmdline arguments."""
-        group = Model.add_cmdline_args(parser)
-        group.add_argument("--max_seq_len", type=int, default=256)
-        group.add_argument("--weight_sharing", type=str2bool, default=True)
-        group.add_argument("--mem_efficient", type=str2bool, default=False)
-
-        Generator.add_cmdline_args(parser)
-        return group
-
-    def __init__(self, args, place):
-        self.max_seq_len = args.max_seq_len
-
-        self.emb_size = args.emb_size or args.hidden_size
-        self.hidden_size = args.hidden_size
-
-        self.n_layer = args.num_hidden_layers
-        self.n_head = args.num_attention_heads
-        self.d_key = args.get("key_size", self.hidden_size // self.n_head)
-        self.d_value = args.get("value_size", self.hidden_size // self.n_head)
-        self.inner_hidden_size = args.get("inner_hidden_size",
-                                          self.hidden_size * 4)
-
-        self.vocab_size = args.vocab_size
-        self.max_position_seq_len = args.max_position_embeddings
-        self.type_size = args.type_vocab_size
-        self.token_emb_name = "word_embedding"
-        self.type_emb_name = "sent_embedding"
-        self.pos_emb_name = "pos_embedding"
-
-        self.epsilon = args.epsilon or 1e-5
-        self.n_layer_per_block = args.n_layer_per_block or 1
-        self.pre_encoder_cmd = args.get("pre_encoder_cmd", "nd")
-        self.preprocess_cmd = args.get("preprocess_cmd", "")
-        self.postprocess_cmd = args.get("postprocess_cmd", "dan")
-        self.post_cls_cmd = args.get("post_cls_cmd", "n")
-        self.cls_bias = args.get("cls_bias", True)
-        if self.hidden_size != self.emb_size:
-            self.emb_mapping_in = True
-        else:
-            self.emb_mapping_in = args.get("emb_mapping_in", False)
-
-        self.hidden_act = args.hidden_act
-        self.prepostprocess_dropout = args.hidden_dropout_prob
-        self.attention_dropout = args.attention_probs_dropout_prob
-        self.weight_sharing = args.weight_sharing
-
-        self.mem_efficient = args.mem_efficient
-
-        self.dtype = "float32"
-
-        # Initialize all weights by the truncated normal initializer, and all
-        # biases will be initialized by constant zero by default.
- self.param_initializer = fluid.initializer.TruncatedNormal( - scale=args.initializer_range) - - # task-related - self.generator = Generator(args) - self.do_generation = args.do_generation - - super(UnifiedTransformer, self).__init__(args, place) - - def _gen_input(self, token_ids, type_ids, pos_ids, input_mask, - aux_emb=None): - token_emb_out = layers.embedding( - input=token_ids, - size=[self.vocab_size, self.emb_size], - dtype=self.dtype, - param_attr=fluid.ParamAttr( - name=self.token_emb_name, initializer=self.param_initializer)) - type_emb_out = layers.embedding( - input=type_ids, - size=[self.type_size, self.emb_size], - dtype=self.dtype, - param_attr=fluid.ParamAttr( - name=self.type_emb_name, initializer=self.param_initializer)) - pos_emb_out = layers.embedding( - input=pos_ids, - size=[self.max_position_seq_len, self.emb_size], - dtype=self.dtype, - param_attr=fluid.ParamAttr( - name=self.pos_emb_name, initializer=self.param_initializer)) - emb_out = token_emb_out + type_emb_out + pos_emb_out - - # auxiliary memory embeddings - if aux_emb is not None: - emb_out = layers.concat([aux_emb, emb_out], axis=1) - - # post process of embedding - emb_out = pre_process_layer( - emb_out, - self.pre_encoder_cmd, - self.prepostprocess_dropout, - name="pre_encoder", - epsilon=self.epsilon) - if self.emb_mapping_in: - emb_out = layers.fc( - input=emb_out, - num_flatten_dims=2, - size=self.hidden_size, - param_attr=fluid.ParamAttr( - name="emb_hidden_mapping", - initializer=self.param_initializer), - bias_attr="emb_hidden_mapping_bias") - - # generate n-head self-attention mask - self_attn_mask = input_mask - self_attn_mask = layers.scale( - x=self_attn_mask, scale=1e4, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = layers.stack( - x=[self_attn_mask] * self.n_head, axis=1) - n_head_self_attn_mask.stop_gradient = True - - return emb_out, n_head_self_attn_mask - - def _get_pooled_output(self, enc_out, pos): - enc_out = layers.reshape(x=enc_out, shape=[-1, self.hidden_size]) - pos = layers.cast(x=pos, dtype="int32") - feat = layers.gather(input=enc_out, index=pos) - - pooled_out = layers.fc( - input=feat, - size=self.hidden_size, - act="tanh", - param_attr=fluid.ParamAttr( - name="pooled_fc.w_0", initializer=self.param_initializer), - bias_attr="pooled_fc.b_0") - return pooled_out - - def _generation_network(self, - token_ids, - type_ids, - pos_ids, - generation_mask, - aux_emb=None, - gather_idx=None): - emb_out, n_head_self_attn_mask = self._gen_input( - token_ids, type_ids, pos_ids, generation_mask, aux_emb=aux_emb) - return self._encode( - emb_out, - n_head_self_attn_mask, - self.generation_caches, - gather_idx=gather_idx) - - def _encode(self, - emb_out, - n_head_self_attn_mask, - caches=None, - gather_idx=None): - return encoder( - enc_input=emb_out, - attn_bias=n_head_self_attn_mask, - n_layer=self.n_layer, - n_head=self.n_head, - d_key=self.d_key, - d_value=self.d_value, - d_model=self.hidden_size, - d_inner_hid=self.inner_hidden_size, - prepostprocess_dropout=self.prepostprocess_dropout, - attention_dropout=self.attention_dropout, - relu_dropout=0, - hidden_act=self.hidden_act, - preprocess_cmd=self.preprocess_cmd, - postprocess_cmd=self.postprocess_cmd, - param_initializer=self.param_initializer, - epsilon=self.epsilon, - n_layer_per_block=self.n_layer_per_block, - name="encoder", - caches=caches, - gather_idx=gather_idx, - store=caches is not None) - - def _gumbel_softmax(self, logits, tau=0.67, eps=1e-10): - u = layers.uniform_random_batch_size_like( - logits, shape=[-1, 
self.latent_type_size], min=0.0, max=1.0) - u.stop_gradient = True - gumbel = 0.0 - layers.log(eps - layers.log(u + eps)) - y = logits + gumbel - return layers.softmax(y / tau) - - def _get_feed_dict(self, is_infer=False): - """ - Get the feed list of the model. - - Args: - is_infer(bool): True if running inference. - - Returns: - list(Variable): The feed list. - list(str): The name of each Variable in feed list. - """ - feed_dict = {} - feed_dict["token_ids"] = layers.data( - name="token_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") - feed_dict["type_ids"] = layers.data( - name="type_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") - feed_dict["pos_ids"] = layers.data( - name="pos_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") - - feed_dict["generation_mask"] = layers.data( - name="generation_mask", - shape=[-1, self.max_seq_len, self.max_seq_len], - dtype=self.dtype) - - if is_infer: - feed_dict["tgt_ids"] = layers.data( - name="tgt_ids", - shape=[-1, self.max_seq_len, 1], - dtype="int64", - lod_level=2) - feed_dict["tgt_pos"] = layers.data( - name="tgt_pos", - shape=[-1, self.max_seq_len, 1], - dtype="int64", - lod_level=2) - feed_dict["init_score"] = layers.data( - name="init_score", shape=[-1, 1], dtype="float32", lod_level=1) - feed_dict["parent_idx"] = layers.data( - name="parent_idx", shape=[-1], dtype="int64") - - feed_dict["tgt_generation_mask"] = layers.data( - name="tgt_generation_mask", - shape=[-1, 1, self.max_seq_len], - dtype="float32") - else: - feed_dict["tgt_label"] = layers.data( - name="tgt_label", shape=[-1, 1], dtype="int64") - feed_dict["tgt_pos"] = layers.data( - name="tgt_pos", shape=[-1, 1], dtype="int64") - - feed_dict["data_id"] = layers.data( - name="data_id", shape=[-1, 1], dtype="int64") - return feed_dict - - def forward(self, inputs, is_infer=False): - """ - Run model main forward. 
- """ - outputs = {} - if is_infer: - self.generation_caches = [{ - "k": - layers.fill_constant_batch_size_like( - input=inputs["token_ids"], - shape=[-1, 0, self.d_key * self.n_head], - dtype=self.dtype, - value=0), - "v": - layers.fill_constant_batch_size_like( - input=inputs["token_ids"], - shape=[-1, 0, self.d_value * self.n_head], - dtype=self.dtype, - value=0), - } for i in range(self.n_layer)] - else: - self.generation_caches = None - - outputs["enc_out"], generation_checkpoints = self._generation_network( - token_ids=inputs["token_ids"], - type_ids=inputs["type_ids"], - pos_ids=inputs["pos_ids"], - generation_mask=inputs["generation_mask"], - gather_idx=inputs.get("parent_idx", None)) - - if not is_infer: - outputs["checkpoints"] = generation_checkpoints - return outputs - - def _calc_logits(self, enc_out, checkpoints=None, seq_pos=None): - """Get the logits of generation.""" - enc_out = layers.reshape(x=enc_out, shape=[-1, self.hidden_size]) - if seq_pos is not None: - seq_pos = layers.cast(x=seq_pos, dtype="int32") - seq_feat = layers.gather(input=enc_out, index=seq_pos) - else: - seq_feat = enc_out - - seq_trans_feat = layers.fc( - input=seq_feat, - size=self.emb_size, - act=self.hidden_act, - param_attr=fluid.ParamAttr( - name="mask_lm_trans_fc.w_0", - initializer=self.param_initializer), - bias_attr=fluid.ParamAttr(name="mask_lm_trans_fc.b_0")) - - seq_trans_feat = pre_process_layer( - seq_trans_feat, self.post_cls_cmd, name="mask_lm_trans") - - if checkpoints is not None: - checkpoints.append(seq_trans_feat) - - if self.weight_sharing: - fc_out = layers.matmul( - x=seq_trans_feat, - y=fluid.default_main_program().global_block().var( - self.token_emb_name), - transpose_y=True) - if self.cls_bias: - fc_out += layers.create_parameter( - shape=[self.vocab_size], - dtype=self.dtype, - attr=fluid.ParamAttr(name="mask_lm_out_fc.b_0"), - is_bias=True) - else: - seq_out_bias_attr = fluid.ParamAttr( - name="mask_lm_out_fc.b_0") if self.cls_bias else False - fc_out = layers.fc( - input=seq_trans_feat, - size=self.vocab_size, - param_attr=fluid.ParamAttr( - name="mask_lm_out_fc.w_0", - initializer=self.param_initializer), - bias_attr=seq_out_bias_attr) - return fc_out - - def _get_metrics(self, inputs, outputs): - metrics = {} - - fc_out = self._calc_logits(outputs["enc_out"], outputs["checkpoints"], - inputs["tgt_pos"]) - tgt_lm_loss = layers.softmax_with_cross_entropy( - logits=fc_out, label=inputs["tgt_label"]) - mean_tgt_lm_loss = layers.mean(tgt_lm_loss) - loss = mean_tgt_lm_loss - metrics["token_lm_loss"] = mean_tgt_lm_loss - - metrics["loss"] = loss - return metrics - - def _get_statistics(self, inputs, outputs): - statistics = {} - if "tgt_label" in inputs: - statistics["tokens_num"] = layers.reduce_sum( - layers.fill_constant_batch_size_like( - input=inputs["tgt_label"], - value=1.0, - shape=[-1], - dtype="int64")) - statistics["batch_size"] = layers.reduce_sum( - layers.fill_constant_batch_size_like( - input=inputs["token_ids"], value=1.0, shape=[-1], - dtype="int64")) - return statistics - - def get_metrics_and_statistics(self, inputs, outputs): - """ - Get metrics and statistics. - """ - metrics = self._get_metrics(inputs, outputs) - statistics = self._get_statistics(inputs, outputs) - return metrics, statistics - - def infer(self, inputs, outputs): - """ - Run model inference. - """ - if self.do_generation: - return self.generator.inference(self, inputs, outputs) - else: - raise NotImplementedError - - def _run_generation(self, inputs): - """ - Run generation. 
- """ - batch_size = len(inputs["data_id"]) - inputs["parent_idx"] = np.array(range(batch_size), dtype="int64") - outputs = self._execute( - self.infer_program, - self._get_feed(inputs, is_infer=True), - self.infer_fetch_dict, - return_numpy=False) - - predictions = [] - data_id_list = np.array(outputs["data_id"]).reshape(-1).tolist() - token_ids_list = np.array(outputs["token_ids"]).squeeze(2).tolist() - seq_ids = outputs["finished_ids"] - seq_ids_np = np.array(outputs["finished_ids"]) - seq_scores_np = np.array(outputs["finished_scores"]) - for i, (data_id, token_ids) in enumerate( - zip(data_id_list, token_ids_list)): - start = seq_ids.lod()[0][i] - end = seq_ids.lod()[0][i + 1] - for j in range(start, end): - sub_start = seq_ids.lod()[1][j] - sub_end = seq_ids.lod()[1][j + 1] - info = {} - info["data_id"] = data_id - info["decode_score"] = float(seq_scores_np[sub_end - 1]) - info["context_token_ids"] = token_ids - info["response_token_ids"] = seq_ids_np[sub_start: - sub_end].tolist() - predictions.append(info) - return predictions - - def infer_step(self, inputs): - """ - Run one inference step. - """ - if self.do_generation: - if self.generator.num_samples: - inputs = { - name: repeat_array_or_tensor(array_or_tensor, self.place, - self.generator.num_samples) - for name, array_or_tensor in inputs.items() - } - - if self.mem_efficient: - predictions = [] - for idx in range(0, len(inputs["data_id"]), self.batch_size): - part_inputs = { - name: slice_array_or_tensor(array_or_tensor, self.place, - idx, idx + self.batch_size) - for name, array_or_tensor in inputs.items() - } - part_outputs = self._run_generation(part_inputs) - predictions.extend(part_outputs) - else: - predictions = self._run_generation(inputs) - return predictions - else: - return self._execute(self.infer_program, - self._get_feed(inputs, is_infer=True), - self.infer_fetch_dict) diff --git a/hub_module/modules/text/text_generation/plato2_en_large/module.py b/hub_module/modules/text/text_generation/plato2_en_large/module.py deleted file mode 100644 index 09c4873515d49f27306be20c95341acfc8f09349..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_large/module.py +++ /dev/null @@ -1,194 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import ast
-import os
-import json
-import sys
-import argparse
-import contextlib
-from collections import namedtuple
-
-import paddle.fluid as fluid
-import paddlehub as hub
-from paddlehub.module.module import runnable
-from paddlehub.module.nlp_module import DataFormatError
-from paddlehub.common.logger import logger
-from paddlehub.module.module import moduleinfo, serving
-
-import plato2_en_large.models as plato_models
-from plato2_en_large.tasks.dialog_generation import DialogGeneration
-from plato2_en_large.utils import check_cuda, Timer
-from plato2_en_large.utils.args import parse_args
-
-
-@moduleinfo(
-    name="plato2_en_large",
-    version="1.0.0",
-    summary=
-    "A novel pre-training model for dialogue generation, incorporated with latent discrete variables for one-to-many relationship modeling.",
-    author="baidu-nlp",
-    author_email="",
-    type="nlp/text_generation",
-)
-class Plato(hub.NLPPredictionModule):
-    def _initialize(self):
-        """
-        Initialize with the necessary elements.
-        """
-        if "CUDA_VISIBLE_DEVICES" not in os.environ:
-            raise RuntimeError(
-                "The module only supports GPU. Please set the environment variable CUDA_VISIBLE_DEVICES."
-            )
-
-        args = self.setup_args()
-        self.task = DialogGeneration(args)
-        self.model = plato_models.create_model(args, fluid.CUDAPlace(0))
-        self.Example = namedtuple("Example", ["src", "data_id"])
-        self._interactive_mode = False
-
-    def setup_args(self):
-        """
-        Setup arguments.
-        """
-        assets_path = os.path.join(self.directory, "assets")
-        vocab_path = os.path.join(assets_path, "vocab.txt")
-        init_pretraining_params = os.path.join(assets_path, "32L", "Plato")
-        spm_model_file = os.path.join(assets_path, "spm.model")
-        nsp_inference_model_path = os.path.join(assets_path, "32L", "NSP")
-        config_path = os.path.join(assets_path, "32L.json")
-
-        # ArgumentParser.parse_args uses argv[1:], dropping the first element,
-        # so the first element of sys.argv should be the empty string.
-        sys.argv = [
-            "", "--model", "Plato", "--vocab_path",
-            "%s" % vocab_path, "--do_lower_case", "False",
-            "--init_pretraining_params",
-            "%s" % init_pretraining_params, "--spm_model_file",
-            "%s" % spm_model_file, "--nsp_inference_model_path",
-            "%s" % nsp_inference_model_path, "--ranking_score", "nsp_score",
-            "--do_generation", "True", "--batch_size", "1", "--config_path",
-            "%s" % config_path
-        ]
-
-        parser = argparse.ArgumentParser()
-        plato_models.add_cmdline_args(parser)
-        DialogGeneration.add_cmdline_args(parser)
-        args = parse_args(parser)
-
-        args.load(args.config_path, "Model")
-        args.run_infer = True  # only build infer program
-
-        return args
-
-    @serving
-    def generate(self, texts):
-        """
-        Get the robot responses to the input texts.
-
-        Args:
-            texts(list or str): If not in the interactive mode, texts should be a
-                list in which every element is a chat context with turns separated
-                by '\t'. Otherwise, texts should be a single sentence; the module
-                tracks the context automatically.
-
-        Returns:
-            results(list): the robot responses.
-        """
-        if not texts:
-            return []
-        if self._interactive_mode:
-            if isinstance(texts, str):
-                self.context.append(texts.strip())
-                texts = [" [SEP] ".join(self.context[-self.max_turn:])]
-            else:
-                raise ValueError(
-                    "In the interactive mode, the input data should be a string."
-                )
-        elif not isinstance(texts, list):
-            raise ValueError(
-                "If not in the interactive mode, the input data should be a list."
-            )
-
-        bot_responses = []
-        for i, text in enumerate(texts):
-            example = self.Example(src=text.replace("\t", " [SEP] "), data_id=i)
-            record = self.task.reader._convert_example_to_record(
-                example, is_infer=True)
-            data = self.task.reader._pad_batch_records([record], is_infer=True)
-            pred = self.task.infer_step(self.model, data)[0]  # batch_size is 1
-            bot_response = pred["response"]  # ignore data_id and score
-            bot_responses.append(bot_response)
-
-        if self._interactive_mode:
-            self.context.append(bot_responses[0].strip())
-        return bot_responses
-
-    @contextlib.contextmanager
-    def interactive_mode(self, max_turn=6):
-        """
-        Enter the interactive mode.
-
-        Args:
-            max_turn(int): the maximum number of dialogue turns. max_turn = 1
-                means the robot only remembers the last utterance you said.
-        """
-        self._interactive_mode = True
-        self.max_turn = max_turn
-        self.context = []
-        yield
-        self.context = []
-        self._interactive_mode = False
-
-    @runnable
-    def run_cmd(self, argvs):
-        """
-        Run as a command.
-        """
-        self.parser = argparse.ArgumentParser(
-            description='Run the %s module.' % self.name,
-            prog='hub run %s' % self.name,
-            usage='%(prog)s',
-            add_help=True)
-
-        self.arg_input_group = self.parser.add_argument_group(
-            title="Input options", description="Input data. Required")
-        self.arg_config_group = self.parser.add_argument_group(
-            title="Config options",
-            description=
-            "Run configuration for controlling module behavior, optional.")
-
-        self.add_module_input_arg()
-
-        args = self.parser.parse_args(argvs)
-
-        try:
-            input_data = self.check_input_data(args)
-        except (DataFormatError, RuntimeError):
-            # `except A and B` would only catch B; a tuple catches both.
-            self.parser.print_help()
-            return None
-
-        results = self.generate(texts=input_data)
-
-        return results
-
-
-if __name__ == "__main__":
-    module = Plato()
-    for result in module.generate([
-            "Hello",
-            "Hello\thi, nice to meet you, my name is tom\tso your name is tom?"
-    ]):
-        print(result)
-    with module.interactive_mode(max_turn=3):
-        while True:
-            human_utterance = input()
-            robot_utterance = module.generate(human_utterance)
-            print("Robot: %s" % robot_utterance[0])
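For context, a typical way to drive this module through PaddleHub, mirroring the `__main__` block above; this assumes the module assets have been downloaded and, as `_initialize` enforces, a GPU with `CUDA_VISIBLE_DEVICES` set:

```python
import paddlehub as hub

module = hub.Module(name="plato2_en_large")

# Batch mode: each element is a full context, turns separated by '\t'.
print(module.generate(["Hello", "Hello\thi, nice to meet you"]))

# Interactive mode: the module tracks up to max_turn turns of context itself.
with module.interactive_mode(max_turn=3):
    print(module.generate("What's your hobby?")[0])
```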
-"""Dialogue Reader.""" - -import csv -from collections import namedtuple -from contextlib import contextmanager -import gzip -import os - -import numpy as np -import paddle.fluid as fluid -from paddle.fluid.incubate.fleet.collective import fleet - -from plato2_en_large.utils import pad_batch_data -from plato2_en_large.utils.args import str2bool -from plato2_en_large.utils.masking import mask -import plato2_en_large.utils.tokenization as tokenization - - -class DialogReader(object): - """The implement of DialogReader.""" - - @classmethod - def add_cmdline_args(cls, parser): - """Add cmdline argurments.""" - group = parser.add_argument_group("Reader") - group.add_argument("--max_src_len", type=int, default=128) - group.add_argument("--max_tgt_len", type=int, default=128) - group.add_argument( - "--truncate_first_turn", type=str2bool, default=False) - group.add_argument( - "--file_format", - type=str, - default="file", - choices=["file", "filelist"]) - group.add_argument( - "--data_format", - type=str, - default="raw", - choices=["raw", "tokenized", "numerical"]) - group.add_argument("--in_tokens", type=str2bool, default=False) - group.add_argument("--batch_size", type=int, default=16) - group.add_argument("--continuous_position", type=str2bool, default=True) - group.add_argument("--random_seed", type=int, default=11) - group.add_argument("--sort_pool_size", type=int, default=2**16) - - group = parser.add_argument_group("Tokenizer") - group.add_argument( - "--tokenizer", type=str, default="SentencePieceTokenizer") - args, _ = parser.parse_known_args() - tokenizer_cls = getattr(tokenization, args.tokenizer) - tokenizer_cls.add_cmdline_args(parser) - return group - - def __init__(self, args): - tokenizer_cls = getattr(tokenization, args.tokenizer) - self.tokenizer = tokenizer_cls(args) - self.vocab = self.tokenizer.vocab - self.pad_id = args.pad_id = self.vocab["[PAD]"] - self.bos_id = args.bos_id = self.vocab["[CLS]"] - self.eos_id = args.eos_id = self.vocab["[SEP]"] - self.unk_id = args.unk_id = self.vocab["[UNK]"] - self.mask_id = args.mask_id = self.vocab["[MASK]"] - self.vocab_size = args.get("vocab_size", 0) - self.max_src_len = args.max_src_len - self.max_tgt_len = args.max_tgt_len - self.truncate_first_turn = args.truncate_first_turn - self.file_format = args.file_format - self.data_format = args.data_format - self.in_tokens = args.in_tokens - self.batch_size = args.batch_size - self.continuous_position = args.continuous_position - self.sort_pool_size = args.sort_pool_size - - # random_seed must be set for data slicing when using multi-gpu - self.global_rng = np.random.RandomState(args.random_seed) - - # training progress - self.current_example = 0 - self.current_epoch = 0 - self.num_examples = 0 - - # model related - - self.fields = ["token_ids", "type_ids", "pos_ids"] - self.num_numerical_fields = len(self.fields) - self.fields += ["tgt_start_idx", "data_id"] - self.sort_key = lambda record: [len(record.token_ids)] - - self.Record = namedtuple( - "Record", self.fields, defaults=(None, ) * len(self.fields)) - - self.features = {} - return - - def get_train_progress(self): - """Gets progress for training phase.""" - return self.current_epoch, self.current_file_index, self.total_file - - def _convert_example_to_record(self, example, is_infer): - # process src - src_token_ids = [] - src_pos_ids = [] - - # tokenize src - s_token_ids_list = [] - for s in example.src.split("[SEP]"): - s = tokenization.convert_to_unicode(s).strip() - - if self.data_format == "tokenized": - s_tokens = 
s.split(" ") - else: - s_tokens = self.tokenizer.tokenize(s) - - s_token_ids = self.tokenizer.convert_tokens_to_ids(s_tokens) + [ - self.eos_id - ] - s_token_ids_list.append(s_token_ids) - - # trim src - idx = len(s_token_ids_list) - 1 - total_token_num = 1 - while idx >= 0: - total_token_num += len(s_token_ids_list[idx]) - if total_token_num > self.max_src_len: - if self.truncate_first_turn and idx == 0: - truncated_ids = s_token_ids_list[idx][:self.max_src_len - - total_token_num] - if len(truncated_ids) > 1: - s_token_ids_list[idx] = truncated_ids[:-1] + [ - self.eos_id - ] - idx -= 1 - break - idx -= 1 - - for i, s_token_ids in enumerate(s_token_ids_list[idx + 1:], idx + 1): - src_token_ids += s_token_ids - src_pos_ids += list(range(1, len(s_token_ids) + 1)) - - src_token_ids = [self.bos_id] + src_token_ids - src_type_ids = [0] * len(src_token_ids) - src_pos_ids = [0] + src_pos_ids - assert len(src_token_ids) == len(src_type_ids) == len(src_pos_ids), \ - "not len(src_token_ids) == len(src_type_ids) == len(src_pos_ids)" - - token_ids = src_token_ids - type_ids = src_type_ids - pos_ids = src_pos_ids - tgt_start_idx = len(token_ids) - - if not is_infer: - # process tgt - # tokenize tgt - tgt = tokenization.convert_to_unicode(example.tgt).strip() - if self.data_format == "tokenized": - tgt_tokens = tgt.split(" ") - else: - tgt_tokens = self.tokenizer.tokenize(tgt) - - tgt_token_ids = self.tokenizer.convert_tokens_to_ids(tgt_tokens) - tgt_token_ids.append(self.eos_id) - - # trim tgt - if len(tgt_token_ids) > self.max_tgt_len - 1: - tgt_token_ids = tgt_token_ids[:self.max_tgt_len - 1] - - tgt_token_ids = [self.bos_id] + tgt_token_ids - tgt_type_ids = [1] * len(tgt_token_ids) - tgt_pos_ids = list(range(1, len(tgt_token_ids) + 1)) - assert len(tgt_token_ids) == len(tgt_type_ids) == len(tgt_pos_ids), \ - "not len(tgt_token_ids) == len(tgt_type_ids) == len(tgt_pos_ids)" - - token_ids += tgt_token_ids - type_ids += tgt_type_ids - pos_ids += tgt_pos_ids - - assert len(token_ids) == len(type_ids) == len(pos_ids), \ - "not len(token_ids) == len(type_ids) == len(pos_ids)" - - if self.continuous_position: - src_pos_ids = list(range(len(src_token_ids))) - if not is_infer: - tgt_pos_ids = list(range(len(tgt_token_ids))) - pos_ids = list(range(len(token_ids))) - - field_values = { - "token_ids": src_token_ids, - "type_ids": src_type_ids, - "pos_ids": src_pos_ids - } - field_values["tgt_start_idx"] = tgt_start_idx - field_values["data_id"] = example.data_id - - record = self.Record(**field_values) - return record - - def _read_tsv(self, fp, phase, is_infer, delimiter="\t", quotechar=None): - """Reads a tab separated value file.""" - csv.field_size_limit(2**20) - reader = csv.reader(fp, delimiter=delimiter, quotechar=quotechar) - headers = next(reader) - headers.append("data_id") - Example = namedtuple("Example", headers) - - for i, line in enumerate(reader): - example = Example(*line, data_id=i) - if is_infer or phase.endswith("test"): - self.features[phase][i] = example - record = self._convert_example_to_record(example, is_infer) - yield record - - def _read_numerical_file(self, fp, delimiter=";"): - for i, line in enumerate(fp): - cols = tokenization.convert_to_unicode(line).strip().split( - delimiter) - cols = list(map(lambda x: list(map(int, x.split(" "))), cols)) - if len(cols) > self.num_numerical_fields: - cols = cols[:self.num_numerical_fields] - tgt_start_idx = cols[0].index(self.bos_id, 1) - record = self.Record(*cols, tgt_start_idx=tgt_start_idx, data_id=i) - yield record - - def 
-    def _read_file(self, input_file, phase, is_infer):
-        def __wrapper__():
-            with open_file(input_file) as fp:
-                if self.data_format == "numerical":
-                    records = self._read_numerical_file(fp)
-                else:
-                    records = self._read_tsv(fp, phase, is_infer)
-                for record in records:
-                    yield record
-
-        return __wrapper__
-
-    def _read_files(self, filelist, phase, is_infer, shuffle_files):
-        with open(filelist) as f:
-            input_files = f.readlines()
-
-        def __wrapper__():
-            if shuffle_files:
-                self.global_rng.shuffle(input_files)
-
-            if phase == "train":
-                self.total_file = len(input_files)
-            for file_index, input_file in enumerate(input_files, 1):
-                if phase == "train":
-                    self.current_file_index = file_index
-                    self.current_file = input_file
-                file_reader = self._read_file(input_file.strip(), phase,
-                                              is_infer)
-                for record in file_reader():
-                    yield record
-
-        return __wrapper__
-
-    def _batch_reader(self,
-                      reader,
-                      phase=None,
-                      is_infer=False,
-                      sort_pool_size=2**16):
-        """Construct a batch reader."""
-
-        def update_max_lens(max_lens, record):
-            """Update max_lens."""
-            if max_lens is None:
-                return self.sort_key(record)
-            else:
-                return [
-                    max(max_len, l)
-                    for max_len, l in zip(max_lens, self.sort_key(record))
-                ]
-
-        def get_batch(reader):
-            """Generate batches from reader."""
-            batch, max_lens = [], None
-            for record in reader():
-                if record is None:
-                    yield batch
-                    batch, max_lens = [], None
-                    continue
-
-                self.current_example += 1
-                max_lens = update_max_lens(max_lens, record)
-                if self.in_tokens:
-                    to_append = (
-                        len(batch) + 1) * sum(max_lens) <= self.batch_size
-                else:
-                    to_append = len(batch) < self.batch_size
-                if to_append:
-                    batch.append(record)
-                else:
-                    yield batch
-                    batch, max_lens = [record], self.sort_key(record)
-
-            if len(batch) > 0:
-                yield batch
-
-        def get_sorted_batch(pool):
-            """Generate sorted batches from pool."""
-            pool = sorted(pool, key=self.sort_key)
-            batches = []
-            batch, max_lens = [], None
-            for record in pool:
-                self.current_example += 1
-                max_lens = update_max_lens(max_lens, record)
-                if self.in_tokens:
-                    to_append = (
-                        len(batch) + 1) * sum(max_lens) <= self.batch_size
-                else:
-                    to_append = len(batch) < self.batch_size
-                if to_append:
-                    batch.append(record)
-                else:
-                    batches.append(batch)
-                    batch, max_lens = [record], self.sort_key(record)
-
-            if len(batch) > 0:
-                batches.append(batch)
-            self.global_rng.shuffle(batches)
-
-            for batch in batches:
-                yield batch
-
-        def __wrapper__():
-            if sort_pool_size > 0:
-                pool = []
-                for record in reader():
-                    pool.append(record)
-                    if len(pool) == sort_pool_size:
-                        for batch in get_sorted_batch(pool):
-                            yield batch
-                        pool = []
-                if len(pool) > 0:
-                    for batch in get_sorted_batch(pool):
-                        yield batch
-            else:
-                for batch in get_batch(reader):
-                    yield batch
-
-        return __wrapper__
-
-    def _distributed_batch_reader(self,
-                                  batch_reader,
-                                  num_part,
-                                  part_id,
-                                  is_test=False):
-        def __wrapper__():
-            batches = []
-            for batch in batch_reader():
-                batches.append(batch)
-                if len(batches) == num_part:
-                    yield batches[part_id]
-                    batches = []
-            if is_test and 0 <= part_id < len(batches):
-                yield batches[part_id]
-            return
-
-        return __wrapper__
-
-    def data_generator(self,
-                       input_file=None,
-                       reader=None,
-                       num_epochs=1,
-                       num_part=1,
-                       part_id=0,
-                       phase=None,
-                       is_infer=False):
-        """Data generator."""
-
-        def __wrapper__():
-            if is_infer or phase.endswith("test"):
-                self.features[phase] = {}
-
-            nonlocal reader
-            if reader is None:
-                if self.file_format == "filelist":
-                    reader = self._read_files(input_file, phase, is_infer,
-                                              not phase.endswith("test"))
-                else:
-                    if phase == "train":
-                        self.total_file = 1
-                        self.current_file_index = 1
-                        self.current_file = input_file
-                    reader = self._read_file(input_file, phase, is_infer)
-
-            batch_reader = self._batch_reader(
-                reader,
-                phase,
-                is_infer,
-                sort_pool_size=self.sort_pool_size if not is_infer else 0)
-            if phase == "train":
-                batch_reader = self._distributed_batch_reader(
-                    batch_reader, num_part, part_id)
-            elif phase.startswith("distributed"):
-                batch_reader = self._distributed_batch_reader(
-                    batch_reader, num_part, part_id, is_test=True)
-
-            for epoch_index in range(num_epochs):
-                if phase == "train":
-                    self.current_example = 0
-                    self.current_epoch = epoch_index + 1
-                for batch in batch_reader():
-                    yield self._pad_batch_records(batch, is_infer)
-
-        return __wrapper__
-
-    def _gen_self_attn_mask(self,
-                            batch_token_ids,
-                            batch_tgt_start_idx=None,
-                            is_unidirectional=True,
-                            shift_len=0):
-        max_len = max(map(len, batch_token_ids))
-        input_mask_data = np.zeros((len(batch_token_ids), max_len + shift_len,
-                                    max_len + shift_len))
-        if is_unidirectional:
-            for index, mask_data in enumerate(input_mask_data):
-                start = 0 if batch_tgt_start_idx is None else batch_tgt_start_idx[
-                    index]
-                end = len(batch_token_ids[index])
-                mask_data[:end + shift_len, :start + shift_len] = 1.0
-                # Every position sees the whole context; target positions see
-                # earlier target positions via the lower triangular block.
-                b = np.tril(np.ones([end - start, end - start]), 0)
-                mask_data[start + shift_len:end + shift_len, start +
-                          shift_len:end + shift_len] = b
-        else:
-            for index, token_ids in enumerate(batch_token_ids):
-                input_mask_data[index, :len(token_ids) +
-                                shift_len, :len(token_ids) + shift_len] = 1.0
-        return input_mask_data.astype("float32")
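To make `_gen_self_attn_mask` concrete: for one sequence with context length `start` and total length `end`, every position may attend to the whole context, while target positions attend causally among themselves. A NumPy sketch of the resulting mask (1 = visible), with toy lengths:

```python
import numpy as np

start, end = 3, 6              # 3 context tokens, 3 target tokens
mask = np.zeros((end, end))
mask[:, :start] = 1.0          # the context is visible to every position
mask[start:, start:] = np.tril(np.ones((end - start, end - start)))
print(mask)
# [[1. 1. 1. 0. 0. 0.]
#  [1. 1. 1. 0. 0. 0.]
#  [1. 1. 1. 0. 0. 0.]
#  [1. 1. 1. 1. 0. 0.]
#  [1. 1. 1. 1. 1. 0.]
#  [1. 1. 1. 1. 1. 1.]]
```

Downstream, `_gen_input` in the model converts this 0/1 mask into an additive attention bias, roughly `1e4 * (mask - 1)`: 0 for visible positions and -10000 for masked ones.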
- """ - batch_size = len(batch_records) - batch = {} - batch_token_ids = [record.token_ids for record in batch_records] - batch_type_ids = [record.type_ids for record in batch_records] - batch_pos_ids = [record.pos_ids for record in batch_records] - batch["token_ids"] = pad_batch_data(batch_token_ids, pad_id=self.pad_id) - batch["type_ids"] = pad_batch_data(batch_type_ids, pad_id=self.pad_id) - batch["pos_ids"] = pad_batch_data(batch_pos_ids, pad_id=self.pad_id) - - batch_tgt_start_idx = [record.tgt_start_idx for record in batch_records] - batch["generation_mask"] = self._gen_self_attn_mask( - batch_token_ids, batch_tgt_start_idx=batch_tgt_start_idx) - - if is_infer: - tgt_ids = np.array( - [[[self.bos_id]]] * len(batch_token_ids), dtype="int64") - if self.continuous_position: - tgt_pos = np.array(batch_tgt_start_idx, dtype="int64") - else: - tgt_pos = np.zeros_like(batch_tgt_start_idx, dtype="int64") - tgt_pos = tgt_pos.reshape(-1, 1, 1) - batch["init_score"] = np.zeros_like( - tgt_ids, dtype="float32").reshape(-1, 1).tolist() - batch["tgt_ids"] = tgt_ids.tolist() - batch["tgt_pos"] = tgt_pos.tolist() - - batch["tgt_generation_mask"] = batch[ - "generation_mask"][:, 0:1, :].astype("float32") - else: - batch["tgt_label"], batch["tgt_pos"] = mask( - batch_tokens=batch_token_ids, - vocab_size=self.vocab_size, - sent_b_starts=batch_tgt_start_idx, - is_unidirectional=True) - - batch_data_id = [record.data_id for record in batch_records] - batch["data_id"] = np.array(batch_data_id).astype("int64").reshape( - [-1, 1]) - return batch - - -@contextmanager -def open_file(filename): - """Open file.""" - if filename.endswith(".gz"): - fp = gzip.open(filename, "rt") - else: - fp = open(filename) - yield fp - fp.close() diff --git a/hub_module/modules/text/text_generation/plato2_en_large/readers/nsp_reader.py b/hub_module/modules/text/text_generation/plato2_en_large/readers/nsp_reader.py deleted file mode 100644 index f3619299bed1be64c5e33689a7672302ab78763d..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_large/readers/nsp_reader.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""NSP Reader.""" - -from collections import namedtuple - -import numpy as np - -from plato2_en_large.readers.dialog_reader import DialogReader -from plato2_en_large.utils import pad_batch_data -from plato2_en_large.utils.args import str2bool -from plato2_en_large.utils.masking import mask - - -class NSPReader(DialogReader): - """NSP Reader.""" - - @classmethod - def add_cmdline_args(cls, parser): - """Add cmdline argurments.""" - group = DialogReader.add_cmdline_args(parser) - group.add_argument( - "--attention_style", - type=str, - default="bidirectional", - choices=["bidirectional", "unidirectional"]) - group.add_argument( - "--mix_negative_sample", type=str2bool, default=False) - return group - - def __init__(self, args): - super(NSPReader, self).__init__(args) - self.fields.append("label") - self.Record = namedtuple( - "Record", self.fields, defaults=(None, ) * len(self.fields)) - - self.attention_style = args.attention_style - self.mix_negative_sample = args.mix_negative_sample - return - - def _convert_example_to_record(self, example, is_infer): - record = super(NSPReader, self)._convert_example_to_record( - example, False) - if "label" in example._fields: - record = record._replace(label=int(example.label)) - return record - - def _mix_negative_sample(self, reader, neg_pool_size=2**16): - def gen_from_pool(pool): - num_samples = len(pool) - if num_samples == 1: - # only one sample: it is impossible to generate negative sample - yield pool[0]._replace(label=1) - return - self.global_rng.shuffle(pool) - for i in range(num_samples): - pool[i] = pool[i]._replace(label=1) - j = (i + 1) % num_samples - idx_i = pool[i].tgt_start_idx - idx_j = pool[j].tgt_start_idx - field_values = {} - field_values["token_ids"] = pool[i].token_ids[:idx_i] + pool[ - j].token_ids[idx_j:] - field_values["type_ids"] = pool[i].type_ids[:idx_i] + pool[ - j].type_ids[idx_j:] - field_values["pos_ids"] = list( - range(len(field_values["token_ids"]))) - neg_record = self.Record( - **field_values, tgt_start_idx=idx_i, data_id=-1, label=0) - pool.append(neg_record) - assert len(neg_record.token_ids) <= self.max_seq_len - self.global_rng.shuffle(pool) - for record in pool: - yield record - - def __wrapper__(): - pool = [] - for record in reader(): - pool.append(record) - if len(pool) == neg_pool_size: - for record in gen_from_pool(pool): - yield record - pool = [] - if len(pool) > 0: - for record in gen_from_pool(pool): - yield record - - return __wrapper__ - - def _batch_reader(self, - reader, - phase=None, - is_infer=False, - sort_pool_size=2**16): - if self.mix_negative_sample: - reader = self._mix_negative_sample(reader) - return super(NSPReader, self)._batch_reader( - reader, - phase=phase, - is_infer=is_infer, - sort_pool_size=sort_pool_size) - - def _pad_batch_records(self, batch_records, is_infer): - """ - Padding batch records and construct model's inputs. 
- """ - batch = {} - batch_token_ids = [record.token_ids for record in batch_records] - batch_type_ids = [record.type_ids for record in batch_records] - batch_pos_ids = [record.pos_ids for record in batch_records] - batch_tgt_start_idx = [record.tgt_start_idx for record in batch_records] - batch_label = [record.label for record in batch_records] - - if self.attention_style == "unidirectional": - batch["token_ids"] = pad_batch_data( - batch_token_ids, pad_id=self.pad_id) - batch["type_ids"] = pad_batch_data( - batch_type_ids, pad_id=self.pad_id) - batch["pos_ids"] = pad_batch_data(batch_pos_ids, pad_id=self.pad_id) - tgt_label, tgt_pos, label_pos = mask( - batch_tokens=batch_token_ids, - vocab_size=self.vocab_size, - bos_id=self.bos_id, - sent_b_starts=batch_tgt_start_idx, - labels=batch_label, - is_unidirectional=True) - attention_mask = self._gen_self_attn_mask(batch_token_ids, - batch_tgt_start_idx) - else: - batch_mask_token_ids, tgt_label, tgt_pos, label_pos = mask( - batch_tokens=batch_token_ids, - vocab_size=self.vocab_size, - bos_id=self.bos_id, - eos_id=self.eos_id, - mask_id=self.mask_id, - sent_b_starts=batch_tgt_start_idx, - labels=batch_label, - is_unidirectional=False) - if not is_infer: - batch_token_ids = batch_mask_token_ids - batch["token_ids"] = pad_batch_data( - batch_token_ids, pad_id=self.pad_id) - batch["type_ids"] = pad_batch_data( - batch_type_ids, pad_id=self.pad_id) - batch["pos_ids"] = pad_batch_data(batch_pos_ids, pad_id=self.pad_id) - attention_mask = self._gen_self_attn_mask( - batch_token_ids, is_unidirectional=False) - - batch["attention_mask"] = attention_mask - batch["label_pos"] = label_pos - - if not is_infer: - batch_label = np.array(batch_label).astype("int64").reshape([-1, 1]) - batch["label"] = batch_label - batch["tgt_label"] = tgt_label - batch["tgt_pos"] = tgt_pos - - batch_data_id = [record.data_id for record in batch_records] - batch["data_id"] = np.array(batch_data_id).astype("int64").reshape( - [-1, 1]) - return batch diff --git a/hub_module/modules/text/text_generation/plato2_en_large/readers/plato_reader.py b/hub_module/modules/text/text_generation/plato2_en_large/readers/plato_reader.py deleted file mode 100644 index a408ef731926314c3a5f260b3bc31b8363ad9ca8..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_large/readers/plato_reader.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Plato Reader.""" - -import numpy as np - -from plato2_en_large.readers.dialog_reader import DialogReader -from plato2_en_large.utils import pad_batch_data -from plato2_en_large.utils.masking import mask - - -class PlatoReader(DialogReader): - """The implement of PlatoReader""" - - def __init__(self, args): - super(PlatoReader, self).__init__(args) - self.latent_type_size = args.latent_type_size - self.use_bow = args.use_bow - - def _pad_batch_records(self, batch_records, is_infer): - """ - Padding batch records and construct model's inputs. - """ - batch = {} - batch_token_ids = [record.token_ids for record in batch_records] - batch_type_ids = [record.type_ids for record in batch_records] - batch_pos_ids = [record.pos_ids for record in batch_records] - - batch_tgt_start_idx = [record.tgt_start_idx for record in batch_records] - - batch_size = len(batch_token_ids) - - # padding - batch["token_ids"] = pad_batch_data(batch_token_ids, pad_id=self.pad_id) - batch["type_ids"] = pad_batch_data(batch_type_ids, pad_id=self.pad_id) - batch["pos_ids"] = pad_batch_data(batch_pos_ids, pad_id=self.pad_id) - - batch["generation_mask"] = self._gen_self_attn_mask( - batch_token_ids, - batch_tgt_start_idx=batch_tgt_start_idx, - is_unidirectional=True, - shift_len=1) - if not is_infer: - batch["recognition_mask"] = self._gen_self_attn_mask( - batch_token_ids, is_unidirectional=False, shift_len=1) - - if is_infer: - tgt_ids = np.array([[[self.bos_id]]] * batch_size, dtype="int64") - if self.continuous_position: - tgt_pos = np.array(batch_tgt_start_idx, dtype="int64") - else: - tgt_pos = np.zeros_like(batch_tgt_start_idx, dtype="int64") - tgt_pos = tgt_pos.reshape(-1, 1, 1) - batch["init_score"] = np.zeros_like( - tgt_ids, dtype="float32").reshape(-1, 1).tolist() - batch["tgt_ids"] = tgt_ids.tolist() - batch["tgt_pos"] = tgt_pos.tolist() - batch["parent_idx"] = np.array(range(batch_size), dtype="int32") - - batch["tgt_generation_mask"] = batch[ - "generation_mask"][:, 0:1, :].astype("float32") - else: - mask_return_list = mask( - batch_tokens=batch_token_ids, - vocab_size=self.vocab_size, - sent_b_starts=batch_tgt_start_idx, - is_unidirectional=True, - use_latent=True, - use_bow=self.use_bow) - batch["tgt_label"] = mask_return_list[0] - batch["tgt_pos"] = mask_return_list[1] - if self.use_bow: - batch["bow_label"] = mask_return_list[2] - batch["bow_pos"] = mask_return_list[3] - - batch_data_id = [record.data_id for record in batch_records] - batch["data_id"] = np.array(batch_data_id).astype("int64").reshape( - [-1, 1]) - return batch diff --git a/hub_module/modules/text/text_generation/plato2_en_large/tasks/dialog_generation.py b/hub_module/modules/text/text_generation/plato2_en_large/tasks/dialog_generation.py deleted file mode 100644 index d36f293ac720fdf24554bbe8240fa1e4d3b9d08c..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_large/tasks/dialog_generation.py +++ /dev/null @@ -1,307 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Dialogue generation task."""
-
-from collections import defaultdict
-import math
-
-from plato2_en_large.readers.dialog_reader import DialogReader
-from plato2_en_large.readers.plato_reader import PlatoReader
-from plato2_en_large.tasks import register_task
-from plato2_en_large.tasks.task_base import Task
-from plato2_en_large.utils.args import str2bool
-from plato2_en_large.utils.inference import create_predictor
-
-
-def post_process_context(token_ids, reader, merge=True):
-    """Post-process the context sequence."""
-    context = []
-    utt = []
-    for tok_id in token_ids[1:]:
-        if tok_id == reader.eos_id:
-            utt = reader.tokenizer.convert_ids_to_tokens(utt)
-            if merge:
-                utt = reader.tokenizer.merge_subword(utt)
-            context.append(utt)
-            utt = []
-        else:
-            utt.append(tok_id)
-    return context
-
-
-def post_process_response(token_ids, reader, merge=True):
-    """
-    Post-process the decoded sequence. Truncate from the first eos_id token
-    and remove the bos_id and eos_id tokens currently.
-    """
-    eos_pos = len(token_ids)
-    for i, tok_id in enumerate(token_ids):
-        if tok_id == reader.eos_id:
-            eos_pos = i
-            break
-    token_ids = token_ids[1:eos_pos]
-    response = reader.tokenizer.convert_ids_to_tokens(token_ids)
-    if merge:
-        response = reader.tokenizer.merge_subword(response)
-    return token_ids, response
-
-
-def get_cross_turn_repetition(context, pred_tokens, eos_idx, is_cn=False):
-    """Get cross-turn repetition."""
-    if len(pred_tokens) == 0:
-        return 1.0
-    if is_cn:
-        context = ["".join(utt) for utt in context]
-        pred_tokens = "".join(pred_tokens)
-
-    pred_tri_grams = set()
-    for i in range(len(pred_tokens) - 2):
-        tri_gram = tuple(pred_tokens[i:i + 3])
-        pred_tri_grams.add(tri_gram)
-    for utt in context:
-        for i in range(len(utt) - 2):
-            tri_gram = tuple(utt[i:i + 3])
-            if tri_gram in pred_tri_grams:
-                return 1.0
-    return 0.0
-
-
-def get_in_turn_repetition(pred, is_cn=False):
-    """Get in-turn repetition."""
-    if len(pred) == 0:
-        return 1.0
-    if isinstance(pred[0], str):
-        pred = [tok.lower() for tok in pred]
-        if is_cn:
-            pred = "".join(pred)
-    tri_grams = set()
-    for i in range(len(pred) - 2):
-        tri_gram = tuple(pred[i:i + 3])
-        if tri_gram in tri_grams:
-            return 1.0
-        tri_grams.add(tri_gram)
-    return 0.0
-
-
-def get_nsp_score_batch(nsp_predictor, predictions):
-    """
-    Get NSP scores of a batch.
- """ - import argparse - from collections import namedtuple - - from plato2_en_large.readers.nsp_reader import NSPReader - from plato2_en_large.utils.args import parse_args - from plato2_en_large.tasks.next_sentence_prediction import NextSentencePrediction - - parser = argparse.ArgumentParser() - NextSentencePrediction.add_cmdline_args(parser) - parser.add_argument("--num_samples", type=int, default=None) - parser.add_argument("--config_path", type=str, required=True) - parser.add_argument("--mem_efficient", type=str2bool, default=False) - - args = parse_args(parser, allow_unknown=True) - args.load(args.config_path) - if not args.mem_efficient: - if args.num_samples: - args.batch_size *= args.num_samples - if args.latent_type_size: - args.batch_size *= args.latent_type_size - args.tokenized_input = True - reader = NSPReader(args) - - def __reader__(): - headers = ["src", "tgt", "data_id"] - - Example = namedtuple("Example", headers) - - for i, info in enumerate(predictions): - context = post_process_context( - info["context_token_ids"], reader, merge=False) - context_tokenized_input = " [SEP] ".join( - " ".join(utt) for utt in context) - _, response = post_process_response( - info["response_token_ids"], reader, merge=False) - response_tokenized_input = " ".join(response) - example = Example( - src=context_tokenized_input, - tgt=response_tokenized_input, - data_id=i) - record = reader._convert_example_to_record(example, is_infer=True) - yield record - return - - generator = reader.data_generator( - reader=__reader__, - is_infer=True, - phase="test", - ) - - steps = 0 - for data in generator(): - outputs = nsp_predictor(data) - for probs, data_id in zip(outputs[0], outputs[-1]): - data_id = data_id[0] - info = predictions[data_id] - info["nsp_score"] = float(probs[1]) - - return - - -@register_task("DialogGeneration") -class DialogGeneration(Task): - """ - Define dialogue response generation. - """ - - @classmethod - def add_cmdline_args(cls, parser): - """Add cmdline argurments.""" - group = parser.add_argument_group("Task") - group.add_argument("--do_generation", type=str2bool, default=False) - group.add_argument("--is_cn", type=str2bool, default=False) - - group.add_argument("--nsp_inference_model_path", type=str, default=None) - group.add_argument( - "--nsp_attention_style", type=str, default="bidirectional") - - group.add_argument("--ranking_score", type=str, default="decode_score") - - args, _ = parser.parse_known_args() - if args.model == "Plato": - PlatoReader.add_cmdline_args(parser) - else: - DialogReader.add_cmdline_args(parser) - return group - - def __init__(self, args): - super(DialogGeneration, self).__init__(args) - self.do_generation = args.do_generation - self.is_cn = args.is_cn - if args.model == "Plato": - self.reader = PlatoReader(args) - else: - self.reader = DialogReader(args) - - if args.nsp_inference_model_path: - self.nsp_predictor = create_predictor(args.nsp_inference_model_path, - args.is_distributed) - self.nsp_attention_style = args.nsp_attention_style - else: - self.nsp_predictor = None - - self.ranking_score = args.ranking_score - self.max_dec_len = args.max_dec_len - return - - def _post_process_generation_output(self, predictions): - """ - Post process generation output. - - Calculate repetion, reranking. 
- """ - for info in predictions: - tokens = post_process_context(info["context_token_ids"], - self.reader) - pred_token_ids, pred_tokens = post_process_response( - info["response_token_ids"], self.reader) - info["context"] = " [SEP] ".join(" ".join(u) for u in tokens) - info["response"] = " ".join(pred_tokens) - info["num_token"] = len(pred_token_ids) - info["cross_turn_repetition"] = get_cross_turn_repetition( - tokens, pred_tokens, self.reader.eos_id, self.is_cn) - info["in_turn_repetition"] = max( - get_in_turn_repetition(pred_tokens, self.is_cn), - get_in_turn_repetition(pred_token_ids)) - if self.nsp_predictor is not None: - get_nsp_score_batch(self.nsp_predictor, predictions) - - group = defaultdict(list) - for info in predictions: - group[info["data_id"]].append(info) - - predictions = [] - for data_id in group: - infos = group[data_id] - for info in infos: - info["score"] = info[self.ranking_score] - if self.max_dec_len is not None and info[ - "num_token"] >= self.max_dec_len: # not ending - info["score"] -= 1e3 - elif info["cross_turn_repetition"] > 0: - info["score"] -= 1e3 - elif info["in_turn_repetition"] > 0: - info["score"] -= 1e3 - infos = sorted(infos, key=lambda info: -info["score"]) - pred = infos[0] - keep_attr = ["data_id", "score", "response"] - pred = {k: pred[k] for k in keep_attr} - predictions.append(pred) - return predictions - - def _post_process_scoring_output(self, predictions): - raise NotImplementedError - - def _post_process_infer_output(self, predictions): - if self.do_generation: - return self._post_process_generation_output(predictions) - else: - return self._post_process_scoring_output(predictions) - - def merge_mertrics_and_statistics(self, outputs, part_outputs): - """ - Merge two evaulation output. - """ - if outputs is None: - return part_outputs - - if part_outputs is None: - return outputs - - batch_size = outputs.pop("batch_size") - tokens_num = outputs.pop("tokens_num") - part_batch_size = part_outputs.pop("batch_size") - part_tokens_num = part_outputs.pop("tokens_num") - - new_outputs = { - "batch_size": batch_size + part_batch_size, - "tokens_num": tokens_num + part_tokens_num - } - for k in outputs: - if k.startswith("token_"): - new_outputs[k] = (outputs[k] * tokens_num + part_outputs[k] * - part_tokens_num) / new_outputs["tokens_num"] - else: - new_outputs[k] = (outputs[k] * batch_size + part_outputs[k] * - part_batch_size) / new_outputs["batch_size"] - return new_outputs - - def get_metrics(self, outputs): - """ - Get metrics. - """ - if outputs is None: - raise ValueError("metrics is None") - outputs = dict(outputs) - outputs.pop("batch_size", None) - outputs.pop("tokens_num", None) - metrics = {} - for k in outputs: - if k.startswith("token_"): - metrics[k[6:]] = outputs[k] - else: - metrics[k] = outputs[k] - if k == "token_lm_loss": - metrics["ppl"] = math.exp(outputs[k]) - return metrics diff --git a/hub_module/modules/text/text_generation/plato2_en_large/tasks/next_sentence_prediction.py b/hub_module/modules/text/text_generation/plato2_en_large/tasks/next_sentence_prediction.py deleted file mode 100644 index 9cecf3cf772671a8858d386c6fc70f5ad05f2f21..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_large/tasks/next_sentence_prediction.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Next sentence prediction task."""
-
-from plato2_en_large.readers.nsp_reader import NSPReader
-from plato2_en_large.tasks import register_task
-from plato2_en_large.tasks.task_base import Task
-from plato2_en_large.utils.args import str2bool
-
-
-@register_task("NextSentencePrediction")
-class NextSentencePrediction(Task):
-    """
-    Define the next sentence prediction task.
-    """
-
-    @classmethod
-    def add_cmdline_args(cls, parser):
-        """Add cmdline arguments."""
-        group = NSPReader.add_cmdline_args(parser)
-        return group
-
-    def __init__(self, args):
-        super(NextSentencePrediction, self).__init__(args)
-        self.reader = NSPReader(args)
-        return
-
-    def _post_process_infer_output(self, predictions):
-        predictions = [{
-            "data_id": data_id.tolist()[0],
-            "score": score.tolist()[1]
-        } for data_id, score in zip(predictions["data_id"],
-                                    predictions["scores"])]
-        return predictions
diff --git a/hub_module/modules/text/text_generation/plato2_en_large/tasks/task_base.py b/hub_module/modules/text/text_generation/plato2_en_large/tasks/task_base.py
deleted file mode 100644
index e89338636fe25362c101246ef1c35b4dc9c153c4..0000000000000000000000000000000000000000
--- a/hub_module/modules/text/text_generation/plato2_en_large/tasks/task_base.py
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Task base."""
-
-from abc import abstractmethod, ABC
-
-from plato2_en_large.models.model_base import Model
-
-
-class Task(ABC):
-    """
-    Basic task.
-    """
-
-    def __init__(self, args):
-        return
-
-    def train_step(self, model: Model, inputs):
-        """Run one training step."""
-        outputs = model.train_step(inputs)
-        outputs = {k: v.tolist()[0] for k, v in outputs.items()}
-        return outputs
-
-    def eval_step(self, model: Model, inputs):
-        """Run one evaluation step."""
-        outputs = model.eval_step(inputs)
-        outputs = {k: v.tolist()[0] for k, v in outputs.items()}
-        return outputs
-
-    def infer_step(self, model: Model, inputs):
-        """Run one inference step."""
-        predictions = model.infer_step(inputs)
-        outputs = self._post_process_infer_output(predictions)
-        return outputs
-
-    def _post_process_infer_output(self, predictions):
-        """
-        Post-process inference output.
-        """
-        return predictions
-
-    def merge_mertrics_and_statistics(self, outputs, part_outputs):
-        """
-        Merge metrics and statistics.
- """ - if outputs is None: - return part_outputs - - if part_outputs is None: - return outputs - - batch_size = outputs.pop("batch_size") - part_batch_size = part_outputs.pop("batch_size") - - new_outputs = { - "batch_size": batch_size + part_batch_size, - } - for k in outputs: - new_outputs[k] = (outputs[k] * batch_size + part_outputs[k] * - part_batch_size) / new_outputs["batch_size"] - return new_outputs - - def get_metrics(self, outputs): - """ - Get metrics. - """ - if outputs is None: - raise ValueError("metrics is None") - outputs = dict(outputs) - # pop statistics - outputs.pop("batch_size", None) - return outputs - - def get_data_loader(self, model, *args, is_infer=False, **kwargs): - generator = self.reader.data_generator( - *args, is_infer=is_infer, **kwargs) - return model.get_data_loader(generator, is_infer) diff --git a/hub_module/modules/text/text_generation/plato2_en_large/utils/__init__.py b/hub_module/modules/text/text_generation/plato2_en_large/utils/__init__.py deleted file mode 100644 index 12da7e947a821727e8ff3ee6c683c84f64583281..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_large/utils/__init__.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Utils.""" - -from itertools import chain -import os -import time -import sys - -import numpy as np -import paddle.fluid as fluid - - -def to_lodtensor(data, place): - """Convert data to LoDTensor.""" - if place is None: - return data - lengths = [] - while isinstance(data[0], list): - lengths.append(list(map(len, data))) - data = [x for xs in data for x in xs] - if isinstance(data[0], float): - data = np.array(data, dtype="float32") - else: - data = np.array(data, dtype="int64") - data_tensor = fluid.LoDTensor() - data_tensor.set(data, place) - data_tensor.set_recursive_sequence_lengths(lengths) - return data_tensor - - -def pad_batch_data(insts, pad_id=0): - """Pad the instances to the max sequence length in batch. 
""" - max_len = max(map(len, insts)) - inst_data = np.array( - [list(inst) + [pad_id] * (max_len - len(inst)) for inst in insts]) - return inst_data.astype("int64").reshape([-1, max_len, 1]) - - -def convert_lodtensor_to_list(tensor): - data = np.array(tensor) - recursive_sequence_lengths = tensor.recursive_sequence_lengths() - recursive_sequence_lengths.reverse() - for i, lengths in enumerate(recursive_sequence_lengths): - shift = 0 - new_data = [] - for j, l in enumerate(lengths): - new_data.append(data[shift:shift + l]) - shift += l - data = new_data - return data - - -def concatenate_lodtensors(tensors, place): - """Concatenate LoD tensors.""" - data = [] - recursive_sequence_lengths = [] - for tensor in tensors: - data.append(np.array(tensor)) - recursive_sequence_lengths.append(tensor.recursive_sequence_lengths()) - data = np.concatenate(data, axis=0) - recursive_sequence_lengths = [ - sum(lens, []) for lens in zip(*recursive_sequence_lengths) - ] - data_tensor = fluid.LoDTensor() - data_tensor.set(data, place) - data_tensor.set_recursive_sequence_lengths(recursive_sequence_lengths) - assert data_tensor.has_valid_recursive_sequence_lengths() - return data_tensor - - -def repeat_array_or_tensor(array_or_tensor, place, times): - """Repeate numpy array or LoD tensor.""" - if isinstance(array_or_tensor, fluid.LoDTensor): - data = [np.array(array_or_tensor)] * times - recursive_sequence_lengths = [ - array_or_tensor.recursive_sequence_lengths() - ] * times - data = np.concatenate(data, axis=0) - recursive_sequence_lengths = [ - sum(lens, []) for lens in zip(*recursive_sequence_lengths) - ] - data_tensor = fluid.LoDTensor() - data_tensor.set(data, place) - data_tensor.set_recursive_sequence_lengths(recursive_sequence_lengths) - assert data_tensor.has_valid_recursive_sequence_lengths() - return data_tensor - elif isinstance(array_or_tensor, list): - return list(chain(*([array_or_tensor] * times))) - else: - return np.concatenate([array_or_tensor] * times, axis=0) - - -def slice_array_or_tensor(array_or_tensor, place, begin, end): - """Repeate numpy array or LoD tensor.""" - if isinstance(array_or_tensor, fluid.LoDTensor): - data = convert_lodtensor_to_list(array_or_tensor) - data = data[begin:end] - return to_lodtensor(data, place) - else: - return array_or_tensor[begin:end] - - -def init_checkpoint(exe, init_checkpoint_path, main_program): - """Initialize from checkpoint.""" - assert os.path.exists( - init_checkpoint_path), "[%s] cann't be found." % init_checkpoint_path - - def existed_persitables(var): - """Whether var is a persistables.""" - if not fluid.io.is_persistable(var): - return False - return os.path.exists(os.path.join(init_checkpoint_path, var.name)) - - fluid.io.load_vars( - exe, - init_checkpoint_path, - main_program=main_program, - predicate=existed_persitables) - print(f"Load model from {init_checkpoint_path}") - - -def init_pretraining_params(exe, pretraining_params_path, main_program): - """Only initialize parameters.""" - assert os.path.exists(pretraining_params_path - ), "[%s] cann't be found." 
% pretraining_params_path - - def existed_params(var): - """Whether var is a parameter.""" - if not isinstance(var, fluid.framework.Parameter): - return False - return os.path.exists(os.path.join(pretraining_params_path, var.name)) - - fluid.io.load_vars( - exe, - pretraining_params_path, - main_program=main_program, - predicate=existed_params) - print(f"Load pretraining parameters from {pretraining_params_path}.") - - return - - -class Timer(object): - def __init__(self): - self._pass_time = 0 - self._start_time = None - return - - def start(self): - self._start_time = time.time() - - def pause(self): - self._pass_time += time.time() - self._start_time - self._start_time = None - - def reset(self): - self._pass_time = 0 - - @property - def pass_time(self): - if self._start_time is None: - return self._pass_time - else: - return self._pass_time + time.time() - self._start_time - - -ERROR_MESSAGE = "\nYou can not set use_cuda = True in the model because you are using paddlepaddle-cpu.\n \ - Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_cuda = False to run models on CPU.\n" - - -def check_cuda(use_cuda, err=ERROR_MESSAGE): - """Check CUDA.""" - try: - if use_cuda and not fluid.is_compiled_with_cuda(): - print(err) - sys.exit(1) - except Exception as e: - pass diff --git a/hub_module/modules/text/text_generation/plato2_en_large/utils/inference.py b/hub_module/modules/text/text_generation/plato2_en_large/utils/inference.py deleted file mode 100644 index f9b01d3e30163519c815311dd54034e74d8e4947..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_large/utils/inference.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Inference utils.""" - -import os - -import paddle.fluid as fluid - - -def create_predictor(inference_model_path, is_distributed=False): - """Create predictor.""" - if is_distributed: - dev_count = fluid.core.get_cuda_device_count() - gpu_id = int(os.getenv("FLAGS_selected_gpus")) - else: - dev_count = 1 - gpu_id = 0 - - place = fluid.CUDAPlace(gpu_id) - exe = fluid.Executor(place) - - scope = fluid.Scope() - with fluid.scope_guard(scope): - inference_prog, feed_target_names, fetch_targets = fluid.io.load_inference_model( - inference_model_path, exe) - - def __predict__(inputs): - with fluid.scope_guard(scope): - outputs = exe.run( - inference_prog, - feed=inputs, - fetch_list=fetch_targets, - return_numpy=True) - return outputs - - return __predict__ diff --git a/hub_module/modules/text/text_generation/plato2_en_large/utils/masking.py b/hub_module/modules/text/text_generation/plato2_en_large/utils/masking.py deleted file mode 100644 index 62c8e7f096b19f2adfc5f4afb5c40e78d02b9792..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_generation/plato2_en_large/utils/masking.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
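As a quick aside on the utils above: `pad_batch_data` right-pads every instance to the longest sequence in the batch and reshapes the result to `[batch_size, max_len, 1]`. A toy invocation, using a standalone copy of the helper for illustration:

```python
import numpy as np

def pad_batch_data(insts, pad_id=0):
    max_len = max(map(len, insts))
    inst_data = np.array(
        [list(inst) + [pad_id] * (max_len - len(inst)) for inst in insts])
    return inst_data.astype("int64").reshape([-1, max_len, 1])

padded = pad_batch_data([[5, 6, 7], [8, 9]], pad_id=0)
print(padded.shape)       # (2, 3, 1)
print(padded[1].ravel())  # [8 9 0] -- second instance padded with pad_id
```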
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Reader utils.""" - -import numpy as np - -import plato2_en_large.utils - - -def mask(batch_tokens, - vocab_size, - bos_id=1, - eos_id=2, - mask_id=3, - sent_b_starts=None, - labels=None, - is_unidirectional=False, - use_latent=False, - use_bow=False): - """ - Add mask for batch_tokens, return out, mask_label, mask_pos; - Note: mask_pos responding the batch_tokens after padded; - """ - batch_tokens = np.copy(batch_tokens) - max_len = max(map(len, batch_tokens)) - mask_label = [] - mask_pos = [] - if labels is not None: - label_pos = [] - - if is_unidirectional: - # unidirectional language model - if use_latent: - max_len += 1 - shift_len = 1 - else: - shift_len = 0 - for sent_index, sent in enumerate(batch_tokens): - sent_b_index = sent_b_starts[ - sent_index] if sent_b_starts is not None else 0 - need_cal = True - if labels is not None: - label_pos.append(sent_index * max_len + len(sent) - 1 + - shift_len) - if labels[sent_index] == 0: - need_cal = False - mask_label.extend(sent[sent_b_index + 1:]) - mask_pos.extend([ - sent_index * max_len + i + shift_len - for i in range(sent_b_index, - len(sent) - 1) - ]) - mask_label = np.array(mask_label).astype("int64").reshape([-1, 1]) - mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1]) - return_list = [mask_label, mask_pos] - - # latent related (bow label and pos) - if use_latent and use_bow: - bow_label = [] - bow_pos = [] - for sent_index, sent in enumerate(batch_tokens): - sent_b_index = sent_b_starts[ - sent_index] if sent_b_starts is not None else 0 - - def __filter__(tok_id): - # TODO: exclude [EOS] from bow loss - return True - - bow_pos.extend([ - sent_index for i in range(sent_b_index + 1, len(sent)) - if __filter__(sent[i]) - ]) - bow_label.extend([ - sent[i] for i in range(sent_b_index + 1, len(sent)) - if __filter__(sent[i]) - ]) - bow_label = np.array(bow_label).astype("int64").reshape([-1, 1]) - bow_pos = np.array(bow_pos).astype("int64").reshape([-1, 1]) - return_list += [bow_label, bow_pos] - else: - # bidirectional mask language model - total_token_num = sum(map(len, batch_tokens)) - prob_mask = np.random.rand(total_token_num) - # TODO: fix replace_ids, include [UNK] - replace_ids = np.random.randint( - 3, high=vocab_size, size=total_token_num) - prob_index = 0 - for sent_index, sent in enumerate(batch_tokens): - # add pair label position - if labels is not None: - label_pos.append(sent_index * max_len) - - # add mask label and position - for token_index, token in enumerate(sent): - if token == eos_id or token == bos_id: - continue - prob = prob_mask[prob_index + token_index] - if prob > 0.15: - continue - elif 0.03 < prob <= 0.15: - # mask - mask_label.append(sent[token_index]) - sent[token_index] = mask_id - mask_pos.append(sent_index * max_len + token_index) - elif 0.015 < prob <= 0.03: - # random replace - mask_label.append(sent[token_index]) - sent[token_index] = replace_ids[prob_index + token_index] - mask_pos.append(sent_index * max_len + token_index) - else: - # 
keep the original token - mask_label.append(sent[token_index]) - mask_pos.append(sent_index * max_len + token_index) - - prob_index += len(sent) - - mask_label = np.array(mask_label).astype("int64").reshape([-1, 1]) - mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1]) - return_list = [batch_tokens, mask_label, mask_pos] - - if labels is not None: - label_pos = np.array(label_pos).astype("int64").reshape([-1, 1]) - assert len(labels) == len(label_pos) - return_list.append(label_pos) - return return_list diff --git a/hub_module/modules/text/text_review/porn_detection_cnn/module.py b/hub_module/modules/text/text_review/porn_detection_cnn/module.py deleted file mode 100644 index 5c21e8ea0c376b65d2288153986661d7294ec608..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_review/porn_detection_cnn/module.py +++ /dev/null @@ -1,172 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import json -import math -import os -import six - -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.common.paddle_helper import get_variable_info -from paddlehub.module.module import moduleinfo, serving -from paddlehub.reader import tokenization - -from porn_detection_cnn.processor import load_vocab, preprocess, postprocess - - -@moduleinfo( - name="porn_detection_cnn", - version="1.1.0", - summary="Baidu's open-source Porn Detection Model.", - author="baidu-nlp", - author_email="", - type="nlp/sentiment_analysis") -class PornDetectionCNN(hub.NLPPredictionModule): - def _initialize(self): - """ - initialize with the necessary elements - """ - self.pretrained_model_path = os.path.join(self.directory, "infer_model") - self.tokenizer_vocab_path = os.path.join(self.directory, "assets", - "vocab.txt") - self.vocab_path = os.path.join(self.directory, "assets", - "word_dict.txt") - self.vocab = load_vocab(self.vocab_path) - self.sequence_max_len = 256 - self.tokenizer = tokenization.FullTokenizer(self.tokenizer_vocab_path) - - self.param_file = os.path.join(self.directory, "assets", "params.txt") - - self.predict = self.detection - - self._set_config() - - def context(self, trainable=False): - """ - Get the input ,output and program of the pretrained porn_detection_cnn - Args: - trainable(bool): whether fine-tune the pretrained parameters of porn_detection_cnn or not - Returns: - inputs(dict): the input variables of porn_detection_cnn (words) - outputs(dict): the output variables of porn_detection_cnn (the sentiment prediction results) - main_program(Program): the main_program of porn_detection_cnn with pretrained prameters - """ - place = fluid.CPUPlace() - exe = fluid.Executor(place) - program, feed_target_names, fetch_targets = fluid.io.load_inference_model( - dirname=self.pretrained_model_path, executor=exe) - - with open(self.param_file, 'r') as file: - params_list = file.readlines() - for param in params_list: - param = param.strip() - var = program.global_block().var(param) - var_info = get_variable_info(var) - - program.global_block().create_parameter( - shape=var_info['shape'], - dtype=var_info['dtype'], - name=var_info['name']) - - for param in program.global_block().iter_parameters(): - param.trainable = trainable - - for name, var in program.global_block().vars.items(): - if name == feed_target_names[0]: - inputs = {"words": var} - # output of sencond layer from the end prediction layer (fc-softmax) - if name == "@HUB_porn_detection_cnn@layer_norm_1.tmp_2": - 
outputs = { - "class_probs": fetch_targets[0], - "sentence_feature": var - } - return inputs, outputs, program - - @serving - def detection(self, texts=[], data={}, use_gpu=False, batch_size=1): - """ - Get the porn prediction results results with the texts as input - - Args: - texts(list): the input texts to be predicted, if texts not data - data(dict): key must be 'text', value is the texts to be predicted, if data not texts - use_gpu(bool): whether use gpu to predict or not - batch_size(int): the program deals once with one batch - - Returns: - results(list): the porn prediction results - """ - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - use_gpu = False - - if texts != [] and isinstance(texts, list) and data == {}: - predicted_data = texts - elif texts == [] and isinstance(data, dict) and isinstance( - data.get('text', None), list) and data['text']: - predicted_data = data["text"] - else: - raise ValueError( - "The input data is inconsistent with expectations.") - - predicted_data = self.to_unicode(predicted_data) - start_idx = 0 - iteration = int(math.ceil(len(predicted_data) / batch_size)) - results = [] - for i in range(iteration): - if i < (iteration - 1): - batch_data = predicted_data[start_idx:(start_idx + batch_size)] - else: - batch_data = predicted_data[start_idx:] - - start_idx = start_idx + batch_size - processed_results = preprocess(batch_data, self.tokenizer, - self.vocab, self.sequence_max_len) - tensor_words = self.texts2tensor(processed_results) - - if use_gpu: - batch_out = self.gpu_predictor.run([tensor_words]) - else: - batch_out = self.cpu_predictor.run([tensor_words]) - batch_result = postprocess(batch_out[0], processed_results) - results += batch_result - return results - - def get_labels(self): - """ - Get the labels which was used when pretraining - Returns: - self.labels(dict) - """ - self.labels = {"porn": 1, "not_porn": 0} - return self.labels - - -if __name__ == "__main__": - porn_detection_cnn = PornDetectionCNN() - test_text = ["黄片下载", "打击黄牛党"] - - results = porn_detection_cnn.detection(texts=test_text, batch_size=9) - for index, text in enumerate(test_text): - results[index]["text"] = text - for index, result in enumerate(results): - if six.PY2: - print( - json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) - input_dict = {"text": test_text} - results = porn_detection_cnn.detection(data=input_dict) - for index, text in enumerate(test_text): - results[index]["text"] = text - for index, result in enumerate(results): - if six.PY2: - print( - json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) diff --git a/hub_module/modules/text/text_review/porn_detection_gru/module.py b/hub_module/modules/text/text_review/porn_detection_gru/module.py deleted file mode 100644 index d4135e83d0a0314dccd8bf7e453804b88f8cc1d7..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_review/porn_detection_gru/module.py +++ /dev/null @@ -1,173 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import json -import math -import os -import six - -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.common.paddle_helper import get_variable_info -from paddlehub.module.module import moduleinfo, serving -from paddlehub.reader import tokenization - -from porn_detection_gru.processor import load_vocab, preprocess, postprocess - - 
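As the `__main__` blocks in these modules demonstrate, each detector can also be obtained through PaddleHub instead of being instantiated directly. A minimal usage sketch, assuming the module has been installed under its registered name:

```python
import paddlehub as hub

module = hub.Module(name="porn_detection_gru")
results = module.detection(
    texts=["黄片下载", "打击黄牛党"], use_gpu=False, batch_size=2)
for item in results:
    print(item)  # one dict per input text with the porn / not_porn prediction
```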
-@moduleinfo( - name="porn_detection_gru", - version="1.1.0", - summary="Baidu's open-source Porn Detection Model.", - author="baidu-nlp", - author_email="", - type="nlp/sentiment_analysis") -class PornDetectionGRU(hub.NLPPredictionModule): - def _initialize(self): - """ - initialize with the necessary elements - """ - self.pretrained_model_path = os.path.join(self.directory, "infer_model") - self.tokenizer_vocab_path = os.path.join(self.directory, "assets", - "vocab.txt") - self.vocab_path = os.path.join(self.directory, "assets", - "word_dict.txt") - self.vocab = load_vocab(self.vocab_path) - self.sequence_max_len = 256 - self.tokenizer = tokenization.FullTokenizer(self.tokenizer_vocab_path) - - self.param_file = os.path.join(self.directory, "assets", "params.txt") - - self.predict = self.detection - - self._set_config() - - def context(self, trainable=False): - """ - Get the input ,output and program of the pretrained porn_detection_gru - Args: - trainable(bool): whether fine-tune the pretrained parameters of porn_detection_gru or not - Returns: - inputs(dict): the input variables of porn_detection_gru (words) - outputs(dict): the output variables of porn_detection_gru (the sentiment prediction results) - main_program(Program): the main_program of lac with pretrained prameters - """ - place = fluid.CPUPlace() - exe = fluid.Executor(place) - program, feed_target_names, fetch_targets = fluid.io.load_inference_model( - dirname=self.pretrained_model_path, executor=exe) - - with open(self.param_file, 'r') as file: - params_list = file.readlines() - for param in params_list: - param = param.strip() - var = program.global_block().var(param) - var_info = get_variable_info(var) - program.global_block().create_parameter( - shape=var_info['shape'], - dtype=var_info['dtype'], - name=var_info['name']) - - for param in program.global_block().iter_parameters(): - param.trainable = trainable - - for name, var in program.global_block().vars.items(): - if name == feed_target_names[0]: - inputs = {"words": var} - # output of sencond layer from the end prediction layer (fc-softmax) - if name == "@HUB_porn_detection_gru@layer_norm_0.tmp_2": - outputs = { - "class_probs": fetch_targets[0], - "sentence_feature": var - } - return inputs, outputs, program - - @serving - def detection(self, texts=[], data={}, use_gpu=False, batch_size=1): - """ - Get the porn prediction results results with the texts as input - - Args: - texts(list): the input texts to be predicted, if texts not data - data(dict): key must be 'text', value is the texts to be predicted, if data not texts - use_gpu(bool): whether use gpu to predict or not - batch_size(int): the program deals once with one batch - - Returns: - results(list): the porn prediction results - """ - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - use_gpu = False - - if texts != [] and isinstance(texts, list) and data == {}: - predicted_data = texts - elif texts == [] and isinstance(data, dict) and isinstance( - data.get('text', None), list) and data['text']: - predicted_data = data["text"] - else: - raise ValueError( - "The input data is inconsistent with expectations.") - - predicted_data = self.to_unicode(predicted_data) - start_idx = 0 - iteration = int(math.ceil(len(predicted_data) / batch_size)) - results = [] - for i in range(iteration): - if i < (iteration - 1): - batch_data = predicted_data[start_idx:(start_idx + batch_size)] - else: - batch_data = predicted_data[start_idx:] - - start_idx = start_idx + batch_size - 
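# Note: each iteration below tokenizes the current slice with the module
# vocabulary, converts the id sequences to a LoDTensor, runs the CPU or GPU
# predictor, and maps the output probabilities back onto the input texts.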
processed_results = preprocess(batch_data, self.tokenizer, - self.vocab, self.sequence_max_len) - tensor_words = self.texts2tensor(processed_results) - - if use_gpu: - batch_out = self.gpu_predictor.run([tensor_words]) - else: - batch_out = self.cpu_predictor.run([tensor_words]) - batch_result = postprocess(batch_out[0], processed_results) - results += batch_result - return results - - def get_labels(self): - """ - Get the labels which was used when pretraining - Returns: - self.labels(dict) - """ - self.labels = {"porn": 1, "not_porn": 0} - return self.labels - - -if __name__ == "__main__": - porn_detection_gru = PornDetectionGRU() - porn_detection_gru.context() - # porn_detection_gru = hub.Module(name='porn_detection_gru') - test_text = ["黄片下载", "打击黄牛党"] - - results = porn_detection_gru.detection(texts=test_text) - for index, text in enumerate(test_text): - results[index]["text"] = text - for index, result in enumerate(results): - if six.PY2: - print( - json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) - input_dict = {"text": test_text} - results = porn_detection_gru.detection(data=input_dict) - for index, text in enumerate(test_text): - results[index]["text"] = text - for index, result in enumerate(results): - if six.PY2: - print( - json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) diff --git a/hub_module/modules/text/text_review/porn_detection_lstm/module.py b/hub_module/modules/text/text_review/porn_detection_lstm/module.py deleted file mode 100644 index 960babead42942f736dbb5edc32d99e2d3a0ec7d..0000000000000000000000000000000000000000 --- a/hub_module/modules/text/text_review/porn_detection_lstm/module.py +++ /dev/null @@ -1,172 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import json -import math -import os -import six - -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.common.paddle_helper import get_variable_info -from paddlehub.module.module import moduleinfo, serving -from paddlehub.reader import tokenization - -from porn_detection_lstm.processor import load_vocab, preprocess, postprocess - - -@moduleinfo( - name="porn_detection_lstm", - version="1.1.0", - summary="Baidu's open-source Porn Detection Model.", - author="baidu-nlp", - author_email="", - type="nlp/sentiment_analysis") -class PornDetectionLSTM(hub.NLPPredictionModule): - def _initialize(self): - """ - initialize with the necessary elements - """ - self.pretrained_model_path = os.path.join(self.directory, "infer_model") - self.tokenizer_vocab_path = os.path.join(self.directory, "assets", - "vocab.txt") - self.vocab_path = os.path.join(self.directory, "assets", - "word_dict.txt") - self.vocab = load_vocab(self.vocab_path) - self.sequence_max_len = 256 - self.tokenizer = tokenization.FullTokenizer(self.tokenizer_vocab_path) - - self.param_file = os.path.join(self.directory, "assets", "params.txt") - - self.predict = self.detection - - self._set_config() - - def context(self, trainable=False): - """ - Get the input ,output and program of the pretrained porn_detection_lstm - Args: - trainable(bool): whether fine-tune the pretrained parameters of porn_detection_lstm or not - Returns: - inputs(dict): the input variables of porn_detection_lstm (words) - outputs(dict): the output variables of porn_detection_lstm (the sentiment prediction results) - main_program(Program): the main_program of lac with pretrained 
prameters - """ - place = fluid.CPUPlace() - exe = fluid.Executor(place) - program, feed_target_names, fetch_targets = fluid.io.load_inference_model( - dirname=self.pretrained_model_path, executor=exe) - - with open(self.param_file, 'r') as file: - params_list = file.readlines() - for param in params_list: - param = param.strip() - var = program.global_block().var(param) - var_info = get_variable_info(var) - program.global_block().create_parameter( - shape=var_info['shape'], - dtype=var_info['dtype'], - name=var_info['name']) - - for param in program.global_block().iter_parameters(): - param.trainable = trainable - - for name, var in program.global_block().vars.items(): - if name == feed_target_names[0]: - inputs = {"words": var} - # output of sencond layer from the end prediction layer (fc-softmax) - if name == "@HUB_porn_detection_lstm@layer_norm_0.tmp_2": - outputs = { - "class_probs": fetch_targets[0], - "sentence_feature": var - } - return inputs, outputs, program - - @serving - def detection(self, texts=[], data={}, use_gpu=False, batch_size=1): - """ - Get the porn prediction results results with the texts as input - - Args: - texts(list): the input texts to be predicted, if texts not data - data(dict): key must be 'text', value is the texts to be predicted, if data not texts - use_gpu(bool): whether use gpu to predict or not - batch_size(int): the program deals once with one batch - - Returns: - results(list): the porn prediction results - """ - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - use_gpu = False - - if texts != [] and isinstance(texts, list) and data == {}: - predicted_data = texts - elif texts == [] and isinstance(data, dict) and isinstance( - data.get('text', None), list) and data['text']: - predicted_data = data["text"] - else: - raise ValueError( - "The input data is inconsistent with expectations.") - - predicted_data = self.to_unicode(predicted_data) - start_idx = 0 - iteration = int(math.ceil(len(predicted_data) / batch_size)) - results = [] - for i in range(iteration): - if i < (iteration - 1): - batch_data = predicted_data[start_idx:(start_idx + batch_size)] - else: - batch_data = predicted_data[start_idx:] - - start_idx = start_idx + batch_size - processed_results = preprocess(batch_data, self.tokenizer, - self.vocab, self.sequence_max_len) - tensor_words = self.texts2tensor(processed_results) - - if use_gpu: - batch_out = self.gpu_predictor.run([tensor_words]) - else: - batch_out = self.cpu_predictor.run([tensor_words]) - batch_result = postprocess(batch_out[0], processed_results) - results += batch_result - return results - - def get_labels(self): - """ - Get the labels which was used when pretraining - Returns: - self.labels(dict) - """ - self.labels = {"porn": 1, "not_porn": 0} - return self.labels - - -if __name__ == "__main__": - porn_detection_lstm = PornDetectionLSTM() - porn_detection_lstm.context() - test_text = ["黄片下载", "打击黄牛党"] - - results = porn_detection_lstm.detection(texts=test_text) - for index, text in enumerate(test_text): - results[index]["text"] = text - for index, result in enumerate(results): - if six.PY2: - print( - json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) - input_dict = {"text": test_text} - results = porn_detection_lstm.detection(data=input_dict) - for index, text in enumerate(test_text): - results[index]["text"] = text - for index, result in enumerate(results): - if six.PY2: - print( - json.dumps(results[index], encoding="utf8", 
ensure_ascii=False)) - else: - print(results[index]) diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/module.py b/hub_module/modules/video/classification/videotag_tsn_lstm/module.py deleted file mode 100644 index f0988172e81d1594e10c5373637daa5493f3906b..0000000000000000000000000000000000000000 --- a/hub_module/modules/video/classification/videotag_tsn_lstm/module.py +++ /dev/null @@ -1,236 +0,0 @@ -# coding:utf-8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import ast -import os - -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.module.module import moduleinfo, runnable -from paddlehub.common.logger import logger - -from videotag_tsn_lstm.resource.utils.config_utils import * -import videotag_tsn_lstm.resource.models as models -from videotag_tsn_lstm.resource.reader import get_reader -from videotag_tsn_lstm.resource.metrics import get_metrics -from videotag_tsn_lstm.resource.utils.utility import check_cuda -from videotag_tsn_lstm.resource.utils.utility import check_version - - -@moduleinfo( - name="videotag_tsn_lstm", - version="1.0.0", - summary= - "videotag_tsn_lstm is a video classification model, using TSN for feature extraction and AttentionLSTM for classification", - author="paddlepaddle", - author_email="paddle-dev@baidu.com", - type="video/classification", -) -class VideoTag(hub.Module): - def _initialize(self): - # add arg parser - self.parser = argparse.ArgumentParser( - description="Run the videotag_tsn_lstm module.", - prog='hub run videotag_tsn_lstm', - usage='%(prog)s', - add_help=True) - self.parser.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help='default use gpu.') - self.parser.add_argument( - '--input_path', - type=str, - default=None, - help='path of video data, single video') - self._has_load = False - - def _extractor(self, args, exe, place): - extractor_scope = fluid.Scope() - with fluid.scope_guard(extractor_scope): - extractor_startup_prog = fluid.Program() - extractor_main_prog = fluid.Program() - with fluid.program_guard(extractor_main_prog, - extractor_startup_prog): - extractor_config = parse_config(args.extractor_config) - extractor_infer_config = merge_configs(extractor_config, - 'infer', vars(args)) - - # build model - extractor_model = models.get_model( - "TSN", extractor_infer_config, mode='infer') - extractor_model.build_input(use_dataloader=False) - extractor_model.build_model() - extractor_feeds = extractor_model.feeds() - extractor_fetch_list = extractor_model.fetches() - - exe.run(extractor_startup_prog) - - logger.info('load extractor weights from {}'.format( - args.extractor_weights)) - extractor_model.load_test_weights(exe, args.extractor_weights, - extractor_main_prog) - - extractor_feeder = fluid.DataFeeder( - place=place, feed_list=extractor_feeds) - return extractor_main_prog, 
extractor_fetch_list, extractor_feeder, extractor_scope - - def _predictor(self, args, exe, place): - predictor_scope = fluid.Scope() - with fluid.scope_guard(predictor_scope): - predictor_startup_prog = fluid.default_startup_program() - predictor_main_prog = fluid.default_main_program() - with fluid.program_guard(predictor_main_prog, - predictor_startup_prog): - # parse config - predictor_config = parse_config(args.predictor_config) - predictor_infer_config = merge_configs(predictor_config, - 'infer', vars(args)) - - predictor_model = models.get_model( - "AttentionLSTM", predictor_infer_config, mode='infer') - predictor_model.build_input(use_dataloader=False) - predictor_model.build_model() - predictor_feeds = predictor_model.feeds() - predictor_outputs = predictor_model.outputs() - - exe.run(predictor_startup_prog) - - logger.info('load lstm weights from {}'.format( - args.predictor_weights)) - predictor_model.load_test_weights(exe, args.predictor_weights, - predictor_main_prog) - - predictor_feeder = fluid.DataFeeder( - place=place, feed_list=predictor_feeds) - predictor_fetch_list = predictor_model.fetches() - return predictor_main_prog, predictor_fetch_list, predictor_feeder, predictor_scope - - @runnable - def run_cmd(self, argsv): - args = self.parser.parse_args(argsv) - results = self.classify(paths=[args.input_path], use_gpu=args.use_gpu) - return results - - def classify(self, paths, use_gpu=False, threshold=0.5, top_k=10): - """ - API of Classification. - - Args: - paths (list[str]): the path of mp4s. - use_gpu (bool): whether to use gpu or not. - threshold (float): the result value >= threshold will be returned. - top_k (int): the top k result will be returned. - - Returns: - results (list[dict]): every dict includes the mp4 file path and prediction. 
- """ - args = self.parser.parse_args([]) - # config the args in videotag_tsn_lstm - args.use_gpu = use_gpu - args.filelist = paths - args.topk = top_k - args.threshold = threshold - args.extractor_config = os.path.join(self.directory, 'resource', - 'configs', 'tsn.yaml') - args.predictor_config = os.path.join(self.directory, 'resource', - 'configs', 'attention_lstm.yaml') - args.extractor_weights = os.path.join(self.directory, 'weights', 'tsn') - args.predictor_weights = os.path.join(self.directory, 'weights', - 'attention_lstm') - args.label_file = os.path.join(self.directory, 'resource', - 'label_3396.txt') - - check_cuda(args.use_gpu) - check_version() - - if not self._has_load: - self.place = fluid.CUDAPlace( - 0) if args.use_gpu else fluid.CPUPlace() - self.exe = fluid.Executor(self.place) - self.extractor_main_prog, self.extractor_fetch_list, self.extractor_feeder, self.extractor_scope = self._extractor( - args, self.exe, self.place) - self.predictor_main_prog, self.predictor_fetch_list, self.predictor_feeder, self.predictor_scope = self._predictor( - args, self.exe, self.place) - self._has_load = True - - extractor_config = parse_config(args.extractor_config) - extractor_infer_config = merge_configs(extractor_config, 'infer', - vars(args)) - extractor_reader = get_reader("TSN", 'infer', extractor_infer_config) - feature_list = [] - file_list = [] - - for idx, data in enumerate(extractor_reader()): - file_id = [item[-1] for item in data] - feed_data = [item[:-1] for item in data] - feature_out = self.exe.run( - program=self.extractor_main_prog, - fetch_list=self.extractor_fetch_list, - feed=self.extractor_feeder.feed(feed_data), - scope=self.extractor_scope) - feature_list.append(feature_out) - file_list.append(file_id) - logger.info( - '========[Stage 1 Sample {} ] Tsn feature extractor finished======' - .format(idx)) - - # get AttentionLSTM input from Tsn output - num_frames = 300 - predictor_feed_list = [] - for i in range(len(feature_list)): - feature_out = feature_list[i] - extractor_feature = feature_out[0] - predictor_feed_data = [[ - extractor_feature[0].astype(float)[0:num_frames, :] - ]] - predictor_feed_list.append((predictor_feed_data, file_list[i])) - - metrics_config = parse_config(args.predictor_config) - metrics_config['MODEL']['topk'] = args.topk - metrics_config['MODEL']['threshold'] = args.threshold - predictor_metrics = get_metrics("AttentionLSTM".upper(), 'infer', - metrics_config) - predictor_metrics.reset() - for idx, data in enumerate(predictor_feed_list): - file_id = data[1] - predictor_feed_data = data[0] - final_outs = self.exe.run( - program=self.predictor_main_prog, - fetch_list=self.predictor_fetch_list, - feed=self.predictor_feeder.feed(predictor_feed_data, ), - scope=self.predictor_scope) - logger.info( - '=======[Stage 2 Sample {} ] AttentionLSTM predict finished========' - .format(idx)) - final_result_list = [item for item in final_outs] + [file_id] - - predictor_metrics.accumulate(final_result_list) - results = predictor_metrics.finalize_and_log_out( - label_file=args.label_file) - return results - - -if __name__ == '__main__': - test_module = VideoTag() - print( - test_module.run_cmd( - argsv=['--input_path', "1.mp4", '--use_gpu', - str(False)])) diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/metrics_util.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/metrics_util.py deleted file mode 100644 index 
c6205007aef0c19915ba6d33e64196a547110d21..0000000000000000000000000000000000000000 --- a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/metrics_util.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import absolute_import -from __future__ import unicode_literals -from __future__ import print_function -from __future__ import division - -import io -import logging - -import numpy as np - -from videotag_tsn_lstm.resource.metrics.youtube8m import eval_util as youtube8m_metrics - -logger = logging.getLogger(__name__) - - -class Metrics(object): - def __init__(self, name, mode, metrics_args): - """Not implemented""" - pass - - def calculate_and_log_out(self, fetch_list, info=''): - """Not implemented""" - pass - - def accumulate(self, fetch_list, info=''): - """Not implemented""" - pass - - def finalize_and_log_out(self, info='', savedir='./'): - """Not implemented""" - pass - - def reset(self): - """Not implemented""" - pass - - -class Youtube8mMetrics(Metrics): - def __init__(self, name, mode, metrics_args): - self.name = name - self.mode = mode - self.num_classes = metrics_args['MODEL']['num_classes'] - self.topk = metrics_args['MODEL']['topk'] - self.threshold = metrics_args['MODEL']['threshold'] - - self.calculator = youtube8m_metrics.EvaluationMetrics( - self.num_classes, self.topk) - if self.mode == 'infer': - self.infer_results = [] - - def calculate_and_log_out(self, fetch_list, info=''): - loss = np.mean(np.array(fetch_list[0])) - pred = np.array(fetch_list[1]) - label = np.array(fetch_list[2]) - hit_at_one = youtube8m_metrics.calculate_hit_at_one(pred, label) - perr = youtube8m_metrics.calculate_precision_at_equal_recall_rate( - pred, label) - gap = youtube8m_metrics.calculate_gap(pred, label) - logger.info(info + ' , loss = {0}, Hit@1 = {1}, PERR = {2}, GAP = {3}'.format(\ - '%.6f' % loss, '%.2f' % hit_at_one, '%.2f' % perr, '%.2f' % gap)) - - def accumulate(self, fetch_list, info=''): - if self.mode == 'infer': - predictions = np.array(fetch_list[0]) - video_id = fetch_list[1] - for i in range(len(predictions)): - topk_inds = predictions[i].argsort()[0 - self.topk:] - topk_inds = topk_inds[::-1] - preds = predictions[i][topk_inds] - self.infer_results.append((video_id[i], topk_inds.tolist(), - preds.tolist())) - else: - loss = np.array(fetch_list[0]) - pred = np.array(fetch_list[1]) - label = np.array(fetch_list[2]) - self.calculator.accumulate(loss, pred, label) - - def finalize_and_log_out(self, info='', label_file='./label_3396.txt'): - if self.mode == 'infer': - all_res_list = [] - for index, item in enumerate(self.infer_results): - video_id = item[0] - f = io.open(label_file, "r", encoding="utf-8") - fl = f.readlines() - res = {} - res["path"] = video_id - res["prediction"] = {} - for i in range(len(item[1])): - class_id = item[1][i] - class_prob = item[2][i] - if class_prob < self.threshold: - continue - class_name = fl[class_id].split('\n')[0] - 
res["prediction"][class_name] = class_prob - if not res["prediction"]: - logger.warning( - "%s: No prediction exceeds the threshold = %s." % - (video_id, self.threshold)) - all_res_list.append(res) - return all_res_list - else: - epoch_info_dict = self.calculator.get() - logger.info(info + '\tavg_hit_at_one: {0},\tavg_perr: {1},\tavg_loss :{2},\taps: {3},\tgap:{4}'\ - .format(epoch_info_dict['avg_hit_at_one'], epoch_info_dict['avg_perr'], \ - epoch_info_dict['avg_loss'], epoch_info_dict['aps'], epoch_info_dict['gap'])) - - def reset(self): - self.calculator.clear() - if self.mode == 'infer': - self.infer_results = [] - - -class MetricsZoo(object): - def __init__(self): - self.metrics_zoo = {} - - def regist(self, name, metrics): - assert metrics.__base__ == Metrics, "Unknow model type {}".format( - type(metrics)) - self.metrics_zoo[name] = metrics - - def get(self, name, mode, cfg): - for k, v in self.metrics_zoo.items(): - if k == name: - return v(name, mode, cfg) - raise KeyError(name, self.metrics_zoo.keys()) - - -# singleton metrics_zoo -metrics_zoo = MetricsZoo() - - -def regist_metrics(name, metrics): - metrics_zoo.regist(name, metrics) - - -def get_metrics(name, mode, cfg): - return metrics_zoo.get(name, mode, cfg) - - -# sort by alphabet -regist_metrics("ATTENTIONLSTM", Youtube8mMetrics) diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/average_precision_calculator.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/average_precision_calculator.py deleted file mode 100644 index f425dd2f2ed27ffa49434848d81471c636408674..0000000000000000000000000000000000000000 --- a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/average_precision_calculator.py +++ /dev/null @@ -1,275 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS-IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Calculate or keep track of the interpolated average precision. - -It provides an interface for calculating interpolated average precision for an -entire list or the top-n ranked items. For the definition of the -(non-)interpolated average precision: -http://trec.nist.gov/pubs/trec15/appendices/CE.MEASURES06.pdf - -Example usages: -1) Use it as a static function call to directly calculate average precision for -a short ranked list in the memory. - -``` -import random - -p = np.array([random.random() for _ in xrange(10)]) -a = np.array([random.choice([0, 1]) for _ in xrange(10)]) - -ap = average_precision_calculator.AveragePrecisionCalculator.ap(p, a) -``` - -2) Use it as an object for long ranked list that cannot be stored in memory or -the case where partial predictions can be observed at a time (Tensorflow -predictions). In this case, we first call the function accumulate many times -to process parts of the ranked list. After processing all the parts, we call -peek_interpolated_ap_at_n. 
-``` -p1 = np.array([random.random() for _ in range(5)]) -a1 = np.array([random.choice([0, 1]) for _ in range(5)]) -p2 = np.array([random.random() for _ in range(5)]) -a2 = np.array([random.choice([0, 1]) for _ in range(5)]) - -# interpolated average precision at 10 -calculator = average_precision_calculator.AveragePrecisionCalculator(10) -calculator.accumulate(p1, a1) -calculator.accumulate(p2, a2) -ap3 = calculator.peek_ap_at_n() -``` -""" - -import heapq -import random -import numbers - -import numpy - - -class AveragePrecisionCalculator(object): - """Calculate the average precision and average precision at n.""" - - def __init__(self, top_n=None): - """Construct an AveragePrecisionCalculator to calculate average precision. - - This class is used to calculate the average precision for a single label. - - Args: - top_n: A positive Integer specifying the average precision at n, or - None to use all provided data points. - - Raises: - ValueError: An error occurred when the top_n is not a non-negative integer. - """ - if not ((isinstance(top_n, int) and top_n >= 0) or top_n is None): - raise ValueError("top_n must be a non-negative integer or None.") - - self._top_n = top_n # average precision at n - self._total_positives = 0 # total number of positives seen so far - self._heap = [] # max heap of (prediction, actual) - - @property - def heap_size(self): - """Gets the heap size maintained in the class.""" - return len(self._heap) - - @property - def num_accumulated_positives(self): - """Gets the number of positive samples that have been accumulated.""" - return self._total_positives - - def accumulate(self, predictions, actuals, num_positives=None): - """Accumulate the predictions and their ground truth labels. - - After the function call, we may call peek_ap_at_n to actually calculate - the average precision. - Note predictions and actuals must have the same shape. - - Args: - predictions: a list storing the prediction scores. - actuals: a list storing the ground truth labels. Any value - larger than 0 will be treated as positives, otherwise as negatives. - num_positives: If the 'predictions' and 'actuals' inputs aren't complete, - then it's possible some true positives were missed in them. In that case, - you can provide 'num_positives' in order to accurately track recall. - - Raises: - ValueError: An error occurred when the format of the input is not the - numpy 1-D array or the shape of predictions and actuals does not match. - """ - if len(predictions) != len(actuals): - raise ValueError( - "the shape of predictions and actuals does not match.") - - if num_positives is not None: - if not isinstance(num_positives, - numbers.Number) or num_positives < 0: - raise ValueError( - "'num_positives' was provided but it wasn't a non-negative number." - ) - - if num_positives is not None: - self._total_positives += num_positives - else: - self._total_positives += numpy.size(numpy.where(actuals > 0)) - topk = self._top_n - heap = self._heap - - for i in range(numpy.size(predictions)): - if topk is None or len(heap) < topk: - heapq.heappush(heap, (predictions[i], actuals[i])) - else: - if predictions[i] > heap[0][0]: # heap[0] is the smallest - heapq.heappop(heap) - heapq.heappush(heap, (predictions[i], actuals[i])) - - def clear(self): - """Clear the accumulated predictions.""" - self._heap = [] - self._total_positives = 0 - - def peek_ap_at_n(self): - """Peek the non-interpolated average precision at n. - - Returns: - The non-interpolated average precision at n (default 0).
- If n is larger than the length of the ranked list, - the average precision will be returned. - """ - if self.heap_size <= 0: - return 0 - predlists = numpy.array(list(zip(*self._heap))) - - ap = self.ap_at_n( - predlists[0], - predlists[1], - n=self._top_n, - total_num_positives=self._total_positives) - return ap - - @staticmethod - def ap(predictions, actuals): - """Calculate the non-interpolated average precision. - - Args: - predictions: a numpy 1-D array storing the sparse prediction scores. - actuals: a numpy 1-D array storing the ground truth labels. Any value - larger than 0 will be treated as positives, otherwise as negatives. - - Returns: - The non-interpolated average precision at n. - If n is larger than the length of the ranked list, - the average precision will be returned. - - Raises: - ValueError: An error occurred when the format of the input is not the - numpy 1-D array or the shape of predictions and actuals does not match. - """ - return AveragePrecisionCalculator.ap_at_n(predictions, actuals, n=None) - - @staticmethod - def ap_at_n(predictions, actuals, n=20, total_num_positives=None): - """Calculate the non-interpolated average precision. - - Args: - predictions: a numpy 1-D array storing the sparse prediction scores. - actuals: a numpy 1-D array storing the ground truth labels. Any value - larger than 0 will be treated as positives, otherwise as negatives. - n: the top n items to be considered in ap@n. - total_num_positives : (optionally) you can specify the number of total - positive - in the list. If specified, it will be used in calculation. - - Returns: - The non-interpolated average precision at n. - If n is larger than the length of the ranked list, - the average precision will be returned. - - Raises: - ValueError: An error occurred when - 1) the format of the input is not the numpy 1-D array; - 2) the shape of predictions and actuals does not match; - 3) the input n is not a positive integer. - """ - if len(predictions) != len(actuals): - raise ValueError( - "the shape of predictions and actuals does not match.") - - if n is not None: - if not isinstance(n, int) or n <= 0: - raise ValueError("n must be 'None' or a positive integer." - " It was '%s'." % n) - - ap = 0.0 - - predictions = numpy.array(predictions) - actuals = numpy.array(actuals) - - # add a shuffler to avoid overestimating the ap - predictions, actuals = AveragePrecisionCalculator._shuffle( - predictions, actuals) - sortidx = sorted( - range(len(predictions)), key=lambda k: predictions[k], reverse=True) - - if total_num_positives is None: - numpos = numpy.size(numpy.where(actuals > 0)) - else: - numpos = total_num_positives - - if numpos == 0: - return 0 - - if n is not None: - numpos = min(numpos, n) - delta_recall = 1.0 / numpos - poscount = 0.0 - - # calculate the ap - r = len(sortidx) - if n is not None: - r = min(r, n) - for i in range(r): - if actuals[sortidx[i]] > 0: - poscount += 1 - ap += poscount / (i + 1) * delta_recall - return ap - - @staticmethod - def _shuffle(predictions, actuals): - random.seed(0) - suffidx = random.sample(range(len(predictions)), len(predictions)) - predictions = predictions[suffidx] - actuals = actuals[suffidx] - return predictions, actuals - - @staticmethod - def _zero_one_normalize(predictions, epsilon=1e-7): - """Normalize the predictions to the range between 0.0 and 1.0. - - For some predictions like SVM predictions, we need to normalize them before - calculate the interpolated average precision. 
The normalization will not - change the rank in the original list and thus won't change the average - precision. - - Args: - predictions: a numpy 1-D array storing the sparse prediction scores. - epsilon: a small constant to avoid denominator being zero. - - Returns: - The normalized prediction. - """ - denominator = numpy.max(predictions) - numpy.min(predictions) - # numpy.maximum is the elementwise max, keeping the denominator at least - # epsilon; numpy.max(denominator, epsilon) would treat epsilon as an axis. - ret = (predictions - numpy.min(predictions)) / numpy.maximum( - denominator, epsilon) - return ret diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/eval_util.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/eval_util.py deleted file mode 100644 index 712abe1aa98e2de70026e736a63ad09f9faa2bf1..0000000000000000000000000000000000000000 --- a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/eval_util.py +++ /dev/null @@ -1,245 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS-IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Provides functions to help with evaluating models.""" -import datetime -import numpy - -from . import mean_average_precision_calculator as map_calculator -from . import average_precision_calculator as ap_calculator - - -def flatten(l): - """ Merges a list of lists into a single list. """ - return [item for sublist in l for item in sublist] - - -def calculate_hit_at_one(predictions, actuals): - """Performs a local (numpy) calculation of the hit at one. - - Args: - predictions: Matrix containing the outputs of the model. - Dimensions are 'batch' x 'num_classes'. - actuals: Matrix containing the ground truth labels. - Dimensions are 'batch' x 'num_classes'. - - Returns: - float: The average hit at one across the entire batch. - """ - top_prediction = numpy.argmax(predictions, 1) - hits = actuals[numpy.arange(actuals.shape[0]), top_prediction] - return numpy.average(hits) - - -def calculate_precision_at_equal_recall_rate(predictions, actuals): - """Performs a local (numpy) calculation of the PERR. - - Args: - predictions: Matrix containing the outputs of the model. - Dimensions are 'batch' x 'num_classes'. - actuals: Matrix containing the ground truth labels. - Dimensions are 'batch' x 'num_classes'. - - Returns: - float: The average precision at equal recall rate across the entire batch. - """ - aggregated_precision = 0.0 - num_videos = actuals.shape[0] - for row in numpy.arange(num_videos): - num_labels = int(numpy.sum(actuals[row])) - top_indices = numpy.argpartition(predictions[row], - -num_labels)[-num_labels:] - item_precision = 0.0 - for label_index in top_indices: - if predictions[row][label_index] > 0: - item_precision += actuals[row][label_index] - item_precision /= top_indices.size - aggregated_precision += item_precision - aggregated_precision /= num_videos - return aggregated_precision - - -def calculate_gap(predictions, actuals, top_k=20): - """Performs a local (numpy) calculation of the global average precision.
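A quick illustration of the two batch metrics just defined, hit@1 and PERR, on a toy batch (values follow directly from the definitions above):

```python
# hit@1 and PERR on a toy 2-video, 3-class batch, mirroring the code above.
import numpy as np

preds = np.array([[0.9, 0.05, 0.05],
                  [0.2, 0.5, 0.3]])
labels = np.array([[1, 0, 0],
                   [0, 0, 1]])

top = np.argmax(preds, 1)                   # calculate_hit_at_one
print(labels[np.arange(2), top].mean())     # 0.5: row 0 hits, row 1 misses

perr = 0.0                                  # calculate_precision_at_equal_recall_rate
for row in range(2):
    n = int(labels[row].sum())              # take as many top preds as labels
    top_n = np.argpartition(preds[row], -n)[-n:]
    perr += labels[row][top_n].sum() / n
print(perr / 2)                             # 0.5
```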
- - Only the top_k predictions are taken for each of the videos. - - Args: - predictions: Matrix containing the outputs of the model. - Dimensions are 'batch' x 'num_classes'. - actuals: Matrix containing the ground truth labels. - Dimensions are 'batch' x 'num_classes'. - top_k: How many predictions to use per video. - - Returns: - float: The global average precision. - """ - gap_calculator = ap_calculator.AveragePrecisionCalculator() - sparse_predictions, sparse_labels, num_positives = top_k_by_class( - predictions, actuals, top_k) - gap_calculator.accumulate( - flatten(sparse_predictions), flatten(sparse_labels), sum(num_positives)) - return gap_calculator.peek_ap_at_n() - - -def top_k_by_class(predictions, labels, k=20): - """Extracts the top k predictions for each video, sorted by class. - - Args: - predictions: A numpy matrix containing the outputs of the model. - Dimensions are 'batch' x 'num_classes'. - k: the top k non-zero entries to preserve in each prediction. - - Returns: - A tuple (predictions,labels, true_positives). 'predictions' and 'labels' - are lists of lists of floats. 'true_positives' is a list of scalars. The - length of the lists are equal to the number of classes. The entries in the - predictions variable are probability predictions, and - the corresponding entries in the labels variable are the ground truth for - those predictions. The entries in 'true_positives' are the number of true - positives for each class in the ground truth. - - Raises: - ValueError: An error occurred when the k is not a positive integer. - """ - if k <= 0: - raise ValueError("k must be a positive integer.") - k = min(k, predictions.shape[1]) - num_classes = predictions.shape[1] - prediction_triplets = [] - for video_index in range(predictions.shape[0]): - prediction_triplets.extend( - top_k_triplets(predictions[video_index], labels[video_index], k)) - out_predictions = [[] for v in range(num_classes)] - out_labels = [[] for v in range(num_classes)] - for triplet in prediction_triplets: - out_predictions[triplet[0]].append(triplet[1]) - out_labels[triplet[0]].append(triplet[2]) - out_true_positives = [numpy.sum(labels[:, i]) for i in range(num_classes)] - - return out_predictions, out_labels, out_true_positives - - -def top_k_triplets(predictions, labels, k=20): - """Get the top_k for a 1-d numpy array. Returns a sparse list of tuples in - (prediction, class) format""" - m = len(predictions) - k = min(k, m) - indices = numpy.argpartition(predictions, -k)[-k:] - return [(index, predictions[index], labels[index]) for index in indices] - - -class EvaluationMetrics(object): - """A class to store the evaluation metrics.""" - - def __init__(self, num_class, top_k): - """Construct an EvaluationMetrics object to store the evaluation metrics. - - Args: - num_class: A positive integer specifying the number of classes. - top_k: A positive integer specifying how many predictions are considered per video. - - Raises: - ValueError: An error occurred when MeanAveragePrecisionCalculator cannot - not be constructed. - """ - self.sum_hit_at_one = 0.0 - self.sum_perr = 0.0 - self.sum_loss = 0.0 - self.map_calculator = map_calculator.MeanAveragePrecisionCalculator( - num_class) - self.global_ap_calculator = ap_calculator.AveragePrecisionCalculator() - self.top_k = top_k - self.num_examples = 0 - - #def accumulate(self, predictions, labels, loss): - def accumulate(self, loss, predictions, labels): - """Accumulate the metrics calculated locally for this mini-batch. 
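A tiny example of the sparse (class index, score, label) triplets that `top_k_triplets` above produces per video (illustrative values):

```python
# Top-2 triplets for one video: (class_index, prediction, label).
import numpy as np

preds = np.array([0.1, 0.8, 0.4])
labels = np.array([0, 1, 0])
k = 2
idx = np.argpartition(preds, -k)[-k:]       # indices of the 2 largest scores
print([(int(i), float(preds[i]), int(labels[i])) for i in idx])
# -> [(2, 0.4, 0), (1, 0.8, 1)]; order within the top-k is unspecified
```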
- - Args: - predictions: A numpy matrix containing the outputs of the model. - Dimensions are 'batch' x 'num_classes'. - labels: A numpy matrix containing the ground truth labels. - Dimensions are 'batch' x 'num_classes'. - loss: A numpy array containing the loss for each sample. - - Returns: - dictionary: A dictionary storing the metrics for the mini-batch. - - Raises: - ValueError: An error occurred when the shape of predictions and actuals - does not match. - """ - batch_size = labels.shape[0] - mean_hit_at_one = calculate_hit_at_one(predictions, labels) - mean_perr = calculate_precision_at_equal_recall_rate( - predictions, labels) - mean_loss = numpy.mean(loss) - - # Take the top 20 predictions. - sparse_predictions, sparse_labels, num_positives = top_k_by_class( - predictions, labels, self.top_k) - self.map_calculator.accumulate(sparse_predictions, sparse_labels, - num_positives) - self.global_ap_calculator.accumulate( - flatten(sparse_predictions), flatten(sparse_labels), - sum(num_positives)) - - self.num_examples += batch_size - self.sum_hit_at_one += mean_hit_at_one * batch_size - self.sum_perr += mean_perr * batch_size - self.sum_loss += mean_loss * batch_size - - return { - "hit_at_one": mean_hit_at_one, - "perr": mean_perr, - "loss": mean_loss - } - - def get(self): - """Calculate the evaluation metrics for the whole epoch. - - Raises: - ValueError: If no examples were accumulated. - - Returns: - dictionary: a dictionary storing the evaluation metrics for the epoch. The - dictionary has the fields: avg_hit_at_one, avg_perr, avg_loss, and - aps (default nan). - """ - if self.num_examples <= 0: - raise ValueError("total_sample must be positive.") - avg_hit_at_one = self.sum_hit_at_one / self.num_examples - avg_perr = self.sum_perr / self.num_examples - avg_loss = self.sum_loss / self.num_examples - - aps = self.map_calculator.peek_map_at_n() - gap = self.global_ap_calculator.peek_ap_at_n() - - epoch_info_dict = {} - return { - "avg_hit_at_one": avg_hit_at_one, - "avg_perr": avg_perr, - "avg_loss": avg_loss, - "aps": aps, - "gap": gap - } - - def clear(self): - """Clear the evaluation metrics and reset the EvaluationMetrics object.""" - self.sum_hit_at_one = 0.0 - self.sum_perr = 0.0 - self.sum_loss = 0.0 - self.map_calculator.clear() - self.global_ap_calculator.clear() - self.num_examples = 0 diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/mean_average_precision_calculator.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/mean_average_precision_calculator.py deleted file mode 100644 index a8415dc511c89ef7ed78c55691f544380127b3f1..0000000000000000000000000000000000000000 --- a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/mean_average_precision_calculator.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS-IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Calculate the mean average precision. 
- -It provides an interface for calculating mean average precision -for an entire list or the top-n ranked items. - -Example usages: -We first call the function accumulate many times to process parts of the ranked -list. After processing all the parts, we call peek_map_at_n -to calculate the mean average precision. - -``` -import random - -p = np.array([[random.random() for _ in range(50)] for _ in range(1000)]) -a = np.array([[random.choice([0, 1]) for _ in range(50)] - for _ in range(1000)]) - -# mean average precision for 50 classes. -calculator = mean_average_precision_calculator.MeanAveragePrecisionCalculator( - num_class=50) -calculator.accumulate(p, a) -aps = calculator.peek_map_at_n() -``` -""" - -import numpy -from . import average_precision_calculator - - -class MeanAveragePrecisionCalculator(object): - """This class calculates the mean average precision. - """ - - def __init__(self, num_class): - """Construct a calculator to calculate the (macro) average precision. - - Args: - num_class: An Integer greater than 1 specifying the number of classes. - - Raises: - ValueError: An error occurred when num_class is not an integer - greater than 1. - """ - if not isinstance(num_class, int) or num_class <= 1: - raise ValueError("num_class must be an integer greater than 1.") - - self._ap_calculators = [] # one AveragePrecisionCalculator per class - self._num_class = num_class # total number of classes - for i in range(num_class): - self._ap_calculators.append( - average_precision_calculator.AveragePrecisionCalculator()) - - def accumulate(self, predictions, actuals, num_positives=None): - """Accumulate the predictions and their ground truth labels. - - Args: - predictions: A list of lists storing the prediction scores. The outer - dimension corresponds to classes. - actuals: A list of lists storing the ground truth labels. The dimensions - should correspond to the predictions input. Any value - larger than 0 will be treated as positives, otherwise as negatives. - num_positives: If provided, it is a list of numbers representing the - number of true positives for each class. If not provided, the number of - true positives will be inferred from the 'actuals' array. - - Raises: - ValueError: An error occurred when the shape of predictions and actuals - does not match. - """ - if not num_positives: - # one entry per class; iterating over the bare int - # predictions.shape[1] would raise a TypeError here. - num_positives = [None for _ in range(len(predictions))] - - calculators = self._ap_calculators - for i in range(len(predictions)): - calculators[i].accumulate(predictions[i], actuals[i], - num_positives[i]) - - def clear(self): - for calculator in self._ap_calculators: - calculator.clear() - - def is_empty(self): - return ([calculator.heap_size for calculator in self._ap_calculators - ] == [0 for _ in range(self._num_class)]) - - def peek_map_at_n(self): - """Peek the non-interpolated mean average precision at n. - - Returns: - An array of non-interpolated average precision at n (default 0) for each - class.
- """ - aps = [ - self._ap_calculators[i].peek_ap_at_n() - for i in range(self._num_class) - ] - return aps diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/attention_lstm.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/attention_lstm.py deleted file mode 100644 index 4bd0630732c78e664ea5bcfc1fe00d550b6106b2..0000000000000000000000000000000000000000 --- a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/attention_lstm.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -import logging - -import paddle.fluid as fluid -from paddle.fluid import ParamAttr - -from ..model import ModelBase -from .lstm_attention import LSTMAttentionModel - -__all__ = ["AttentionLSTM"] -logger = logging.getLogger(__name__) - - -class AttentionLSTM(ModelBase): - def __init__(self, name, cfg, mode='train'): - super(AttentionLSTM, self).__init__(name, cfg, mode) - self.get_config() - - def get_config(self): - # get model configs - self.feature_num = self.cfg.MODEL.feature_num - self.feature_names = self.cfg.MODEL.feature_names - self.feature_dims = self.cfg.MODEL.feature_dims - self.num_classes = self.cfg.MODEL.num_classes - self.embedding_size = self.cfg.MODEL.embedding_size - self.lstm_size = self.cfg.MODEL.lstm_size - self.drop_rate = self.cfg.MODEL.drop_rate - - # get mode configs - self.batch_size = self.get_config_from_sec(self.mode, 'batch_size', 1) - self.num_gpus = self.get_config_from_sec(self.mode, 'num_gpus', 1) - - def build_input(self, use_dataloader): - self.feature_input = [] - for name, dim in zip(self.feature_names, self.feature_dims): - self.feature_input.append( - fluid.data( - shape=[None, dim], lod_level=1, dtype='float32', name=name)) - if use_dataloader: - assert self.mode != 'infer', \ - 'dataloader is not recommendated when infer, please set use_dataloader to be false.' 
- self.dataloader = fluid.io.DataLoader.from_generator( - feed_list=self.feature_input, #+ [self.label_input], - capacity=8, - iterable=True) - - def build_model(self): - att_outs = [] - for i, (input_dim, feature) in enumerate( - zip(self.feature_dims, self.feature_input)): - att = LSTMAttentionModel(input_dim, self.embedding_size, - self.lstm_size, self.drop_rate) - att_out = att.forward(feature, is_training=(self.mode == 'train')) - att_outs.append(att_out) - if len(att_outs) > 1: - out = fluid.layers.concat(att_outs, axis=1) - else: - out = att_outs[0] - - fc1 = fluid.layers.fc( - input=out, - size=8192, - act='relu', - bias_attr=ParamAttr( - regularizer=fluid.regularizer.L2Decay(0.0), - initializer=fluid.initializer.NormalInitializer(scale=0.0)), - name='fc1') - fc2 = fluid.layers.fc( - input=fc1, - size=4096, - act='tanh', - bias_attr=ParamAttr( - regularizer=fluid.regularizer.L2Decay(0.0), - initializer=fluid.initializer.NormalInitializer(scale=0.0)), - name='fc2') - - self.logit = fluid.layers.fc(input=fc2, size=self.num_classes, act=None, \ - bias_attr=ParamAttr(regularizer=fluid.regularizer.L2Decay(0.0), - initializer=fluid.initializer.NormalInitializer(scale=0.0)), - name = 'output') - - self.output = fluid.layers.sigmoid(self.logit) - - def optimizer(self): - assert self.mode == 'train', "optimizer only can be get in train mode" - values = [ - self.learning_rate * (self.decay_gamma**i) - for i in range(len(self.decay_epochs) + 1) - ] - iter_per_epoch = self.num_samples / self.batch_size - boundaries = [e * iter_per_epoch for e in self.decay_epochs] - return fluid.optimizer.RMSProp( - learning_rate=fluid.layers.piecewise_decay( - values=values, boundaries=boundaries), - centered=True, - regularization=fluid.regularizer.L2Decay(self.weight_decay)) - - def loss(self): - assert self.mode != 'infer', "invalid loss calculationg in infer mode" - cost = fluid.layers.sigmoid_cross_entropy_with_logits( - x=self.logit, label=self.label_input) - cost = fluid.layers.reduce_sum(cost, dim=-1) - sum_cost = fluid.layers.reduce_sum(cost) - self.loss_ = fluid.layers.scale( - sum_cost, scale=self.num_gpus, bias_after_scale=False) - return self.loss_ - - def outputs(self): - return [self.output, self.logit] - - def feeds(self): - return self.feature_input - - def fetches(self): - fetch_list = [self.output] - return fetch_list - - def weights_info(self): - return ( - 'AttentionLSTM.pdparams', - 'https://paddlemodels.bj.bcebos.com/video_classification/AttentionLSTM.pdparams' - ) - - def load_pretrain_params(self, exe, pretrain, prog, place): - logger.info( - "Load pretrain weights from {}, exclude fc layer.".format(pretrain)) - - state_dict = fluid.load_program_state(pretrain) - dict_keys = list(state_dict.keys()) - for name in dict_keys: - if "fc_0" in name: - del state_dict[name] - logger.info( - 'Delete {} from pretrained parameters. Do not load it'. - format(name)) - fluid.set_program_state(prog, state_dict) diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/lstm_attention.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/lstm_attention.py deleted file mode 100644 index d92da5c33c8fe33a05681842e5798834420af2b9..0000000000000000000000000000000000000000 --- a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/lstm_attention.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 
-# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -import paddle.fluid as fluid -from paddle.fluid import ParamAttr - - -class LSTMAttentionModel(object): - """LSTM Attention Model""" - - def __init__(self, - bias_attr, - embedding_size=512, - lstm_size=1024, - drop_rate=0.5): - self.lstm_size = lstm_size - self.embedding_size = embedding_size - self.drop_rate = drop_rate - - def forward(self, input, is_training): - input_fc = fluid.layers.fc( - input=input, - size=self.embedding_size, - act='tanh', - bias_attr=ParamAttr( - regularizer=fluid.regularizer.L2Decay(0.0), - initializer=fluid.initializer.NormalInitializer(scale=0.0)), - name='rgb_fc') - - lstm_forward_fc = fluid.layers.fc( - input=input_fc, - size=self.lstm_size * 4, - act=None, - bias_attr=False, - name='rgb_fc_forward') - - lstm_forward, _ = fluid.layers.dynamic_lstm( - input=lstm_forward_fc, - size=self.lstm_size * 4, - is_reverse=False, - name='rgb_lstm_forward') - - lsmt_backward_fc = fluid.layers.fc( - input=input_fc, - size=self.lstm_size * 4, - act=None, - bias_attr=False, - name='rgb_fc_backward') - - lstm_backward, _ = fluid.layers.dynamic_lstm( - input=lsmt_backward_fc, - size=self.lstm_size * 4, - is_reverse=True, - name='rgb_lstm_backward') - - lstm_concat = fluid.layers.concat( - input=[lstm_forward, lstm_backward], axis=1) - - lstm_dropout = fluid.layers.dropout( - x=lstm_concat, - dropout_prob=self.drop_rate, - is_test=(not is_training)) - - lstm_weight = fluid.layers.fc( - input=lstm_dropout, - size=1, - act='sequence_softmax', - bias_attr=False, - name='rgb_weight') - - scaled = fluid.layers.elementwise_mul( - x=lstm_dropout, y=lstm_weight, axis=0) - lstm_pool = fluid.layers.sequence_pool(input=scaled, pool_type='sum') - - return lstm_pool diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/model.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/model.py deleted file mode 100644 index f5733835913346f4c876c8d67139264be289e36c..0000000000000000000000000000000000000000 --- a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/model.py +++ /dev/null @@ -1,155 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
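The attention pooling in `lstm_attention.py` above reduces the BiLSTM sequence to one fixed vector: a scalar score per timestep, a sequence softmax, then a weighted sum. An illustrative numpy rendering (shapes assume lstm_size=1024, so the concatenated BiLSTM output is 2048-d):

```python
# Numpy rendering of the attention pooling above (one video, 300 steps).
import numpy as np

h = np.random.rand(300, 2048)               # concat of forward/backward LSTM
scores = np.random.rand(300, 1)             # fc(..., size=1) logits
w = np.exp(scores) / np.exp(scores).sum()   # sequence_softmax over the video
pooled = (h * w).sum(axis=0)                # elementwise_mul + sum pooling
print(pooled.shape)                         # (2048,)
```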
- -import os -import logging -try: - from configparser import ConfigParser -except: - from ConfigParser import ConfigParser - -import paddle.fluid as fluid - -WEIGHT_DIR = os.path.join(os.path.expanduser('~'), '.paddle', 'weights') - -logger = logging.getLogger(__name__) - - -def is_parameter(var): - return isinstance(var, fluid.framework.Parameter) - - -class NotImplementError(Exception): - "Error: model function not implement" - - def __init__(self, model, function): - super(NotImplementError, self).__init__() - self.model = model.__class__.__name__ - self.function = function.__name__ - - def __str__(self): - return "Function {}() is not implemented in model {}".format( - self.function, self.model) - - -class ModelNotFoundError(Exception): - "Error: model not found" - - def __init__(self, model_name, avail_models): - super(ModelNotFoundError, self).__init__() - self.model_name = model_name - self.avail_models = avail_models - - def __str__(self): - msg = "Model {} Not Found.\nAvailiable models:\n".format( - self.model_name) - for model in self.avail_models: - msg += " {}\n".format(model) - return msg - - -class ModelBase(object): - def __init__(self, name, cfg, mode='train'): - assert mode in ['train', 'valid', 'test', 'infer'], \ - "Unknown mode type {}".format(mode) - self.name = name - self.is_training = (mode == 'train') - self.mode = mode - self.cfg = cfg - self.dataloader = None - - def build_model(self): - "build model struct" - raise NotImplementError(self, self.build_model) - - def build_input(self, use_dataloader): - "build input Variable" - raise NotImplementError(self, self.build_input) - - def optimizer(self): - "get model optimizer" - raise NotImplementError(self, self.optimizer) - - def outputs(self): - "get output variable" - raise NotImplementedError(self, self.outputs) - - def loss(self): - "get loss variable" - raise NotImplementedError(self, self.loss) - - def feeds(self): - "get feed inputs list" - raise NotImplementError(self, self.feeds) - - def fetches(self): - "get fetch list of model" - raise NotImplementError(self, self.fetches) - - def weights_info(self): - "get model weight default path and download url" - raise NotImplementError(self, self.weights_info) - - def dataloader(self): - return self.dataloader - - def epoch_num(self): - "get train epoch num" - return self.cfg.TRAIN.epoch - - def pretrain_info(self): - "get pretrain base model directory" - return (None, None) - - def load_pretrain_params(self, exe, pretrain, prog, place): - logger.info("Load pretrain weights from {}".format(pretrain)) - state_dict = fluid.load_program_state(pretrain) - fluid.set_program_state(prog, state_dict) - - def load_test_weights(self, exe, weights, prog): - params_list = list(filter(is_parameter, prog.list_vars())) - fluid.load(prog, weights, executor=exe, var_list=params_list) - - def get_config_from_sec(self, sec, item, default=None): - if sec.upper() not in self.cfg: - return default - return self.cfg[sec.upper()].get(item, default) - - -class ModelZoo(object): - def __init__(self): - self.model_zoo = {} - - def regist(self, name, model): - assert model.__base__ == ModelBase, "Unknow model type {}".format( - type(model)) - self.model_zoo[name] = model - - def get(self, name, cfg, mode='train'): - for k, v in self.model_zoo.items(): - if k.upper() == name.upper(): - return v(name, cfg, mode) - raise ModelNotFoundError(name, self.model_zoo.keys()) - - -# singleton model_zoo -model_zoo = ModelZoo() - - -def regist_model(name, model): - model_zoo.regist(name, model) - - -def 
get_model(name, cfg, mode='train'): - return model_zoo.get(name, cfg, mode) diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name.py deleted file mode 100644 index 1c4e3ebf68bf7327acf0f9c58ed65d769648235c..0000000000000000000000000000000000000000 --- a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name.py +++ /dev/null @@ -1,107 +0,0 @@ -import json - -depth = [3, 4, 23, 3] -num_filters = [64, 128, 256, 512] - -layer_index = 1 -caffe_param_list = [] - -name_list = ['conv1'] -params_list = [] -name = name_list[0] -conv_w = name + '_weights' -caffe_conv_w = 'ConvNdBackward' + str(layer_index) + '_weights' -params_list.append(conv_w) -caffe_param_list.append(caffe_conv_w) - -layer_index += 1 - -bn_name = "bn_" + name -caffe_bn_name = 'BatchNormBackward' + str(layer_index) + '_bn' -params_list.append(bn_name + '_scale') -params_list.append(bn_name + '_offset') -params_list.append(bn_name + '_mean') -params_list.append(bn_name + '_variance') - -caffe_param_list.append(caffe_bn_name + '_scale') -caffe_param_list.append(caffe_bn_name + '_offset') -caffe_param_list.append(caffe_bn_name + '_mean') -caffe_param_list.append(caffe_bn_name + '_variance') - -filter_input = 64 - -layer_index += 3 - -for block in range(len(depth)): - for i in range(depth[block]): - if block == 2: - if i == 0: - name = "res" + str(block + 2) + "a" - else: - name = "res" + str(block + 2) + "b" + str(i) - else: - name = "res" + str(block + 2) + chr(97 + i) - - name_list.append(name) - - for item in ['a', 'b', 'c']: - name_branch = name + '_branch2' + item - bn_name = 'bn' + name_branch[3:] - params_list.append(name_branch + '_weights') - params_list.append(bn_name + '_scale') - params_list.append(bn_name + '_offset') - params_list.append(bn_name + '_mean') - params_list.append(bn_name + '_variance') - - caffe_name_branch = 'ConvNdBackward' + str(layer_index) - caffe_param_list.append(caffe_name_branch + '_weights') - - layer_index += 1 - caffe_bn_name = 'BatchNormBackward' + str(layer_index) + '_bn' - caffe_param_list.append(caffe_bn_name + '_scale') - caffe_param_list.append(caffe_bn_name + '_offset') - caffe_param_list.append(caffe_bn_name + '_mean') - caffe_param_list.append(caffe_bn_name + '_variance') - - layer_index += 2 - - stride = 2 if i == 0 and block != 0 else 1 - filter_num = num_filters[block] - filter_output = filter_num * 4 - - if (filter_output != filter_input) or (stride != 1): - name_branch = name + '_branch1' - - print( - 'filter_input {}, filter_output {}, stride {}, branch name {}'. 
- format(filter_input, filter_output, stride, name_branch)) - bn_name = 'bn' + name_branch[3:] - params_list.append(name_branch + '_weights') - params_list.append(bn_name + '_scale') - params_list.append(bn_name + '_offset') - params_list.append(bn_name + '_mean') - params_list.append(bn_name + '_variance') - - caffe_name_branch = 'ConvNdBackward' + str(layer_index) - caffe_param_list.append(caffe_name_branch + '_weights') - - layer_index += 1 - caffe_bn_name = 'BatchNormBackward' + str(layer_index) + '_bn' - caffe_param_list.append(caffe_bn_name + '_scale') - caffe_param_list.append(caffe_bn_name + '_offset') - caffe_param_list.append(caffe_bn_name + '_mean') - caffe_param_list.append(caffe_bn_name + '_variance') - - layer_index += 3 - else: - layer_index += 2 - - filter_input = filter_output - -map_dict = {} - -for i in range(len(params_list)): - print(params_list[i], caffe_param_list[i]) - map_dict[params_list[i]] = caffe_param_list[i] - -json.dump(map_dict, open('name_map.json', 'w')) diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/tsn.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/tsn.py deleted file mode 100644 index b0ff2727701ae92a359946671aac4b819d7a5db6..0000000000000000000000000000000000000000 --- a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/tsn.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
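For reference, the first entries of the Paddle-to-Caffe parameter-name map that `name.py` above dumps can be worked out from its counters (layer_index starts at 1, and the following BN layer uses index 2):

```python
# Head of the name map produced by name.py (derived from the code above).
name_map_head = {
    "conv1_weights": "ConvNdBackward1_weights",
    "bn_conv1_scale": "BatchNormBackward2_bn_scale",
    "bn_conv1_offset": "BatchNormBackward2_bn_offset",
    "bn_conv1_mean": "BatchNormBackward2_bn_mean",
    "bn_conv1_variance": "BatchNormBackward2_bn_variance",
}
```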
- -import numpy as np - -import paddle.fluid as fluid -from paddle.fluid import ParamAttr - -from ..model import ModelBase -from .tsn_res_model import TSN_ResNet - -import logging -logger = logging.getLogger(__name__) - -__all__ = ["TSN"] - - -class TSN(ModelBase): - def __init__(self, name, cfg, mode='train'): - super(TSN, self).__init__(name, cfg, mode=mode) - self.get_config() - - def get_config(self): - self.num_classes = self.get_config_from_sec('model', 'num_classes') - self.seg_num = self.get_config_from_sec('model', 'seg_num') - self.seglen = self.get_config_from_sec('model', 'seglen') - self.image_mean = self.get_config_from_sec('model', 'image_mean') - self.image_std = self.get_config_from_sec('model', 'image_std') - self.num_layers = self.get_config_from_sec('model', 'num_layers') - - self.num_epochs = self.get_config_from_sec('train', 'epoch') - self.total_videos = self.get_config_from_sec('train', 'total_videos') - self.base_learning_rate = self.get_config_from_sec( - 'train', 'learning_rate') - self.learning_rate_decay = self.get_config_from_sec( - 'train', 'learning_rate_decay') - self.l2_weight_decay = self.get_config_from_sec('train', - 'l2_weight_decay') - self.momentum = self.get_config_from_sec('train', 'momentum') - - self.seg_num = self.get_config_from_sec(self.mode, 'seg_num', - self.seg_num) - self.target_size = self.get_config_from_sec(self.mode, 'target_size') - self.batch_size = self.get_config_from_sec(self.mode, 'batch_size') - - def build_input(self, use_dataloader=True): - image_shape = [3, self.target_size, self.target_size] - image_shape[0] = image_shape[0] * self.seglen - image_shape = [None, self.seg_num] + image_shape - self.use_dataloader = use_dataloader - - image = fluid.data(name='image', shape=image_shape, dtype='float32') - if self.mode != 'infer': - label = fluid.data(name='label', shape=[None, 1], dtype='int64') - else: - label = None - - if use_dataloader: - assert self.mode != 'infer', \ - 'dataloader is not recommendated when infer, please set use_dataloader to be false.' 
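A worked example of the input shape assembled in `TSN.build_input` above (seg_num=7, seglen=1, target_size=224 are assumptions; the real values come from the YAML config):

```python
# Shape assembly in TSN.build_input (config values are assumptions).
seg_num, seglen, target_size = 7, 1, 224
image_shape = [3 * seglen, target_size, target_size]  # seglen folds into channels
image_shape = [None, seg_num] + image_shape           # -> [None, 7, 3, 224, 224]
```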
- self.dataloader = fluid.io.DataLoader.from_generator( - feed_list=[image, label], capacity=4, iterable=True) - - self.feature_input = [image] - self.label_input = label - - def create_model_args(self): - cfg = {} - cfg['layers'] = self.num_layers - cfg['class_dim'] = self.num_classes - cfg['seg_num'] = self.seg_num - return cfg - - def build_model(self): - cfg = self.create_model_args() - videomodel = TSN_ResNet( - layers=cfg['layers'], - seg_num=cfg['seg_num'], - is_training=(self.mode == 'train')) - out = videomodel.net( - input=self.feature_input[0], class_dim=cfg['class_dim']) - self.feature_output = out - #self.network_outputs = [out] - - def optimizer(self): - assert self.mode == 'train', "optimizer only can be get in train mode" - epoch_points = [self.num_epochs / 3, self.num_epochs * 2 / 3] - total_videos = self.total_videos - step = int(total_videos / self.batch_size + 1) - bd = [e * step for e in epoch_points] - base_lr = self.base_learning_rate - lr_decay = self.learning_rate_decay - lr = [base_lr, base_lr * lr_decay, base_lr * lr_decay * lr_decay] - l2_weight_decay = self.l2_weight_decay - momentum = self.momentum - optimizer = fluid.optimizer.Momentum( - learning_rate=fluid.layers.piecewise_decay( - boundaries=bd, values=lr), - momentum=momentum, - regularization=fluid.regularizer.L2Decay(l2_weight_decay)) - - return optimizer - - def loss(self): - assert self.mode != 'infer', "invalid loss calculationg in infer mode" - cost = fluid.layers.cross_entropy(input=self.network_outputs[0], \ - label=self.label_input, ignore_index=-1) - self.loss_ = fluid.layers.mean(x=cost) - return self.loss_ - - def outputs(self): - return self.network_outputs - - def feeds(self): - return self.feature_input #if self.mode == 'infer' else self.feature_input + [ -# self.label_input -# ] - - def fetches(self): - if self.mode == 'train' or self.mode == 'valid': - losses = self.loss() - fetch_list = [losses, self.network_outputs[0], self.label_input] - elif self.mode == 'test': - #losses = self.loss() - fetch_list = [self.feature_output, self.label_input] - elif self.mode == 'infer': - fetch_list = self.feature_output - else: - raise NotImplementedError('mode {} not implemented'.format( - self.mode)) - - return fetch_list - - def pretrain_info(self): - return ( - 'ResNet50_pretrained', - 'https://paddlemodels.bj.bcebos.com/video_classification/ResNet50_pretrained.tar.gz' - ) - - def weights_info(self): - return ( - 'TSN.pdparams', - 'https://paddlemodels.bj.bcebos.com/video_classification/TSN.pdparams' - ) - - def load_pretrain_params(self, exe, pretrain, prog, place): - def is_parameter(var): - return isinstance(var, fluid.framework.Parameter) - - params_list = list(filter(is_parameter, prog.list_vars())) - for param in params_list: - print(param.name) - - logger.info( - "Load pretrain weights from {}, exclude fc layer.".format(pretrain)) - - state_dict = fluid.load_program_state(pretrain) - dict_keys = list(state_dict.keys()) - for name in dict_keys: - if "fc_0" in name: - del state_dict[name] - print('Delete {} from pretrained parameters. Do not load it'. 
- format(name)) - fluid.set_program_state(prog, state_dict) - - -# def load_test_weights(self, exe, weights, prog): -# def is_parameter(var): -# return isinstance(var, fluid.framework.Parameter) -# params_list = list(filter(is_parameter, prog.list_vars())) - -# state_dict = np.load(weights) -# for p in params_list: -# if p.name in state_dict.keys(): -# print('########### load param {} from file'.format(p.name)) -# else: -# print('----------- param {} not in file'.format(p.name)) -# fluid.set_program_state(prog, state_dict) -# fluid.save(prog, './model_weight/tsn') diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/tsn_res_model.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/tsn_res_model.py deleted file mode 100644 index c2e90fe46e84b77d241d73273fb6269669ba7b46..0000000000000000000000000000000000000000 --- a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/tsn_res_model.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -import os -import time -import sys -import paddle.fluid as fluid -import math - - -class TSN_ResNet(): - def __init__(self, layers=50, seg_num=7, is_training=True): - self.layers = 101 #layers - self.seg_num = seg_num - self.is_training = is_training - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=fluid.param_attr.ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - - return fluid.layers.batch_norm( - input=conv, - act=act, - is_test=(not self.is_training), - param_attr=fluid.param_attr.ParamAttr(name=bn_name + "_scale"), - bias_attr=fluid.param_attr.ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + '_variance') - - def shortcut(self, input, ch_out, stride, name): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck_block(self, input, num_filters, stride, name): - conv0 = self.conv_bn_layer( - input=input, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - conv2 = self.conv_bn_layer( - input=conv1, - num_filters=num_filters * 4, - filter_size=1, - act=None, - name=name + "_branch2c") - - short = self.shortcut( - input, num_filters * 4, stride, name=name + "_branch1") - - return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') - - def net(self, input, class_dim=101): - layers = self.layers - 
seg_num = self.seg_num - supported_layers = [50, 101, 152] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - - # reshape input - channels = input.shape[2] - short_size = input.shape[3] - input = fluid.layers.reshape( - x=input, shape=[-1, channels, short_size, short_size]) - - if layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - num_filters = [64, 128, 256, 512] - - conv = self.conv_bn_layer( - input=input, - num_filters=64, - filter_size=7, - stride=2, - act='relu', - name='conv1') - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - name=conv_name) - - pool = fluid.layers.pool2d( - input=conv, pool_size=7, pool_type='avg', global_pooling=True) - - feature = fluid.layers.reshape( - x=pool, shape=[-1, seg_num, pool.shape[1]]) - return feature diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/reader/kinetics_reader.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/reader/kinetics_reader.py deleted file mode 100644 index 06847d0e04f9981bc5bdaa25f17fb8b76a69d814..0000000000000000000000000000000000000000 --- a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/reader/kinetics_reader.py +++ /dev/null @@ -1,231 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -import sys - -import random -import functools -import logging -try: - import cPickle as pickle - from cStringIO import StringIO -except ImportError: - import pickle - from io import BytesIO - -import paddle -import cv2 -import numpy as np -from PIL import Image - -from .reader_utils import DataReader - -logger = logging.getLogger(__name__) -python_ver = sys.version_info - - -class KineticsReader(DataReader): - """ - Data reader for kinetics dataset of two format mp4 and pkl. - 1. mp4, the original format of kinetics400 - 2. pkl, the mp4 was decoded previously and stored as pkl - In both case, load the data, and then get the frame data in the form of numpy and label as an integer. 
- dataset cfg: format - num_classes - seg_num - short_size - target_size - num_reader_threads - buf_size - image_mean - image_std - batch_size - list - """ - - def __init__(self, name, mode, cfg): - super(KineticsReader, self).__init__(name, mode, cfg) - self.format = cfg.MODEL.format - self.num_classes = self.get_config_from_sec('model', 'num_classes') - self.seg_num = self.get_config_from_sec('model', 'seg_num') - self.seglen = self.get_config_from_sec('model', 'seglen') - - self.seg_num = self.get_config_from_sec(mode, 'seg_num', self.seg_num) - self.short_size = self.get_config_from_sec(mode, 'short_size') - self.target_size = self.get_config_from_sec(mode, 'target_size') - self.num_reader_threads = self.get_config_from_sec( - mode, 'num_reader_threads') - self.buf_size = self.get_config_from_sec(mode, 'buf_size') - - self.img_mean = np.array(cfg.MODEL.image_mean).reshape( - [3, 1, 1]).astype(np.float32) - self.img_std = np.array(cfg.MODEL.image_std).reshape([3, 1, 1]).astype( - np.float32) - # set batch size and file list - self.batch_size = cfg[mode.upper()]['batch_size'] - self.filelist = cfg[mode.upper()]['filelist'] - - def create_reader(self): - _reader = self._reader_creator(self.filelist, self.mode, seg_num=self.seg_num, seglen = self.seglen, \ - short_size = self.short_size, target_size = self.target_size, \ - img_mean = self.img_mean, img_std = self.img_std, \ - shuffle = (self.mode == 'train'), \ - num_threads = self.num_reader_threads, \ - buf_size = self.buf_size, format = self.format) - - def _batch_reader(): - batch_out = [] - for imgs, label in _reader(): - if imgs is None: - continue - batch_out.append((imgs, label)) - if len(batch_out) == self.batch_size: - yield batch_out - batch_out = [] - - return _batch_reader - - def _reader_creator(self, - pickle_list, - mode, - seg_num, - seglen, - short_size, - target_size, - img_mean, - img_std, - shuffle=False, - num_threads=1, - buf_size=1024, - format='pkl'): - def decode_mp4(sample, mode, seg_num, seglen, short_size, target_size, - img_mean, img_std): - sample = sample[0].split(' ') - mp4_path = sample[0] - try: - imgs = mp4_loader(mp4_path, seg_num, seglen, mode) - if len(imgs) < 1: - logger.error('{} frame length {} less than 1.'.format( - mp4_path, len(imgs))) - return None, None - except: - logger.error('Error when loading {}'.format(mp4_path)) - return None, None - - return imgs_transform(imgs, mode, seg_num, seglen, \ - short_size, target_size, img_mean, img_std, name = self.name), mp4_path - - def reader(): - lines = [line.strip() for line in pickle_list] - if shuffle: - random.shuffle(lines) - for line in lines: - pickle_path = line.strip() - yield [pickle_path] - - mapper = functools.partial( - decode_mp4, - mode=mode, - seg_num=seg_num, - seglen=seglen, - short_size=short_size, - target_size=target_size, - img_mean=img_mean, - img_std=img_std) - - return paddle.reader.xmap_readers(mapper, reader, num_threads, buf_size) - - -def imgs_transform(imgs, - mode, - seg_num, - seglen, - short_size, - target_size, - img_mean, - img_std, - name=''): - imgs = group_scale(imgs, short_size) - - np_imgs = np.array([np.array(img).astype('float32') for img in imgs]) #dhwc - np_imgs = group_center_crop(np_imgs, target_size) - np_imgs = np_imgs.transpose(0, 3, 1, 2) / 255 #dchw - np_imgs -= img_mean - np_imgs /= img_std - - return np_imgs - - -def group_center_crop(np_imgs, target_size): - d, h, w, c = np_imgs.shape - th, tw = target_size, target_size - assert (w >= target_size) and (h >= target_size), \ - "image width({}) and 
height({}) should be larger than crop size".format(w, h, target_size) - - h_off = int(round((h - th) / 2.)) - w_off = int(round((w - tw) / 2.)) - - img_crop = np_imgs[:, h_off:h_off + target_size, w_off:w_off + - target_size, :] - return img_crop - - -def group_scale(imgs, target_size): - resized_imgs = [] - for i in range(len(imgs)): - img = imgs[i] - w, h = img.size - if (w <= h and w == target_size) or (h <= w and h == target_size): - resized_imgs.append(img) - continue - - if w < h: - ow = target_size - oh = int(target_size * 4.0 / 3.0) - resized_imgs.append(img.resize((ow, oh), Image.BILINEAR)) - else: - oh = target_size - ow = int(target_size * 4.0 / 3.0) - resized_imgs.append(img.resize((ow, oh), Image.BILINEAR)) - - return resized_imgs - - -def mp4_loader(filepath, nsample, seglen, mode): - cap = cv2.VideoCapture(filepath) - videolen = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - sampledFrames = [] - for i in range(videolen): - ret, frame = cap.read() - # maybe first frame is empty - if ret == False: - continue - img = frame[:, :, ::-1] - sampledFrames.append(img) - average_dur = int(len(sampledFrames) / nsample) - imgs = [] - for i in range(nsample): - idx = 0 - if average_dur >= seglen: - idx = (average_dur - 1) // 2 - idx += i * average_dur - elif average_dur >= 1: - idx += i * average_dur - else: - idx = i - - for jj in range(idx, idx + seglen): - imgbuf = sampledFrames[int(jj % len(sampledFrames))] - img = Image.fromarray(imgbuf, mode='RGB') - imgs.append(img) - return imgs diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/reader/reader_utils.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/reader/reader_utils.py deleted file mode 100644 index 54b2d7ad82e6cfe2142c0bc66942f775d7720c01..0000000000000000000000000000000000000000 --- a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/reader/reader_utils.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
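Before moving on to `reader_utils.py` below: the index arithmetic buried in `mp4_loader` above is the core of TSN-style sampling, and it is easier to follow pulled out on its own. A self-contained sketch (the function name `segment_indices` and the printed example are illustrative, not part of the module):

```python
# Sketch of the TSN-style sampling done in mp4_loader above: a clip of
# `nframes` decoded frames is split into `nsample` segments, and `seglen`
# consecutive frames are drawn from (roughly) the middle of each segment.
def segment_indices(nframes, nsample, seglen):
    average_dur = nframes // nsample  # frames available per segment
    indices = []
    for i in range(nsample):
        if average_dur >= seglen:
            # center a seglen-frame window inside segment i
            idx = (average_dur - 1) // 2 + i * average_dur
        elif average_dur >= 1:
            idx = i * average_dur
        else:
            # fewer frames than segments: frames get reused via the modulo
            idx = i
        indices.extend(int(jj % nframes) for jj in range(idx, idx + seglen))
    return indices

# A 300-frame clip with seg_num=3 and seglen=1 samples frames [49, 149, 249].
print(segment_indices(300, 3, 1))
```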
-
-
-class ReaderNotFoundError(Exception):
-    "Error: reader not found"
-
-    def __init__(self, reader_name, avail_readers):
-        super(ReaderNotFoundError, self).__init__()
-        self.reader_name = reader_name
-        self.avail_readers = avail_readers
-
-    def __str__(self):
-        msg = "Reader {} Not Found.\nAvailable readers:\n".format(
-            self.reader_name)
-        for reader in self.avail_readers:
-            msg += "  {}\n".format(reader)
-        return msg
-
-
-class DataReader(object):
-    """data reader for video input"""
-
-    def __init__(self, model_name, mode, cfg):
-        self.name = model_name
-        self.mode = mode
-        self.cfg = cfg
-
-    def create_reader(self):
-        """Not implemented"""
-        pass
-
-    def get_config_from_sec(self, sec, item, default=None):
-        if sec.upper() not in self.cfg:
-            return default
-        return self.cfg[sec.upper()].get(item, default)
-
-
-class ReaderZoo(object):
-    def __init__(self):
-        self.reader_zoo = {}
-
-    def regist(self, name, reader):
-        assert reader.__base__ == DataReader, "Unknown reader type {}".format(
-            type(reader))
-        self.reader_zoo[name] = reader
-
-    def get(self, name, mode, cfg):
-        for k, v in self.reader_zoo.items():
-            if k == name:
-                return v(name, mode, cfg)
-        raise ReaderNotFoundError(name, self.reader_zoo.keys())
-
-
-# singleton reader_zoo
-reader_zoo = ReaderZoo()
-
-
-def regist_reader(name, reader):
-    reader_zoo.regist(name, reader)
-
-
-def get_reader(name, mode, cfg):
-    reader_model = reader_zoo.get(name, mode, cfg)
-    return reader_model.create_reader()
diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/__init__.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/config_utils.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/config_utils.py
deleted file mode 100644
index 05947265b5d8cea165fe9a97d024cbf4b1309181..0000000000000000000000000000000000000000
--- a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/config_utils.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
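Before the `config_utils.py` body that follows: these helpers are designed to be chained, with `parse_config` loading a yaml file into a nested `AttrDict`, `merge_configs` overlaying non-None command-line overrides onto one section, and `print_configs` logging the result. A minimal usage sketch (the import path, the `tsn.yaml` file name, and the override dict are illustrative assumptions, not code from this repo):

```python
# Hypothetical driver showing how the config_utils.py helpers fit together.
from utils.config_utils import parse_config, merge_configs, print_configs

cfg = parse_config("tsn.yaml")  # yaml file -> nested AttrDict
# Overlay overrides onto the TRAIN section only; merge_configs skips any
# key whose value is None, so unset CLI flags leave the yaml values alone.
cfg = merge_configs(cfg, "train", {"batch_size": 16, "filelist": None})
print_configs(cfg, "Train")  # logs every section and its key/value pairs
```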
- -import logging - -from .utility import AttrDict - -logger = logging.getLogger(__name__) - -CONFIG_SECS = [ - 'train', - 'valid', - 'test', - 'infer', -] - - -def parse_config(cfg_file): - """Load a config file into AttrDict""" - import yaml - with open(cfg_file, 'r') as fopen: - yaml_config = AttrDict(yaml.load(fopen, Loader=yaml.Loader)) - create_attr_dict(yaml_config) - return yaml_config - - -def create_attr_dict(yaml_config): - from ast import literal_eval - for key, value in yaml_config.items(): - if type(value) is dict: - yaml_config[key] = value = AttrDict(value) - if isinstance(value, str): - try: - value = literal_eval(value) - except BaseException: - pass - if isinstance(value, AttrDict): - create_attr_dict(yaml_config[key]) - else: - yaml_config[key] = value - return - - -def merge_configs(cfg, sec, args_dict): - assert sec in CONFIG_SECS, "invalid config section {}".format(sec) - sec_dict = getattr(cfg, sec.upper()) - for k, v in args_dict.items(): - if v is None: - continue - try: - if hasattr(sec_dict, k): - setattr(sec_dict, k, v) - except: - pass - return cfg - - -def print_configs(cfg, mode): - logger.info( - "---------------- {:>5} Arguments ----------------".format(mode)) - for sec, sec_items in cfg.items(): - logger.info("{}:".format(sec)) - for k, v in sec_items.items(): - logger.info(" {}:{}".format(k, v)) - logger.info("-------------------------------------------------") diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/train_utils.py b/hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/train_utils.py deleted file mode 100644 index 2909479b9132e2ffbd3d870f005a12a9c7f68151..0000000000000000000000000000000000000000 --- a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/train_utils.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import logging - -import time -import numpy as np -import paddle.fluid as fluid -from paddle.fluid import profiler - -logger = logging.getLogger(__name__) - - -def log_lr_and_step(): - try: - # In optimizers, if learning_rate is set as constant, lr_var - # name is 'learning_rate_0', and iteration counter is not - # recorded. 
If learning_rate is set as decayed values from - # learning_rate_scheduler, lr_var name is 'learning_rate', - # and iteration counter is recorded with name '@LR_DECAY_COUNTER@', - # better impliment is required here - lr_var = fluid.global_scope().find_var("learning_rate") - if not lr_var: - lr_var = fluid.global_scope().find_var("learning_rate_0") - lr = np.array(lr_var.get_tensor()) - - lr_count = '[-]' - lr_count_var = fluid.global_scope().find_var("@LR_DECAY_COUNTER@") - if lr_count_var: - lr_count = np.array(lr_count_var.get_tensor()) - logger.info( - "------- learning rate {}, learning rate counter {} -----".format( - np.array(lr), np.array(lr_count))) - except: - logger.warn("Unable to get learning_rate and LR_DECAY_COUNTER.") - - -def test_with_dataloader(exe, - compiled_test_prog, - test_dataloader, - test_fetch_list, - test_metrics, - log_interval=0, - save_model_name=''): - if not test_dataloader: - logger.error("[TEST] get dataloader failed.") - test_metrics.reset() - test_iter = 0 - - for data in test_dataloader(): - test_outs = exe.run( - compiled_test_prog, fetch_list=test_fetch_list, feed=data) - test_metrics.accumulate(test_outs) - if log_interval > 0 and test_iter % log_interval == 0: - test_metrics.calculate_and_log_out(test_outs, \ - info = '[TEST] test_iter {} '.format(test_iter)) - test_iter += 1 - test_metrics.finalize_and_log_out("[TEST] Finish") - - -def train_with_dataloader(exe, train_prog, compiled_train_prog, train_dataloader, \ - train_fetch_list, train_metrics, epochs = 10, \ - log_interval = 0, valid_interval = 0, save_dir = './', \ - save_model_name = 'model', fix_random_seed = False, \ - compiled_test_prog = None, test_dataloader = None, \ - test_fetch_list = None, test_metrics = None, \ - is_profiler = None, profiler_path = None): - if not train_dataloader: - logger.error("[TRAIN] get dataloader failed.") - epoch_periods = [] - train_loss = 0 - for epoch in range(epochs): - log_lr_and_step() - - train_iter = 0 - epoch_periods = [] - - for data in train_dataloader(): - cur_time = time.time() - train_outs = exe.run( - compiled_train_prog, fetch_list=train_fetch_list, feed=data) - period = time.time() - cur_time - epoch_periods.append(period) - if log_interval > 0 and (train_iter % log_interval == 0): - train_metrics.calculate_and_log_out(train_outs, \ - info = '[TRAIN] Epoch {}, iter {} '.format(epoch, train_iter)) - train_iter += 1 - - # NOTE: profiler tools, used for benchmark - if is_profiler and epoch == 0 and train_iter == log_interval: - profiler.start_profiler("All") - elif is_profiler and epoch == 0 and train_iter == log_interval + 5: - profiler.stop_profiler("total", profiler_path) - return - - if len(epoch_periods) < 1: - logger.info( - 'No iteration was executed, please check the data reader') - sys.exit(1) - - logger.info( - '[TRAIN] Epoch {} training finished, average time: {}'.format( - epoch, np.mean(epoch_periods[1:]))) - save_model( - exe, - train_prog, - save_dir, - save_model_name, - "_epoch{}".format(epoch), - save_type='.pdckpt') - save_model( - exe, - train_prog, - save_dir, - save_model_name, - "_epoch{}".format(epoch), - save_type='.pdparams') - if compiled_test_prog and valid_interval > 0 and ( - epoch + 1) % valid_interval == 0: - test_with_dataloader(exe, compiled_test_prog, test_dataloader, - test_fetch_list, test_metrics, log_interval, - save_model_name) - - save_model( - exe, - train_prog, - save_dir, - save_model_name, - '_final', - save_type='.pdckpt') - save_model( - exe, - train_prog, - save_dir, - save_model_name, - 
'_final',
-        save_type='.pdparams')
-    # when fix_random_seed is set, print benchmark KPIs for debugging
-    if fix_random_seed:
-        cards = os.environ.get('CUDA_VISIBLE_DEVICES')
-        gpu_num = len(cards.split(","))
-        print("kpis\ttrain_cost_card{}\t{}".format(gpu_num, train_loss))
-        print("kpis\ttrain_speed_card{}\t{}".format(gpu_num,
-                                                    np.mean(epoch_periods)))
-
-
-def save_model(exe,
-               program,
-               save_dir,
-               model_name,
-               postfix=None,
-               save_type='.pdckpt'):
-    """
-    save_type: '.pdckpt' or '.pdparams', '.pdckpt' for all persistable variables,
-    '.pdparams' for parameters only
-    """
-    if not os.path.isdir(save_dir):
-        os.makedirs(save_dir)
-    saved_model_name = model_name + postfix
-
-    fluid.save(program, os.path.join(save_dir, saved_model_name))
-
-    return
diff --git a/hub_module/scripts/README.md b/hub_module/scripts/README.md
deleted file mode 100644
index 7aba94a5a8fbc1bbdb0fa0861c2dfb30f39e90ff..0000000000000000000000000000000000000000
--- a/hub_module/scripts/README.md
+++ /dev/null
@@ -1,33 +0,0 @@
-# Packaging & Installation
-
-Taking the lac module as an example, packaging involves the following steps:
-
-## 1. Configuration file
-
-Write a yml config file in the following format:
-> name: the module's name
->
-> dir: the module directory (relative to the repo root)
->
-> exclude: file names to skip when packaging (relative to dir); this field is optional
->
-> resources: extra resource files that need to be downloaded; this field is optional
->
->> url: the url the resource lives at
->>
->> dest: the path the resource is extracted to after download (relative to dir)
->>
->> uncompress: whether the file needs to be uncompressed; optional, defaults to False
-
-## 2. Run the packaging script
-
-```shell
-python pack.py --config configs/lac.yml
-```
-If packaging succeeds, this step produces a file named lac-2.1.0.tar.gz
-
-## 3. Install the module
-
-```shell
-hub install lac-2.1.0.tar.gz
-```
diff --git a/hub_module/scripts/check_code_style.sh b/hub_module/scripts/check_code_style.sh
deleted file mode 100755
index 682b9cd3d19ae7d776a77c5a0b12b36ef8f8a692..0000000000000000000000000000000000000000
--- a/hub_module/scripts/check_code_style.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-function abort(){
-    echo "Your change doesn't follow PaddleHub's code style." 1>&2
-    echo "Please use pre-commit to check what is wrong." 1>&2
-    exit 1
-}
-
-trap 'abort' 0
-set -e
-
-cd $TRAVIS_BUILD_DIR
-export PATH=/usr/bin:$PATH
-pre-commit install
-
-if ! pre-commit run -a ; then
-    git diff
-    exit 1
-fi
-
-trap : 0
diff --git a/hub_module/scripts/configs/ace2p.yml b/hub_module/scripts/configs/ace2p.yml
deleted file mode 100644
index cfb21243d9e9b2715fb3958c934bfbcf53e7b038..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/ace2p.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: ace2p
-dir: "modules/image/semantic_segmentation/ace2p"
-exclude:
-    - README.md
-resources:
-    -
-        url: https://bj.bcebos.com/paddlehub/hub_dev/ace2p_human_parsing.tar.gz
-        dest: .
- uncompress: True diff --git a/hub_module/scripts/configs/bert_cased_L_12_H_768_A_12.yml b/hub_module/scripts/configs/bert_cased_L_12_H_768_A_12.yml deleted file mode 100644 index 1d02e3305d3f5a6919534b2fd5d0e833ea6a9a87..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/bert_cased_L_12_H_768_A_12.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: bert_cased_L_12_H_768_A_12 -dir: "modules/text/semantic_model/bert_cased_L_12_H_768_A_12" -exclude: - - README.md - - test_context.py - - test_embedding.py -resources: - - - url: https://paddlehub.bj.bcebos.com/model/nlp/bert_cased_L_12_H_768_A_12/assets.tar.gz - dest: assets - uncompress: True diff --git a/hub_module/scripts/configs/bert_cased_L_24_H_1024_A_16.yml b/hub_module/scripts/configs/bert_cased_L_24_H_1024_A_16.yml deleted file mode 100644 index 84b86d20095a3866c094497a41733f88f62d40c4..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/bert_cased_L_24_H_1024_A_16.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: bert_cased_L_24_H_1024_A_16 -dir: "modules/text/semantic_model/bert_cased_L_24_H_1024_A_16" -exclude: - - README.md - - test_context.py - - test_embedding.py -resources: - - - url: https://paddlehub.bj.bcebos.com/model/nlp/bert_cased_L_24_H_1024_A_16/assets.tar.gz - dest: assets - uncompress: True diff --git a/hub_module/scripts/configs/bert_chinese_L_12_H_768_A_12.yml b/hub_module/scripts/configs/bert_chinese_L_12_H_768_A_12.yml deleted file mode 100644 index ce012efe190bf1ac38880b66b8eb827af91c2db1..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/bert_chinese_L_12_H_768_A_12.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: bert_chinese_L_12_H_768_A_12 -dir: "modules/text/semantic_model/bert_chinese_L_12_H_768_A_12" -exclude: - - README.md - - test_context.py - - test_embedding.py -resources: - - - url: https://paddlehub.bj.bcebos.com/model/nlp/bert_chinese_L_12_H_768_A_12/assets.tar.gz - dest: assets - uncompress: True diff --git a/hub_module/scripts/configs/bert_multi_cased_L_12_H_768_A_12.yml b/hub_module/scripts/configs/bert_multi_cased_L_12_H_768_A_12.yml deleted file mode 100644 index fddf33fa422bcf22c56643464395f4c32eff6771..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/bert_multi_cased_L_12_H_768_A_12.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: bert_multi_cased_L_12_H_768_A_12 -dir: "modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12" -exclude: - - README.md - - test_context.py - - test_embedding.py -resources: - - - url: https://paddlehub.bj.bcebos.com/model/nlp/bert_multi_cased_L_12_H_768_A_12/assets.tar.gz - dest: assets - uncompress: True diff --git a/hub_module/scripts/configs/bert_multi_uncased_L_12_H_768_A_12.yml b/hub_module/scripts/configs/bert_multi_uncased_L_12_H_768_A_12.yml deleted file mode 100644 index 898001b22b3677708c75c43069b839202880751c..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/bert_multi_uncased_L_12_H_768_A_12.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: bert_multi_uncased_L_12_H_768_A_12 -dir: "modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12" -exclude: - - README.md - - test_context.py - - test_embedding.py -resources: - - - url: https://paddlehub.bj.bcebos.com/model/nlp/bert_multi_uncased_L_12_H_768_A_12/assets.tar.gz - dest: assets - uncompress: True diff --git a/hub_module/scripts/configs/bert_uncased_L_12_H_768_A_12.yml b/hub_module/scripts/configs/bert_uncased_L_12_H_768_A_12.yml deleted file mode 100644 index 
4a4a7e62f6232259494df02466bd76f2cc167399..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/bert_uncased_L_12_H_768_A_12.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: bert_uncased_L_12_H_768_A_12 -dir: "modules/text/semantic_model/bert_uncased_L_12_H_768_A_12" -exclude: - - README.md - - test_context.py - - test_embedding.py -resources: - - - url: https://paddlehub.bj.bcebos.com/model/nlp/bert_uncased_L_12_H_768_A_12/assets.tar.gz - dest: assets - uncompress: True diff --git a/hub_module/scripts/configs/bert_uncased_L_24_H_1024_A_16.yml b/hub_module/scripts/configs/bert_uncased_L_24_H_1024_A_16.yml deleted file mode 100644 index 7137bb3816f631530a93493fc86097762b84ef7f..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/bert_uncased_L_24_H_1024_A_16.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: bert_uncased_L_24_H_1024_A_16 -dir: "modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16" -exclude: - - README.md - - test_context.py - - test_embedding.py -resources: - - - url: https://paddlehub.bj.bcebos.com/model/nlp/bert_uncased_L_24_H_1024_A_16/assets.tar.gz - dest: assets - uncompress: True diff --git a/hub_module/scripts/configs/chinese-bert-wwm-ext.yml b/hub_module/scripts/configs/chinese-bert-wwm-ext.yml deleted file mode 100644 index 0c898fce93d19db31dc3e48921f8cdfd58ae0925..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/chinese-bert-wwm-ext.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: chinese-bert-wwm-ext -dir: "modules/text/semantic_model/chinese_bert_wwm_ext" -exclude: - - README.md - - test_context.py - - test_embedding.py -resources: - - - url: https://paddlehub.bj.bcebos.com/model/nlp/chinese_bert_wwm_ext/assets.tar.gz - dest: assets - uncompress: True diff --git a/hub_module/scripts/configs/chinese-bert-wwm.yml b/hub_module/scripts/configs/chinese-bert-wwm.yml deleted file mode 100644 index 92232d7e13377fdf4f4ff711349abe91763f8883..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/chinese-bert-wwm.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: chinese-bert-wwm -dir: "modules/text/semantic_model/chinese_bert_wwm" -exclude: - - README.md - - test_context.py - - test_embedding.py -resources: - - - url: https://paddlehub.bj.bcebos.com/model/nlp/chinese_bert_wwm/assets.tar.gz - dest: assets - uncompress: True diff --git a/hub_module/scripts/configs/chinese-roberta-wwm-ext-large.yml b/hub_module/scripts/configs/chinese-roberta-wwm-ext-large.yml deleted file mode 100644 index 0311ca079a385c57e6fcfa56a4b30cd517789028..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/chinese-roberta-wwm-ext-large.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: chinese-roberta-wwm-ext-large -dir: "modules/text/semantic_model/chinese_roberta_wwm_ext_large" -exclude: - - README.md - - test_context.py - - test_embedding.py -resources: - - - url: https://paddlehub.bj.bcebos.com/model/nlp/chinese_roberta_wwm_ext_large/assets.tar.gz - dest: assets - uncompress: True diff --git a/hub_module/scripts/configs/chinese-roberta-wwm-ext.yml b/hub_module/scripts/configs/chinese-roberta-wwm-ext.yml deleted file mode 100644 index c732ffb3f09919f002f872425c37e2744f94c517..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/chinese-roberta-wwm-ext.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: chinese-roberta-wwm-ext -dir: "modules/text/semantic_model/chinese_roberta_wwm_ext" -exclude: - - README.md - - test_context.py - - test_embedding.py -resources: - - - url: 
https://paddlehub.bj.bcebos.com/model/nlp/chinese_roberta_wwm_ext/assets.tar.gz
-        dest: assets
-        uncompress: True
diff --git a/hub_module/scripts/configs/chinese_electra_base.yml b/hub_module/scripts/configs/chinese_electra_base.yml
deleted file mode 100644
index 432e35a391444162e6e63373ffeb1063ed5f228c..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/chinese_electra_base.yml
+++ /dev/null
@@ -1,11 +0,0 @@
-name: chinese-electra-base
-dir: "modules/text/semantic_model/chinese_electra_base"
-exclude:
-    - README.md
-    - test_context.py
-    - test_embedding.py
-resources:
-    -
-        url: https://paddlehub.bj.bcebos.com/model/nlp/chinese_electra_base/assets.tar.gz
-        dest: assets
-        uncompress: True
diff --git a/hub_module/scripts/configs/chinese_electra_small.yml b/hub_module/scripts/configs/chinese_electra_small.yml
deleted file mode 100644
index 96aa1621785c9bf210889ee99e91cd9bb897618b..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/chinese_electra_small.yml
+++ /dev/null
@@ -1,11 +0,0 @@
-name: chinese-electra-small
-dir: "modules/text/semantic_model/chinese_electra_small"
-exclude:
-    - README.md
-    - test_context.py
-    - test_embedding.py
-resources:
-    -
-        url: https://paddlehub.bj.bcebos.com/model/nlp/chinese_electra_small/assets.tar.gz
-        dest: assets
-        uncompress: True
diff --git a/hub_module/scripts/configs/chinese_ocr_db_crnn_mobile.yml b/hub_module/scripts/configs/chinese_ocr_db_crnn_mobile.yml
deleted file mode 100644
index 08616bbdfb4430786e00b979cacbe8e59ee1eebb..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/chinese_ocr_db_crnn_mobile.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: chinese_ocr_db_crnn_mobile
-dir: "modules/image/text_recognition/chinese_ocr_db_crnn_mobile"
-exclude:
-    - README.md
-
-resources:
-    -
-        url: https://bj.bcebos.com/paddlehub/model/image/ocr/chinese_ocr_db_rcnn_infer_model.tar.gz
-        dest: .
-        uncompress: True
diff --git a/hub_module/scripts/configs/chinese_ocr_db_crnn_server.yml b/hub_module/scripts/configs/chinese_ocr_db_crnn_server.yml
deleted file mode 100644
index fc827322e4e1c5f4880adf277493bca228d5c21c..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/chinese_ocr_db_crnn_server.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: chinese_ocr_db_crnn_server
-dir: "modules/image/text_recognition/chinese_ocr_db_crnn_server"
-exclude:
-    - README.md
-
-resources:
-    -
-        url: https://bj.bcebos.com/paddlehub/model/image/ocr/chinese_ocr_db_crnn_server_assets.tar.gz
-        dest: .
-        uncompress: True
diff --git a/hub_module/scripts/configs/chinese_text_detection_db_mobile.yml b/hub_module/scripts/configs/chinese_text_detection_db_mobile.yml
deleted file mode 100644
index 106f0a2eac97f59d36261a2fb7a93550a568b2f0..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/chinese_text_detection_db_mobile.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: chinese_text_detection_db_mobile
-dir: "modules/image/text_recognition/chinese_text_detection_db_mobile"
-exclude:
-    - README.md
-
-resources:
-    -
-        url: https://bj.bcebos.com/paddlehub/model/image/ocr/chinese_text_detection_db_infer_model.tar.gz
-        dest: .
-        uncompress: True
diff --git a/hub_module/scripts/configs/chinese_text_detection_db_server.yml b/hub_module/scripts/configs/chinese_text_detection_db_server.yml
deleted file mode 100644
index 0fc9a4f45cba33f5aad14f000c09fc75d1728af0..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/chinese_text_detection_db_server.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: chinese_text_detection_db_server
-dir: "modules/image/text_recognition/chinese_text_detection_db_server"
-exclude:
-    - README.md
-
-resources:
-    -
-        url: https://bj.bcebos.com/paddlehub/model/image/ocr/ch_det_r50_vd_db.tar.gz
-        dest: .
-        uncompress: True
diff --git a/hub_module/scripts/configs/darknet53_imagenet.yml b/hub_module/scripts/configs/darknet53_imagenet.yml
deleted file mode 100644
index 615ca5de328d14302d50d46c7ea1b9077d6a814a..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/darknet53_imagenet.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: darknet53_imagenet
-dir: "modules/image/classification/darknet53_imagenet"
-resources:
-    -
-        url: https://paddlehub.bj.bcebos.com/model/cv/darknet53_model.tar.gz
-        dest: darknet53_model
-        uncompress: True
diff --git a/hub_module/scripts/configs/deep_voice3.yml b/hub_module/scripts/configs/deep_voice3.yml
deleted file mode 100644
index 4acdb7133bf022031e2bafe2cfd10d8bb0a47a08..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/deep_voice3.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: deep_voice3
-dir: "modules/audio/tts/deep_voice3"
-exclude:
-    - README.md
-resources:
-    -
-        url: https://paddlespeech.bj.bcebos.com/Parakeet/deepvoice3_ljspeech_griffin-lim_ckpt_1.0.zip
-        dest: assets
-        uncompress: True
diff --git a/hub_module/scripts/configs/deeplabv3p_xception65_humanseg.yml b/hub_module/scripts/configs/deeplabv3p_xception65_humanseg.yml
deleted file mode 100644
index 0330a3ca1e21b9bf4f204b6fad5cad702171e7c1..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/deeplabv3p_xception65_humanseg.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: deeplabv3p_xception65_humanseg
-dir: "modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/"
-exclude:
-    - README.md
-resources:
-    -
-        url: https://bj.bcebos.com/paddlehub/hub_dev/deeplabv3p_xception65_humanseg_model.tar.gz
-        dest: .
-        uncompress: True
diff --git a/hub_module/scripts/configs/efficientnetb0_imagenet.yml b/hub_module/scripts/configs/efficientnetb0_imagenet.yml
deleted file mode 100644
index 87e18ea52b7c0a3c045b32b901e4910aeba4bdd7..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/efficientnetb0_imagenet.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: efficientnetb0_imagenet
-dir: "modules/image/classification/efficientnetb0_imagenet"
-exclude:
-    - README.md
-resources:
-    -
-        url: https://bj.bcebos.com/paddlehub/model/image/classification/efficientnetb0_imagenet_infer_model.tar.gz
-        dest: .
-        uncompress: True
diff --git a/hub_module/scripts/configs/efficientnetb0_small_imagenet.yml b/hub_module/scripts/configs/efficientnetb0_small_imagenet.yml
deleted file mode 100644
index 402cc05559730a73f6225acbf0ba567e190ecd87..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/efficientnetb0_small_imagenet.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: efficientnetb0_small_imagenet
-dir: "modules/image/classification/efficientnetb0_small_imagenet"
-exclude:
-    - README.md
-resources:
-    -
-        url: https://bj.bcebos.com/paddlehub/model/image/classification/efficientnetb0_small_imagenet_infer_model.tar.gz
-        dest: .
- uncompress: True diff --git a/hub_module/scripts/configs/efficientnetb1_imagenet.yml b/hub_module/scripts/configs/efficientnetb1_imagenet.yml deleted file mode 100644 index 01744d8940ab8847374b6cf11eb85f93b3622cb5..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/efficientnetb1_imagenet.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: efficientnetb1_imagenet -dir: "modules/image/classification/efficientnetb1_imagenet" -exclude: - - README.md -resources: - - - url: https://bj.bcebos.com/paddlehub/model/image/classification/efficientnetb1_imagenet_infer_model.tar.gz - dest: . - uncompress: True diff --git a/hub_module/scripts/configs/efficientnetb2_imagenet.yml b/hub_module/scripts/configs/efficientnetb2_imagenet.yml deleted file mode 100644 index a15b4b8481a4d546bd4c83c46ffef3a3549e8f90..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/efficientnetb2_imagenet.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: efficientnetb2_imagenet -dir: "modules/image/classification/efficientnetb2_imagenet" -exclude: - - README.md -resources: - - - url: https://bj.bcebos.com/paddlehub/model/image/classification/efficientnetb2_imagenet_infer_model.tar.gz - dest: . - uncompress: True diff --git a/hub_module/scripts/configs/efficientnetb3_imagenet.yml b/hub_module/scripts/configs/efficientnetb3_imagenet.yml deleted file mode 100644 index 321dc4b2ac098b64d07d93c7b160f7a012a5d73c..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/efficientnetb3_imagenet.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: efficientnetb3_imagenet -dir: "modules/image/classification/efficientnetb3_imagenet" -exclude: - - README.md -resources: - - - url: https://bj.bcebos.com/paddlehub/model/image/classification/efficientnetb3_imagenet_infer_model.tar.gz - dest: . - uncompress: True diff --git a/hub_module/scripts/configs/efficientnetb4_imagenet.yml b/hub_module/scripts/configs/efficientnetb4_imagenet.yml deleted file mode 100644 index 82f2590de1399341b59ddff097324e305e0e1430..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/efficientnetb4_imagenet.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: efficientnetb4_imagenet -dir: "modules/image/classification/efficientnetb4_imagenet" -exclude: - - README.md -resources: - - - url: https://bj.bcebos.com/paddlehub/model/image/classification/efficientnetb4_imagenet_infer_model.tar.gz - dest: . - uncompress: True diff --git a/hub_module/scripts/configs/efficientnetb5_imagenet.yml b/hub_module/scripts/configs/efficientnetb5_imagenet.yml deleted file mode 100644 index 8aac6b4b250331dbd59badfac31c66a8fa01b12d..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/efficientnetb5_imagenet.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: efficientnetb5_imagenet -dir: "modules/image/classification/efficientnetb5_imagenet" -exclude: - - README.md -resources: - - - url: https://bj.bcebos.com/paddlehub/model/image/classification/efficientnetb5_imagenet_infer_model.tar.gz - dest: . 
- uncompress: True diff --git a/hub_module/scripts/configs/efficientnetb6_imagenet.yml b/hub_module/scripts/configs/efficientnetb6_imagenet.yml deleted file mode 100644 index a4482769aa187d4cbe138e8cc52f5f968ce39774..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/efficientnetb6_imagenet.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: efficientnetb6_imagenet -dir: "modules/image/classification/efficientnetb6_imagenet" -exclude: - - README.md -resources: - - - url: https://bj.bcebos.com/paddlehub/model/image/classification/efficientnetb6_imagenet_infer_model.tar.gz - dest: . - uncompress: True diff --git a/hub_module/scripts/configs/efficientnetb7_imagenet.yml b/hub_module/scripts/configs/efficientnetb7_imagenet.yml deleted file mode 100644 index 6931c9fd9fcd7d0a856a8d01eccf0c605b98e64a..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/efficientnetb7_imagenet.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: efficientnetb7_imagenet -dir: "modules/image/classification/efficientnetb7_imagenet" -exclude: - - README.md -resources: - - - url: https://bj.bcebos.com/paddlehub/model/image/classification/efficientnetb7_imagenet_infer_model.tar.gz - dest: . - uncompress: True diff --git a/hub_module/scripts/configs/emotion_detection_textcnn.yml b/hub_module/scripts/configs/emotion_detection_textcnn.yml deleted file mode 100644 index 34c4640ea3abeff9fdc55d2849c3d1972fb5b598..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/emotion_detection_textcnn.yml +++ /dev/null @@ -1,10 +0,0 @@ -name: emotion_detection_textcnn -dir: "modules/text/sentiment_analysis/emotion_detection_textcnn" -exclude: - - README.md - - test.py -resources: - - - url: https://bj.bcebos.com/paddlehub/model/nlp/sentiment_analysis/emotion_detection_textcnn_assets.tar.gz - dest: . 
- uncompress: True diff --git a/hub_module/scripts/configs/ernie.yml b/hub_module/scripts/configs/ernie.yml deleted file mode 100644 index 64eb4ed9e1eca122a67b9a21a0a0e2d7d0bd004e..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/ernie.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: ernie -dir: "modules/text/semantic_model/ernie" -exclude: - - README.md - - test_context.py - - test_embedding.py -resources: - - - url: https://paddlehub.bj.bcebos.com/model/nlp/ernie/assets.tar.gz - dest: assets - uncompress: True diff --git a/hub_module/scripts/configs/ernie_gen_acrostic_poetry.yml b/hub_module/scripts/configs/ernie_gen_acrostic_poetry.yml deleted file mode 100644 index e9e070582ec6367b0f3825a3297c318f95c0d704..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/ernie_gen_acrostic_poetry.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: ernie_gen_acrostic_poetry -dir: "modules/text/text_generation/ernie_gen_acrostic_poetry" -exclude: - - README.md -resources: - - - url: https://paddlehub.bj.bcebos.com/model/nlp/ernie_gen_acrostic_poetry/assets.tar.gz - dest: assets - uncompress: True diff --git a/hub_module/scripts/configs/ernie_gen_couplet.yml b/hub_module/scripts/configs/ernie_gen_couplet.yml deleted file mode 100644 index 0ac5e56956d5f9e3c7f111660f87cb3e39bdd6ce..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/ernie_gen_couplet.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: ernie_gen_couplet -dir: "modules/text/text_generation/ernie_gen_couplet" -exclude: - - README.md -resources: - - - url: https://paddlehub.bj.bcebos.com/model/nlp/ernie_gen_couplet/assets.tar.gz - dest: assets - uncompress: True diff --git a/hub_module/scripts/configs/ernie_gen_lover_words.yml b/hub_module/scripts/configs/ernie_gen_lover_words.yml deleted file mode 100644 index 54b681996d8276d44ca6081a6eb2f4ad3f98598b..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/ernie_gen_lover_words.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: ernie_gen_lover_words -dir: "modules/text/text_generation/ernie_gen_lover_words" -exclude: - - README.md -resources: - - - url: https://paddlehub.bj.bcebos.com/model/nlp/ernie_gen_lover_words/assets.tar.gz - dest: assets - uncompress: True diff --git a/hub_module/scripts/configs/ernie_gen_poetry.yml b/hub_module/scripts/configs/ernie_gen_poetry.yml deleted file mode 100644 index fbb735e1ce2a3676c397fe53e61f66e1f4e2c7e1..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/ernie_gen_poetry.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: ernie_gen_poetry -dir: "modules/text/text_generation/ernie_gen_poetry" -exclude: - - README.md -resources: - - - url: https://paddlehub.bj.bcebos.com/model/nlp/ernie_gen_poetry/assets.tar.gz - dest: assets - uncompress: True diff --git a/hub_module/scripts/configs/ernie_skep_sentiment_analysis.yml b/hub_module/scripts/configs/ernie_skep_sentiment_analysis.yml deleted file mode 100644 index a50edc199995183fe02cc84783e7c38b0306b3dc..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/ernie_skep_sentiment_analysis.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: ernie_skep_sentiment_analysis -dir: "modules/text/sentiment_analysis/ernie_skep_sentiment_analysis" -exclude: - - README.md -resources: - - - url: https://paddlehub.bj.bcebos.com/model/nlp/ernie_skep_sentiment_analysis/assets.tar.gz - dest: assets - uncompress: True diff --git a/hub_module/scripts/configs/ernie_tiny.yml b/hub_module/scripts/configs/ernie_tiny.yml deleted file mode 100644 index 
fba4ad9fdba460aa2b7fc4fdb86fbe4b4924f22b..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/ernie_tiny.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: ernie_tiny -dir: "modules/text/semantic_model/ernie_tiny" -exclude: - - README.md - - test_context.py - - test_embedding.py -resources: - - - url: https://paddlehub.bj.bcebos.com/model/nlp/ernie_tiny/assets.tar.gz - dest: assets - uncompress: True diff --git a/hub_module/scripts/configs/ernie_tiny_couplet.yml b/hub_module/scripts/configs/ernie_tiny_couplet.yml deleted file mode 100644 index 268a9e0bd2821bf9a835d546326970a3573074cc..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/ernie_tiny_couplet.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: ernie_tiny_couplet -dir: "modules/text/text_generation/ernie_tiny_couplet" -exclude: - - README.md -resources: - - - url: https://paddlehub.bj.bcebos.com/model/nlp/ernie_tiny_couplet/assets.tar.gz - dest: assets - uncompress: True diff --git a/hub_module/scripts/configs/ernie_v2_eng_base.yml b/hub_module/scripts/configs/ernie_v2_eng_base.yml deleted file mode 100644 index 9245f51b24b497593be4e72df8b945534424af76..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/ernie_v2_eng_base.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: ernie_v2_eng_base -dir: "modules/text/semantic_model/ernie_v2_eng_base" -exclude: - - README.md - - test_context.py - - test_embedding.py -resources: - - - url: https://paddlehub.bj.bcebos.com/model/nlp/ernie_v2_eng_base/assets.tar.gz - dest: assets - uncompress: True diff --git a/hub_module/scripts/configs/ernie_v2_eng_large.yml b/hub_module/scripts/configs/ernie_v2_eng_large.yml deleted file mode 100644 index 364caf4ed5176df92a6734a986241208c773e5e2..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/ernie_v2_eng_large.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: ernie_v2_eng_large -dir: "modules/text/semantic_model/ernie_v2_eng_large" -exclude: - - README.md - - test_context.py - - test_embedding.py -resources: - - - url: https://paddlehub.bj.bcebos.com/model/nlp/ernie_v2_eng_large/assets.tar.gz - dest: assets - uncompress: True diff --git a/hub_module/scripts/configs/face_landmark_localization.yml b/hub_module/scripts/configs/face_landmark_localization.yml deleted file mode 100644 index 060e7cb71d0cb18a39d3df092d61fbcc890e9163..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/face_landmark_localization.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: face_landmark_localization -dir: "modules/image/keypoint_detection/face_landmark_localization" -exclude: - - README.md -resources: - - - url: https://bj.bcebos.com/paddlehub/hub_dev/face_landmark_localization.tar.gz - dest: . 
- uncompress: True diff --git a/hub_module/scripts/configs/faster_rcnn_resnet50_coco2017.yml b/hub_module/scripts/configs/faster_rcnn_resnet50_coco2017.yml deleted file mode 100644 index 523fa67a39f263a1d439d0f7a7747d22db563792..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/faster_rcnn_resnet50_coco2017.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: faster_rcnn_resnet50_coco2017 -dir: "modules/image/object_detection/faster_rcnn_resnet50_coco2017" -resources: - - - url: https://paddlehub.bj.bcebos.com/model/cv/faster_rcnn_resnet50_model.tar.gz - dest: faster_rcnn_resnet50_model - uncompress: True diff --git a/hub_module/scripts/configs/faster_rcnn_resnet50_fpn_coco2017.yml b/hub_module/scripts/configs/faster_rcnn_resnet50_fpn_coco2017.yml deleted file mode 100644 index a438f19e43c4b023f774d5f170016dd3294fa2a5..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/faster_rcnn_resnet50_fpn_coco2017.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: faster_rcnn_resnet50_fpn_coco2017 -dir: "modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017" -resources: - - - url: https://paddlehub.bj.bcebos.com/model/cv/faster_rcnn_resnet50_fpn_model.tar.gz - dest: faster_rcnn_resnet50_fpn_model - uncompress: True diff --git a/hub_module/scripts/configs/faster_rcnn_resnet50_fpn_venus.yml b/hub_module/scripts/configs/faster_rcnn_resnet50_fpn_venus.yml deleted file mode 100644 index f285049d2ab6ab69d738a21837bc24b479ea0ac1..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/faster_rcnn_resnet50_fpn_venus.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: faster_rcnn_resnet50_fpn_venus -dir: "modules/image/object_detection/faster_rcnn_resnet50_fpn_venus" -resources: - - - url: https://paddlehub.bj.bcebos.com/model/cv/faster_rcnn_resnet50_fpn_venus_model.tar.gz - dest: faster_rcnn_resnet50_fpn_model - uncompress: True diff --git a/hub_module/scripts/configs/fix_resnext101_32x48d_wsl_imagenet.yml b/hub_module/scripts/configs/fix_resnext101_32x48d_wsl_imagenet.yml deleted file mode 100644 index f852b0319fa428a7a46e4417daef1bcad6cb94bd..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/fix_resnext101_32x48d_wsl_imagenet.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: fix_resnext101_32x48d_wsl_imagenet -dir: "modules/image/classification/fix_resnext101_32x48d_wsl_imagenet" -exclude: - - README.md -resources: - - - url: https://bj.bcebos.com/paddlehub/hub_dev/fix_resnext101_32x48d_wsl_imagenet_model.tar.gz - dest: model - uncompress: True diff --git a/hub_module/scripts/configs/human_pose_estimation_resnet50_mpii.yml b/hub_module/scripts/configs/human_pose_estimation_resnet50_mpii.yml deleted file mode 100644 index 63c28cf0a6c20d6a910afe577e1058ccb9a8d8c0..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/human_pose_estimation_resnet50_mpii.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: human_pose_estimation_resnet50_mpii -dir: "modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii" -exclude: - - README.md -resources: - - - url: https://bj.bcebos.com/paddlehub/hub_dev/pose-resnet50-mpii-384x384.tar.gz - dest: . 
- uncompress: True diff --git a/hub_module/scripts/configs/lac.yml b/hub_module/scripts/configs/lac.yml deleted file mode 100644 index 6211ea703db671f6a60c3e1e60c6d26dec7e491f..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/lac.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: lac -dir: "modules/text/lexical_analysis/lac" -exclude: - - user.dict - - README.md - - test.py -resources: - - - url: https://bj.bcebos.com/paddlehub/model/nlp/lac_infer_model.tar.gz - dest: . - uncompress: True diff --git a/hub_module/scripts/configs/lda_news.yml b/hub_module/scripts/configs/lda_news.yml deleted file mode 100644 index 7153d1713401ec053b67fe599659cce4766256f1..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/lda_news.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: lda_news -dir: "modules/text/semantic_model/lda_news" -exclude: - - README.md -resources: - - - url: https://bj.bcebos.com/paddlehub/model/nlp/semantic_model/lda_news.tar.gz - dest: . - uncompress: True diff --git a/hub_module/scripts/configs/lda_novel.yml b/hub_module/scripts/configs/lda_novel.yml deleted file mode 100644 index 1c981182c22f923bcd5fa23e2de0faeaa7f26584..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/lda_novel.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: lda_novel -dir: "modules/text/semantic_model/lda_novel" -exclude: - - README.md -resources: - - - url: https://bj.bcebos.com/paddlehub/model/nlp/semantic_model/lda_novel.tar.gz - dest: . - uncompress: True diff --git a/hub_module/scripts/configs/lda_webpage.yml b/hub_module/scripts/configs/lda_webpage.yml deleted file mode 100644 index 7b3ceb7ba59a4a45273aa1a97288210e16763347..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/lda_webpage.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: lda_webpage -dir: "modules/text/semantic_model/lda_webpage" -exclude: - - README.md -resources: - - - url: https://bj.bcebos.com/paddlehub/model/nlp/semantic_model/lda_webpage.tar.gz - dest: . 
- uncompress: True diff --git a/hub_module/scripts/configs/mobilenet_v1_imagenet.yml b/hub_module/scripts/configs/mobilenet_v1_imagenet.yml deleted file mode 100644 index 93520bdf35773ad2f9f9dcebdb20878ce7d64ab9..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/mobilenet_v1_imagenet.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: mobilenet_v1_imagenet -dir: "modules/image/classification/mobilenet_v1_imagenet" -resources: - - - url: https://paddlehub.bj.bcebos.com/model/cv/mobilenet_v1_model.tar.gz - dest: mobilenet_v1_model - uncompress: True diff --git a/hub_module/scripts/configs/mobilenet_v2_animals.yml b/hub_module/scripts/configs/mobilenet_v2_animals.yml deleted file mode 100644 index 9a9860a75b7789fd2b7e83b99c67bdbe3df54376..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/mobilenet_v2_animals.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: mobilenet_v2_animals -dir: "modules/image/classification/mobilenet_v2_animals" -exclude: - - README.md -resources: - - - url: https://bj.bcebos.com/paddlehub/model/cv/mobilenet_v2_animals.tar.gz - dest: model - uncompress: True diff --git a/hub_module/scripts/configs/mobilenet_v2_dishes.yml b/hub_module/scripts/configs/mobilenet_v2_dishes.yml deleted file mode 100644 index 593408e62ef107539cf5d448f444dc6687da9b8a..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/mobilenet_v2_dishes.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: mobilenet_v2_dishes -dir: "modules/image/classification/mobilenet_v2_dishes" -exclude: - - README.md -resources: - - - url: https://bj.bcebos.com/paddlehub/model/cv/mobilenet_v2_dishes.tar.gz - dest: model - uncompress: True diff --git a/hub_module/scripts/configs/mobilenet_v2_imagenet_ssld.yml b/hub_module/scripts/configs/mobilenet_v2_imagenet_ssld.yml deleted file mode 100644 index 1476c2fd373ab81303628ca822db6ff76aa786bc..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/mobilenet_v2_imagenet_ssld.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: mobilenet_v2_imagenet_ssld -dir: "modules/image/classification/mobilenet_v2_imagenet_ssld" -exclude: - - README.md -resources: - - - url: https://paddlehub.bj.bcebos.com/model/cv/mobilenet_v2_imagenet_ssld_pretrained.tar.gz - dest: model - uncompress: True diff --git a/hub_module/scripts/configs/mobilenet_v3_large_imagenet_ssld.yml b/hub_module/scripts/configs/mobilenet_v3_large_imagenet_ssld.yml deleted file mode 100644 index b656ce7a9a1357a592674e463aaa6192e3bbc976..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/mobilenet_v3_large_imagenet_ssld.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: mobilenet_v3_large_imagenet_ssld -dir: "modules/image/classification/mobilenet_v3_large_imagenet_ssld" -exclude: - - README.md -resources: - - - url: https://paddlehub.bj.bcebos.com/model/cv/mobilenet_v3_large_imagenet_ssld_pretrained.tar.gz - dest: model - uncompress: True diff --git a/hub_module/scripts/configs/mobilenet_v3_small_imagenet_ssld.yml b/hub_module/scripts/configs/mobilenet_v3_small_imagenet_ssld.yml deleted file mode 100644 index 54058210abac333b16cba1568500750c3caacf98..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/mobilenet_v3_small_imagenet_ssld.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: mobilenet_v3_small_imagenet_ssld -dir: "modules/image/classification/mobilenet_v3_small_imagenet_ssld" -exclude: - - README.md -resources: - - - url: https://paddlehub.bj.bcebos.com/model/cv/mobilenet_v3_small_imagenet_ssld_pretrained.tar.gz - dest: model - uncompress: True 
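All of the packaging configs above and below follow the yml schema described in `scripts/README.md` earlier in this diff, and several of them show how easily the `name` field drifts out of sync with `dir` when a config is copied. A small, hypothetical checker (not part of the repo; it assumes PyYAML and the convention that the last component of `dir` matches `name` up to dash/underscore differences) could flag that kind of mismatch:

```python
# Hypothetical validator for the packaging configs, based on the schema
# documented in scripts/README.md (name, dir, optional exclude/resources).
import os
import sys

import yaml


def check_config(path):
    with open(path) as f:
        cfg = yaml.safe_load(f)
    problems = []
    for field in ("name", "dir"):
        if field not in cfg:
            problems.append("missing required field: {}".format(field))
    if "name" in cfg and "dir" in cfg:
        # assumed convention: the dir's last path component matches the name
        leaf = os.path.basename(cfg["dir"].rstrip("/"))
        if leaf.replace("-", "_") != cfg["name"].replace("-", "_"):
            problems.append("name {!r} does not match dir {!r}".format(
                cfg["name"], cfg["dir"]))
    for res in cfg.get("resources") or []:
        for field in ("url", "dest"):
            if field not in res:
                problems.append("resource missing {}: {}".format(field, res))
    return problems


if __name__ == "__main__":
    # e.g. python check_configs.py configs/*.yml
    for path in sys.argv[1:]:
        for problem in check_config(path):
            print("{}: {}".format(path, problem))
```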
diff --git a/hub_module/scripts/configs/porn_cnn.yml b/hub_module/scripts/configs/porn_cnn.yml
deleted file mode 100644
index 3d86ffe3d6a74c8dbd7f42a24759cade4a95abc8..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/porn_cnn.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: porn_detection_cnn
-dir: "modules/text/text_review/porn_detection_cnn"
-exclude:
-    - README.md
-    - test.py
-resources:
-    -
-        url: https://bj.bcebos.com/paddlehub/model/nlp/porn_detection_cnn_infer_model.tar.gz
-        dest: .
-        uncompress: True
diff --git a/hub_module/scripts/configs/porn_gru.yml b/hub_module/scripts/configs/porn_gru.yml
deleted file mode 100644
index a7bfbef61bde921582e9f6a73e148f5c316faa3f..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/porn_gru.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: porn_detection_gru
-dir: "modules/text/text_review/porn_detection_gru"
-exclude:
-    - README.md
-    - test.py
-resources:
-    -
-        url: https://bj.bcebos.com/paddlehub/model/nlp/porn_detection_gru_infer_model.tar.gz
-        dest: .
-        uncompress: True
diff --git a/hub_module/scripts/configs/porn_lstm.yml b/hub_module/scripts/configs/porn_lstm.yml
deleted file mode 100644
index cf1900070abdc727d4b1a81d3fffb01119ce2fe4..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/porn_lstm.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: porn_detection_lstm
-dir: "modules/text/text_review/porn_detection_lstm"
-exclude:
-    - README.md
-    - test.py
-resources:
-    -
-        url: https://bj.bcebos.com/paddlehub/model/nlp/porn_detection_lstm_infer_model.tar.gz
-        dest: .
-        uncompress: True
diff --git a/hub_module/scripts/configs/pyramidbox_face_detection.yml b/hub_module/scripts/configs/pyramidbox_face_detection.yml
deleted file mode 100644
index c58d69fa6915d4034e471cacacd9a52896aebae4..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/pyramidbox_face_detection.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: pyramidbox_face_detection
-dir: "modules/image/face_detection/pyramidbox_face_detection"
-exclude:
-    - README.md
-resources:
-    -
-        url: https://bj.bcebos.com/paddlehub/hub_dev/pyramidbox_face_detection_widerface.tar.gz
-        dest: .
-        uncompress: True
diff --git a/hub_module/scripts/configs/pyramidbox_lite_mobile.yml b/hub_module/scripts/configs/pyramidbox_lite_mobile.yml
deleted file mode 100644
index dff403cbf9564e25ff3db7c0587eb87e21b6e6d9..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/pyramidbox_lite_mobile.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: pyramidbox_lite_mobile
-dir: "modules/image/face_detection/pyramidbox_lite_mobile"
-exclude:
-    - README.md
-resources:
-    -
-        url: https://bj.bcebos.com/paddlehub/hub_dev/pyramidbox_lite_mobile_face_detection.tar.gz
-        dest: .
-        uncompress: True
diff --git a/hub_module/scripts/configs/pyramidbox_lite_mobile_mask.yml b/hub_module/scripts/configs/pyramidbox_lite_mobile_mask.yml
deleted file mode 100644
index 420058299f357278a546481fd4af3ceef4d298fa..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/pyramidbox_lite_mobile_mask.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: pyramidbox_lite_mobile_mask
-dir: "modules/image/face_detection/pyramidbox_lite_mobile_mask"
-exclude:
-    - README.md
-resources:
-    -
-        url: https://bj.bcebos.com/paddlehub/hub_dev/pyramidbox_lite_mobile_mask_model.tar.gz
-        dest: .
- uncompress: True diff --git a/hub_module/scripts/configs/pyramidbox_lite_server.yml b/hub_module/scripts/configs/pyramidbox_lite_server.yml deleted file mode 100644 index 3b17992f13e947bad751867470a41017aa1c0435..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/pyramidbox_lite_server.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: pyramidbox_lite_mobile_mask -dir: "modules/image/face_detection/pyramidbox_lite_server" -exclude: - - README.md -resources: - - - url: https://bj.bcebos.com/paddlehub/hub_dev/pyramidbox_lite_server_face_detection.tar.gz - dest: . - uncompress: True diff --git a/hub_module/scripts/configs/pyramidbox_lite_server_mask.yml b/hub_module/scripts/configs/pyramidbox_lite_server_mask.yml deleted file mode 100644 index 22512406325c50896c365b9cf9d8532ed118bf87..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/pyramidbox_lite_server_mask.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: pyramidbox_lite_server_mask -dir: "modules/image/face_detection/pyramidbox_lite_server_mask" -exclude: - - README.md -resources: - - - url: https://bj.bcebos.com/paddlehub/hub_dev/pyramidbox_lite_server_mask_model.tar.gz - dest: . - uncompress: True diff --git a/hub_module/scripts/configs/rbt3.yml b/hub_module/scripts/configs/rbt3.yml deleted file mode 100644 index 979581e0da1e0cdafe61586e3905022593d36df2..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/rbt3.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: rbt3 -dir: "modules/text/semantic_model/rbt3" -exclude: - - README.md - - test_context.py - - test_embedding.py -resources: - - - url: https://paddlehub.bj.bcebos.com/model/nlp/rbt3/assets.tar.gz - dest: assets - uncompress: True diff --git a/hub_module/scripts/configs/rbtl3.yml b/hub_module/scripts/configs/rbtl3.yml deleted file mode 100644 index 0f2bfd22e73722137d35bc66ff3e7c3f3600fa17..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/rbtl3.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: rbtl3 -dir: "modules/text/semantic_model/rbtl3" -exclude: - - README.md - - test_context.py - - test_embedding.py -resources: - - - url: https://paddlehub.bj.bcebos.com/model/nlp/rbtl3/assets.tar.gz - dest: assets - uncompress: True diff --git a/hub_module/scripts/configs/res2net101_vd_26w_4s_imagenet.yml b/hub_module/scripts/configs/res2net101_vd_26w_4s_imagenet.yml deleted file mode 100644 index 523d80d99018b93213ed5d73ac7fa2623e1c62d8..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/res2net101_vd_26w_4s_imagenet.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: res2net101_vd_26w_4s_imagenet -dir: "modules/image/classification/res2net101_vd_26w_4s_imagenet" -exclude: - - README.md -resources: - - - url: https://bj.bcebos.com/paddlehub/hub_dev/res2net101_vd_26w_4s_imagenet_model.tar.gz - dest: . - uncompress: True diff --git a/hub_module/scripts/configs/resnet18_vd_imagenet.yml b/hub_module/scripts/configs/resnet18_vd_imagenet.yml deleted file mode 100644 index b3529f54722893fc4134e9950b426b45f2d6bb7c..0000000000000000000000000000000000000000 --- a/hub_module/scripts/configs/resnet18_vd_imagenet.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: resnet18_vd_imagenet -dir: "modules/image/classification/resnet18_vd_imagenet" -exclude: - - README.md -resources: - - - url: https://bj.bcebos.com/paddlehub/hub_dev/resnet18_vd_imagenet_model.tar.gz - dest: . 
diff --git a/hub_module/scripts/configs/resnet34_v2_imagenet.yml b/hub_module/scripts/configs/resnet34_v2_imagenet.yml
deleted file mode 100644
index 60c49c5caaaba2b65d1053a9d7dfcc2b514ea48d..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/resnet34_v2_imagenet.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: resnet34_v2_imagenet
-dir: "modules/image/classification/resnet34_v2_imagenet"
-resources:
-  -
-    url: https://paddlehub.bj.bcebos.com/model/cv/resnet34_v2_model.tar.gz
-    dest: resnet34_v2_model
-    uncompress: True
diff --git a/hub_module/scripts/configs/resnet50_v2_imagenet.yml b/hub_module/scripts/configs/resnet50_v2_imagenet.yml
deleted file mode 100644
index 4fb5b3ffa706ddf7cacb4dfa075eedb09c1a20b5..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/resnet50_v2_imagenet.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: resnet50_v2_imagenet
-dir: "modules/image/classification/resnet50_v2_imagenet"
-resources:
-  -
-    url: https://paddlehub.bj.bcebos.com/model/cv/resnet50_v2_model.tar.gz
-    dest: resnet50_v2_model
-    uncompress: True
diff --git a/hub_module/scripts/configs/resnet50_vd_10w.yml b/hub_module/scripts/configs/resnet50_vd_10w.yml
deleted file mode 100644
index a46563320e88c95e1c9c1d5a7c8ee6ff3eb9a9e1..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/resnet50_vd_10w.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: resnet50_vd_10w
-dir: "modules/image/classification/resnet50_vd_10w"
-exclude:
-  - README.md
-resources:
-  -
-    url: https://paddlehub.bj.bcebos.com/model/cv/resnet50_vd_10w_pretrained.tar.gz
-    dest: model
-    uncompress: True
diff --git a/hub_module/scripts/configs/resnet50_vd_animals.yml b/hub_module/scripts/configs/resnet50_vd_animals.yml
deleted file mode 100644
index 08bfaaee00eafbd13eb62329988edf0567e49a05..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/resnet50_vd_animals.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: resnet50_vd_animals
-dir: "modules/image/classification/resnet50_vd_animals"
-exclude:
-  - README.md
-resources:
-  -
-    url: https://bj.bcebos.com/paddlehub/model/cv/resnet50_vd_animals.tar.gz
-    dest: model
-    uncompress: True
diff --git a/hub_module/scripts/configs/resnet50_vd_dishes.yml b/hub_module/scripts/configs/resnet50_vd_dishes.yml
deleted file mode 100644
index d502e4ad4e80cd424d36cd778dc6fbfff2207362..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/resnet50_vd_dishes.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: resnet50_vd_dishes
-dir: "modules/image/classification/resnet50_vd_dishes"
-exclude:
-  - README.md
-resources:
-  -
-    url: https://bj.bcebos.com/paddlehub/model/cv/resnet50_vd_dishes.tar.gz
-    dest: model
-    uncompress: True
diff --git a/hub_module/scripts/configs/resnet50_vd_imagenet_ssld.yml b/hub_module/scripts/configs/resnet50_vd_imagenet_ssld.yml
deleted file mode 100644
index 74b2bd218a771a3284dce987adc993dafd890388..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/resnet50_vd_imagenet_ssld.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: resnet50_vd_imagenet_ssld
-dir: "modules/image/classification/resnet50_vd_imagenet_ssld"
-exclude:
-  - README.md
-resources:
-  -
-    url: https://paddlehub.bj.bcebos.com/model/cv/resnet50_vd_imagenet_ssld_pretrained.tar.gz
-    dest: model
-    uncompress: True
diff --git a/hub_module/scripts/configs/resnet50_vd_wildanimals.yml b/hub_module/scripts/configs/resnet50_vd_wildanimals.yml
deleted file mode 100644
index a3b980836b2c303d12780cbfc45ed5dc3f985e76..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/resnet50_vd_wildanimals.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: resnet50_vd_wildanimals
-dir: "modules/image/classification/resnet50_vd_wildanimals/"
-exclude:
-  - README.md
-resources:
-  -
-    url: https://bj.bcebos.com/paddlehub/model/cv/resnet50_vd_wildanimals.tar.gz
-    dest: model
-    uncompress: True
diff --git a/hub_module/scripts/configs/retinanet_resnet50_fpn_coco2017.yml b/hub_module/scripts/configs/retinanet_resnet50_fpn_coco2017.yml
deleted file mode 100644
index 13e2a832118a889d0f87fae411f26e98d8e99f47..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/retinanet_resnet50_fpn_coco2017.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: retinanet_resnet50_fpn_coco2017
-dir: "modules/image/object_detection/retinanet_resnet50_fpn_coco2017"
-resources:
-  -
-    url: https://paddlehub.bj.bcebos.com/model/cv/retinanet_resnet50_fpn_model.tar.gz
-    dest: retinanet_resnet50_fpn_model
-    uncompress: True
diff --git a/hub_module/scripts/configs/se_resnet18_vd_imagenet.yml b/hub_module/scripts/configs/se_resnet18_vd_imagenet.yml
deleted file mode 100644
index 037b45e65971466c89e0d32675277ed9bf993ec4..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/se_resnet18_vd_imagenet.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: se_resnet18_vd_imagenet
-dir: "modules/image/classification/se_resnet18_vd_imagenet"
-exclude:
-  - README.md
-resources:
-  -
-    url: https://bj.bcebos.com/paddlehub/hub_dev/SE_ResNet18_vd_imagenet_model.tar.gz
-    dest: se_resnet18_vd_imagenet_model
-    uncompress: True
diff --git a/hub_module/scripts/configs/senta_bilstm.yml b/hub_module/scripts/configs/senta_bilstm.yml
deleted file mode 100644
index fb3bccc475137ab4fd87be71d1afc67e051bb96a..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/senta_bilstm.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: senta_bilstm
-dir: "modules/text/sentiment_analysis/senta_bilstm"
-exclude:
-  - README.md
-  - test.py
-resources:
-  -
-    url: https://bj.bcebos.com/paddlehub/model/nlp/sentiment_analysis/senta_bilstm_assets.tar.gz
-    dest: .
-    uncompress: True
diff --git a/hub_module/scripts/configs/senta_bow.yml b/hub_module/scripts/configs/senta_bow.yml
deleted file mode 100644
index dbad649bd46d0cd396e6d67decf83e665bf0c2f6..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/senta_bow.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: senta_bow
-dir: "modules/text/sentiment_analysis/senta_bow"
-exclude:
-  - README.md
-  - test.py
-resources:
-  -
-    url: https://bj.bcebos.com/paddlehub/model/nlp/sentiment_analysis/senta_bow_assets.tar.gz
-    dest: .
-    uncompress: True
diff --git a/hub_module/scripts/configs/senta_cnn.yml b/hub_module/scripts/configs/senta_cnn.yml
deleted file mode 100644
index a5d4d5b3c4d7dfa24f9d2ea666416da8e9d65ef0..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/senta_cnn.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: senta_cnn
-dir: "modules/text/sentiment_analysis/senta_cnn"
-exclude:
-  - README.md
-  - test.py
-resources:
-  -
-    url: https://bj.bcebos.com/paddlehub/model/nlp/sentiment_analysis/senta_cnn_assets.tar.gz
-    dest: .
-    uncompress: True
diff --git a/hub_module/scripts/configs/senta_gru.yml b/hub_module/scripts/configs/senta_gru.yml
deleted file mode 100644
index 1fe839f39c788da436ed4c7bb476aed492fbca73..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/senta_gru.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: senta_gru
-dir: "modules/text/sentiment_analysis/senta_gru"
-exclude:
-  - README.md
-  - test.py
-resources:
-  -
-    url: https://bj.bcebos.com/paddlehub/model/nlp/sentiment_analysis/senta_gru_assets.tar.gz
-    dest: .
-    uncompress: True
diff --git a/hub_module/scripts/configs/senta_lstm.yml b/hub_module/scripts/configs/senta_lstm.yml
deleted file mode 100644
index 2df56c1d230e7b30c7f6eb3d40d423437f6c08f0..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/senta_lstm.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: senta_lstm
-dir: "modules/text/sentiment_analysis/senta_lstm"
-exclude:
-  - README.md
-  - test.py
-resources:
-  -
-    url: https://bj.bcebos.com/paddlehub/model/nlp/sentiment_analysis/senta_lstm_assets.tar.gz
-    dest: .
-    uncompress: True
diff --git a/hub_module/scripts/configs/simnet_bow.yml b/hub_module/scripts/configs/simnet_bow.yml
deleted file mode 100644
index be7ac501d41ef03427602d7ffccd6a59fbfc4dae..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/simnet_bow.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: simnet_bow
-dir: "modules/text/semantic_model/simnet_bow"
-exclude:
-  - README.md
-  - test.py
-resources:
-  -
-    url: https://bj.bcebos.com/paddlehub/model/nlp/semantic_model/simnet_bow_assets.tar.gz
-    dest: .
-    uncompress: True
diff --git a/hub_module/scripts/configs/slda_news.yml b/hub_module/scripts/configs/slda_news.yml
deleted file mode 100644
index 1a2dc26c87639fca6c86434e32e48630c9b879bb..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/slda_news.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: slda_news
-dir: "modules/text/semantic_model/slda_news"
-exclude:
-  - README.md
-resources:
-  -
-    url: https://bj.bcebos.com/paddlehub/model/nlp/semantic_model/slda_news.tar.gz
-    dest: .
-    uncompress: True
diff --git a/hub_module/scripts/configs/slda_novel.yml b/hub_module/scripts/configs/slda_novel.yml
deleted file mode 100644
index 4a018619d6549fac9b45457640100d4b2224764b..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/slda_novel.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: slda_novel
-dir: "modules/text/semantic_model/slda_novel"
-exclude:
-  - README.md
-resources:
-  -
-    url: https://bj.bcebos.com/paddlehub/model/nlp/semantic_model/slda_novel.tar.gz
-    dest: .
-    uncompress: True
diff --git a/hub_module/scripts/configs/slda_webpage.yml b/hub_module/scripts/configs/slda_webpage.yml
deleted file mode 100644
index fe6d6122a3900515b03b2433178a2f2e01600cda..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/slda_webpage.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: slda_webpage
-dir: "modules/text/semantic_model/slda_webpage"
-exclude:
-  - README.md
-resources:
-  -
-    url: https://bj.bcebos.com/paddlehub/model/nlp/semantic_model/slda_webpage.tar.gz
-    dest: .
-    uncompress: True
diff --git a/hub_module/scripts/configs/slda_weibo.yml b/hub_module/scripts/configs/slda_weibo.yml
deleted file mode 100644
index e6f68fc507299af9332a6517fd8cf2e5fd315085..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/slda_weibo.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: slda_weibo
-dir: "modules/text/semantic_model/slda_weibo"
-exclude:
-  - README.md
-resources:
-  -
-    url: https://bj.bcebos.com/paddlehub/model/nlp/semantic_model/slda_weibo.tar.gz
-    dest: .
-    uncompress: True
diff --git a/hub_module/scripts/configs/ssd_mobilenet_v1_pascal.yml b/hub_module/scripts/configs/ssd_mobilenet_v1_pascal.yml
deleted file mode 100644
index 0571d74b768d5743441e57f74935ec05ba72b8cc..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/ssd_mobilenet_v1_pascal.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: ssd_mobilenet_v1_pascal
-dir: "modules/image/object_detection/ssd_mobilenet_v1_pascal"
-resources:
-  -
-    url: https://paddlehub.bj.bcebos.com/model/cv/ssd_mobilenet_v1_model.tar.gz
-    dest: ssd_mobilenet_v1_model
-    uncompress: True
diff --git a/hub_module/scripts/configs/ssd_vgg16_300_coco2017.yml b/hub_module/scripts/configs/ssd_vgg16_300_coco2017.yml
deleted file mode 100644
index ba39deb1ccb140e0de9c8b9f1959eba3ba8f90c8..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/ssd_vgg16_300_coco2017.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: ssd_vgg16_300_coco2017
-dir: "modules/image/object_detection/ssd_vgg16_300_coco2017"
-resources:
-  -
-    url: https://paddlehub.bj.bcebos.com/model/cv/ssd_vgg16_300_model.tar.gz
-    dest: ssd_vgg16_300_model
-    uncompress: True
diff --git a/hub_module/scripts/configs/ssd_vgg16_512_coco2017.yml b/hub_module/scripts/configs/ssd_vgg16_512_coco2017.yml
deleted file mode 100644
index b3c375f8a20d68787c40dd682e48c39b049743e1..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/ssd_vgg16_512_coco2017.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: ssd_vgg16_512_coco2017
-dir: "modules/image/object_detection/ssd_vgg16_512_coco2017"
-resources:
-  -
-    url: https://paddlehub.bj.bcebos.com/model/cv/ssd_vgg16_512_model.tar.gz
-    dest: ssd_vgg16_512_model
-    uncompress: True
diff --git a/hub_module/scripts/configs/stylepro_artistic.yml b/hub_module/scripts/configs/stylepro_artistic.yml
deleted file mode 100644
index 205aab813189f0a1babd4329dfcd0c0b83b5fc4d..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/stylepro_artistic.yml
+++ /dev/null
@@ -1,13 +0,0 @@
-name: stylepro_artistic
-dir: "modules/image/style_transfer/stylepro_artistic/"
-exclude:
-  - README.md
-resources:
-  -
-    url: https://bj.bcebos.com/paddlehub/style_transfer/style_projection_enc.tar
-    dest: .
-    uncompress: True
-  -
-    url: https://bj.bcebos.com/paddlehub/style_transfer/style_projection_dec.tar
-    dest: .
-    uncompress: True
diff --git a/hub_module/scripts/configs/tencent_ailab_chinese_embedding.yml b/hub_module/scripts/configs/tencent_ailab_chinese_embedding.yml
deleted file mode 100644
index 11497128f75c0eedd9cdceed237f8ae445ce2607..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/tencent_ailab_chinese_embedding.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: tencent_ailab_chinese_embedding
-dir: "modules/text/embedding/tencent_ailab_chinese_embedding"
-exclude:
-  - README.md
-  - test.py
-resources:
-  -
-    url: https://bj.bcebos.com/paddlehub/model/nlp/embedding/tencent_ailab_chinese_embedding_assets.tar.gz
-    dest: .
-    uncompress: True
diff --git a/hub_module/scripts/configs/tencent_ailab_chinese_embedding_small.yml b/hub_module/scripts/configs/tencent_ailab_chinese_embedding_small.yml
deleted file mode 100644
index 82e3debfa92981480ad1de229e30b2ccf908dd2f..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/tencent_ailab_chinese_embedding_small.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: tencent_ailab_chinese_embedding_small
-dir: "modules/text/embedding/tencent_ailab_chinese_embedding_small"
-exclude:
-  - README.md
-  - test.py
-resources:
-  -
-    url: https://bj.bcebos.com/paddlehub/model/nlp/embedding/tencent_ailab_chinese_embedding_small_assets.tar.gz
-    dest: .
-    uncompress: True
diff --git a/hub_module/scripts/configs/ultra_light_fast_generic_face_detector_1mb_320.yml b/hub_module/scripts/configs/ultra_light_fast_generic_face_detector_1mb_320.yml
deleted file mode 100644
index 0d7dc0248711967df2eeeb02f18668a76f129436..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/ultra_light_fast_generic_face_detector_1mb_320.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: ultra_light_fast_generic_face_detector_1mb_320
-dir: "modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320"
-exclude:
-  - README.md
-resources:
-  -
-    url: https://bj.bcebos.com/paddlehub/hub_dev/ultra_light_fast_generic_face_detector_1mb_320.tar.gz
-    dest: .
-    uncompress: True
diff --git a/hub_module/scripts/configs/ultra_light_fast_generic_face_detector_1mb_640.yml b/hub_module/scripts/configs/ultra_light_fast_generic_face_detector_1mb_640.yml
deleted file mode 100644
index 77b3f5a2eba0cb3357de95fef7d8c4a3e77d1e31..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/ultra_light_fast_generic_face_detector_1mb_640.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: ultra_light_fast_generic_face_detector_1mb_640
-dir: "modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640"
-exclude:
-  - README.md
-resources:
-  -
-    url: https://bj.bcebos.com/paddlehub/hub_dev/ultra_light_fast_generic_face_detector_1mb_640.tar.gz
-    dest: .
-    uncompress: True
diff --git a/hub_module/scripts/configs/vgg16_imagenet.yml b/hub_module/scripts/configs/vgg16_imagenet.yml
deleted file mode 100644
index 3b607d73b5a4f7449a252bf6d17577ea6c11ba98..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/vgg16_imagenet.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: vgg16_imagenet
-dir: "modules/image/classification/vgg16_imagenet"
-resources:
-  -
-    url: https://paddlehub.bj.bcebos.com/model/cv/vgg16_model.tar.gz
-    dest: vgg16_model
-    uncompress: True
diff --git a/hub_module/scripts/configs/videotag_tsn_lstm.yml b/hub_module/scripts/configs/videotag_tsn_lstm.yml
deleted file mode 100644
index b0dca8a06cf4844723caff32e3f0e2393e26bf4d..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/videotag_tsn_lstm.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: videotag_tsn_lstm
-dir: "modules/video/classification/videotag_tsn_lstm"
-exclude:
-  - README.md
-resources:
-  -
-    url: https://paddlehub.bj.bcebos.com/model/video/video_classifcation/videotag_tsn_lstm.tar.gz
-    dest: weights
-    uncompress: True
diff --git a/hub_module/scripts/configs/word2vec_skipgram.yml b/hub_module/scripts/configs/word2vec_skipgram.yml
deleted file mode 100644
index b293d87614d0f32f40ec83cbbc79e0078af9ebcb..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/word2vec_skipgram.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: word2vec_skipgram
-dir: "modules/text/embedding/word2vec_skipgram"
-exclude:
-  - README.md
-  - test.py
-resources:
-  -
-    url: https://bj.bcebos.com/paddlehub/model/nlp/embedding/word2vec_skipgram_assets.tar.gz
-    dest: .
-    uncompress: True
diff --git a/hub_module/scripts/configs/yolov3_darknet53_coco2017.yml b/hub_module/scripts/configs/yolov3_darknet53_coco2017.yml
deleted file mode 100644
index 9cc02efc3d64bd6de1aebbfd43f3933c150a6a12..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/yolov3_darknet53_coco2017.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: yolov3_darknet53_coco2017
-dir: "modules/image/object_detection/yolov3_darknet53_coco2017"
-resources:
-  -
-    url: https://paddlehub.bj.bcebos.com/model/cv/yolov3_darknet53_model.tar.gz
-    dest: yolov3_darknet53_model
-    uncompress: True
diff --git a/hub_module/scripts/configs/yolov3_darknet53_pedestrian.yml b/hub_module/scripts/configs/yolov3_darknet53_pedestrian.yml
deleted file mode 100644
index a58308e565067a9f464ca424fdc58886f713f77f..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/yolov3_darknet53_pedestrian.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: yolov3_darknet53_pedestrian
-dir: "modules/image/object_detection/yolov3_darknet53_pedestrian"
-exclude:
-  - README.md
-resources:
-  -
-    url: https://bj.bcebos.com/paddlehub/hub_dev/yolov3_darknet53_pedestrian_model.tar.gz
-    dest: .
-    uncompress: True
diff --git a/hub_module/scripts/configs/yolov3_darknet53_vehicles.yml b/hub_module/scripts/configs/yolov3_darknet53_vehicles.yml
deleted file mode 100644
index 3987eb6dd5442781d3c6a4d5f49e8a3414625d76..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/yolov3_darknet53_vehicles.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-name: yolov3_darknet53_vehicles
-dir: "modules/image/object_detection/yolov3_darknet53_vehicles"
-exclude:
-  - README.md
-resources:
-  -
-    url: https://bj.bcebos.com/paddlehub/hub_dev/yolov3_darknet53_vehicles_model.tar.gz
-    dest: .
-    uncompress: True
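Once packaged and published, these modules are loaded by name through the PaddleHub API rather than through these configs. Below is a hedged sketch of running one of the detection modules above; `object_detection` and its arguments mirror the call style used by the unit tests later in this diff, not a verified signature.

```python
# Hedged usage sketch for a packaged detection module, assuming the
# PaddleHub 1.x API style seen in the unit tests later in this diff.
import paddlehub as hub

detector = hub.Module(name="yolov3_darknet53_coco2017")
results = detector.object_detection(
    paths=["giraffe.jpg"],  # hypothetical local test image
    use_gpu=False,
    visualization=True)  # also write annotated images to disk
print(results)
```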
diff --git a/hub_module/scripts/configs/yolov3_darknet53_venus.yml b/hub_module/scripts/configs/yolov3_darknet53_venus.yml
deleted file mode 100644
index fed10946666bc85f6c37a1860df683e290fc02c5..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/yolov3_darknet53_venus.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: yolov3_darknet53_venus
-dir: "modules/image/object_detection/yolov3_darknet53_venus"
-resources:
-  -
-    url: https://paddlehub.bj.bcebos.com/model/cv/yolov3_darknet53_venus_model.tar.gz
-    dest: yolov3_darknet53_model
-    uncompress: True
diff --git a/hub_module/scripts/configs/yolov3_mobilenet_v1_coco2017.yml b/hub_module/scripts/configs/yolov3_mobilenet_v1_coco2017.yml
deleted file mode 100644
index 65286f0001a7ff1521ae549ce01aa56646600397..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/yolov3_mobilenet_v1_coco2017.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: yolov3_mobilenet_v1_coco2017
-dir: "modules/image/object_detection/yolov3_mobilenet_v1_coco2017"
-resources:
-  -
-    url: https://paddlehub.bj.bcebos.com/model/cv/yolov3_mobilenet_v1_model.tar.gz
-    dest: yolov3_mobilenet_v1_model
-    uncompress: True
diff --git a/hub_module/scripts/configs/yolov3_resnet34_coco2017.yml b/hub_module/scripts/configs/yolov3_resnet34_coco2017.yml
deleted file mode 100644
index 5c91626e643d5e30192fd61cc5e7a9d66b7190b9..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/yolov3_resnet34_coco2017.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: yolov3_resnet34_coco2017
-dir: "modules/image/object_detection/yolov3_resnet34_coco2017"
-resources:
-  -
-    url: https://paddlehub.bj.bcebos.com/model/cv/yolov3_resnet34_model.tar.gz
-    dest: yolov3_resnet34_model
-    uncompress: True
diff --git a/hub_module/scripts/configs/yolov3_resnet50_vd_coco2017.yml b/hub_module/scripts/configs/yolov3_resnet50_vd_coco2017.yml
deleted file mode 100644
index 740cb5006e1a9a6739df2cb3112f73c7fd9a54bd..0000000000000000000000000000000000000000
--- a/hub_module/scripts/configs/yolov3_resnet50_vd_coco2017.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: yolov3_resnet50_vd_coco2017
-dir: "modules/image/object_detection/yolov3_resnet50_vd_coco2017"
-resources:
-  -
-    url: https://paddlehub.bj.bcebos.com/model/cv/yolov3_resnet50_model.tar.gz
-    dest: yolov3_resnet50_model
-    uncompress: True
diff --git a/hub_module/scripts/downloader.py b/hub_module/scripts/downloader.py
deleted file mode 100644
index a1d93358cfcce980d8a3e5e92be605624666a829..0000000000000000000000000000000000000000
--- a/hub_module/scripts/downloader.py
+++ /dev/null
@@ -1,133 +0,0 @@
-from __future__ import print_function
-from __future__ import division
-
-import shutil
-import os
-import sys
-import time
-import hashlib
-import requests
-import tarfile
-
-FLUSH_INTERVAL = 0.1
-lasttime = time.time()
-
-
-def progress(str, end=False):
-    global lasttime
-    if end:
-        str += "\n"
-        lasttime = 0
-    if time.time() - lasttime >= FLUSH_INTERVAL:
-        sys.stdout.write("\r%s" % str)
-        lasttime = time.time()
-        sys.stdout.flush()
-
-
-class Downloader(object):
-    def download_file(self,
-                      url,
-                      save_path,
-                      save_name=None,
-                      retry_limit=3,
-                      print_progress=False,
-                      replace=False):
-        if not os.path.exists(save_path):
-            os.makedirs(save_path)
-        save_name = url.split('/')[-1] if save_name is None else save_name
-        file_name = os.path.join(save_path, save_name)
-        retry_times = 0
-
-        if replace and os.path.exists(file_name):
-            os.remove(file_name)
-
-        while not (os.path.exists(file_name)):
-            if retry_times < retry_limit:
-                retry_times += 1
-            else:
-                tips = "Cannot download {0} within retry limit {1}".format(
-                    url, retry_limit)
-                return False, tips, None
-            r = requests.get(url, stream=True)
-            total_length = r.headers.get('content-length')
-
-            if total_length is None:
-                with open(file_name, 'wb') as f:
-                    shutil.copyfileobj(r.raw, f)
-            else:
-                with open(file_name, 'wb') as f:
-                    dl = 0
-                    total_length = int(total_length)
-                    starttime = time.time()
-                    if print_progress:
-                        print("Downloading %s" % save_name)
-                    for data in r.iter_content(chunk_size=4096):
-                        dl += len(data)
-                        f.write(data)
-                        if print_progress:
-                            done = int(50 * dl / total_length)
-                            progress(
-                                "[%-50s] %.2f%%" %
-                                ('=' * done, float(dl / total_length * 100)))
-                if print_progress:
-                    progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True)
-
-        tips = "File %s download completed!" % (file_name)
-        return True, tips, file_name
-
-    def uncompress(self,
-                   file,
-                   dirname=None,
-                   delete_file=False,
-                   print_progress=False):
-        dirname = os.path.dirname(file) if dirname is None else dirname
-        if print_progress:
-            print("Uncompress %s" % file)
-        flag = "r:gz" if file.endswith("tar.gz") else "r:"
-        with tarfile.open(file, flag) as tar:
-            file_names = tar.getnames()
-            size = len(file_names) - 1
-            module_dir = os.path.join(dirname, file_names[0])
-            for index, file_name in enumerate(file_names):
-                if print_progress:
-                    done = int(50 * float(index) / size)
-                    progress("[%-50s] %.2f%%" % ('=' * done,
                                                 float(index / size * 100)))
-                tar.extract(file_name, dirname)
-
-            if print_progress:
-                progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True)
-        if delete_file:
-            os.remove(file)
-
-        return True, "File %s uncompress completed!" % file, module_dir
-
-    def download_file_and_uncompress(self,
-                                     url,
-                                     save_path,
-                                     save_name=None,
-                                     retry_limit=3,
-                                     delete_file=True,
-                                     print_progress=False,
-                                     replace=False):
-        result, tips_1, file = self.download_file(
-            url=url,
-            save_path=save_path,
-            save_name=save_name,
-            retry_limit=retry_limit,
-            print_progress=print_progress,
-            replace=replace)
-        if not result:
-            return result, tips_1, file
-        result, tips_2, file = self.uncompress(
-            file, delete_file=delete_file, print_progress=print_progress)
-        if not result:
-            return result, tips_2, file
-        if save_name:
-            save_name = os.path.join(save_path, save_name)
-            shutil.move(file, save_name)
-            return result, "%s\n%s" % (tips_1, tips_2), save_name
-        return result, "%s\n%s" % (tips_1, tips_2), file
-
-
-downloader = Downloader()
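The module-level `downloader` singleton above is what `pack.py` (next in this diff) uses to stage resources. A small usage sketch against one of the URLs from the configs above:

```python
# Usage sketch for the Downloader defined above; the URL is taken from
# mobilenet_v1_imagenet.yml earlier in this diff.
from downloader import downloader

ok, tips, path = downloader.download_file_and_uncompress(
    url="https://paddlehub.bj.bcebos.com/model/cv/mobilenet_v1_model.tar.gz",
    save_path="./tmp",  # created on demand by download_file
    print_progress=True,  # render the 50-character progress bar
    delete_file=True)  # drop the archive once extracted
print(tips)  # download status line plus uncompress status line
if ok:
    print("extracted to", path)
```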
diff --git a/hub_module/scripts/pack.py b/hub_module/scripts/pack.py
deleted file mode 100644
index 5ec734b8332f8f032f8bfdfc9f3c1a52a0541947..0000000000000000000000000000000000000000
--- a/hub_module/scripts/pack.py
+++ /dev/null
@@ -1,85 +0,0 @@
-# coding:utf8
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import argparse
-import os
-import tempfile
-import tarfile
-import shutil
-import yaml
-import re
-
-import paddlehub as hub
-
-from downloader import downloader
-
-PACK_PATH = os.path.dirname(os.path.realpath(__file__))
-MODULE_BASE_PATH = os.path.join(PACK_PATH, "..")
-
-
-def parse_args():
-    parser = argparse.ArgumentParser(description='packing PaddleHub Module')
-    parser.add_argument(
-        '--config',
-        dest='config',
-        help='Config file for module config',
-        default=None,
-        type=str)
-    return parser.parse_args()
-
-
-def package_module(config):
-    with tempfile.TemporaryDirectory(dir=".") as _dir:
-        directory = os.path.join(MODULE_BASE_PATH, config["dir"])
-        name = config['name'].replace('-', '_')
-        dest = os.path.join(_dir, name)
-        shutil.copytree(directory, dest)
-        for resource in config.get("resources", []):
-            if resource.get("uncompress", False):
-                _, _, file = downloader.download_file_and_uncompress(
-                    url=resource["url"], save_path=dest, print_progress=True)
-            else:
-                _, _, file = downloader.download_file(
-                    url=resource["url"], save_path=dest, print_progress=True)
-
-            dest_path = os.path.join(dest, resource["dest"])
-            if resource["dest"] != ".":
-                if os.path.realpath(dest_path) != os.path.realpath(file):
-                    shutil.move(file, dest_path)
-
-        tar_filter = lambda tarinfo: None if any([
-            exclude_file_name in tarinfo.name.replace(name + os.sep, "")
-            for exclude_file_name in config.get("exclude", [])
-        ]) else tarinfo
-
-        with open(os.path.join(directory, "module.py")) as file:
-            file_content = file.read()
-        file_content = file_content.replace('\n', '').replace(
-            ' ', '').replace('"', '').replace("'", '')
-        module_info = re.findall(r'@moduleinfo\(.*?\)',
-                                 file_content)[0].replace(
-                                     '@moduleinfo(', '').replace(')', '')
-        module_info = module_info.split(',')
-        for item in module_info:
-            if item.startswith('version'):
-                module_version = item.split('=')[1].replace(',', '')
-            if item.startswith('name'):
-                module_name = item.split('=')[1].replace(',', '')
-        package = "{}_{}.tar.gz".format(module_name, module_version)
-        with tarfile.open(package, "w:gz") as tar:
-            tar.add(
-                dest, arcname=os.path.basename(module_name), filter=tar_filter)
-
-
-def main(args):
-    with open(args.config, "r") as file:
-        config = yaml.load(file.read(), Loader=yaml.FullLoader)
-
-    package_module(config)
-
-
-if __name__ == "__main__":
-    main(parse_args())
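`package_module` recovers the module's `name` and `version` by flattening `module.py` into one string and scraping its `@moduleinfo(...)` decorator with string surgery, which is fragile. Below is a hedged standalone sketch of the same extraction; the decorator is assumed to use keyword arguments with plain string literals, e.g. `@moduleinfo(name="senta_bilstm", version="1.1.0", ...)`, and real modules may differ.

```python
# Hedged sketch of the @moduleinfo scraping done in package_module above.
import re


def read_module_info(module_py_path):
    with open(module_py_path) as f:
        source = f.read()
    # First @moduleinfo(...) call, tolerating newlines inside the parens.
    match = re.search(r'@moduleinfo\((.*?)\)', source, flags=re.DOTALL)
    if match is None:
        raise ValueError("no @moduleinfo decorator found")
    # Collect keyword arguments written as plain string literals.
    fields = dict(
        re.findall(r'(\w+)\s*=\s*["\']([^"\']+)["\']', match.group(1)))
    return fields["name"], fields["version"]


name, version = read_module_info("module.py")  # hypothetical path
print("{}_{}.tar.gz".format(name, version))  # the archive name pack.py builds
```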
diff --git a/hub_module/tests/image_dataset/classification/animals/dog.jpeg b/hub_module/tests/image_dataset/classification/animals/dog.jpeg
deleted file mode 100644
index 768f27ef43ab9f27db3f339ced951249e8e7778e..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/classification/animals/dog.jpeg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/classification/animals/elephant.jpg b/hub_module/tests/image_dataset/classification/animals/elephant.jpg
deleted file mode 100644
index 7ea3e7fbe9d73e6ba187865234d2bc040364dbb6..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/classification/animals/elephant.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/classification/animals/pandas.jpg b/hub_module/tests/image_dataset/classification/animals/pandas.jpg
deleted file mode 100644
index f0bd7bf7603073790cf29491eb5de5cfddd31962..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/classification/animals/pandas.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/classification/animals/pangolin.jpeg b/hub_module/tests/image_dataset/classification/animals/pangolin.jpeg
deleted file mode 100644
index 05764166fc2977edb0fad0295226d781a8c8282e..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/classification/animals/pangolin.jpeg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/classification/animals/tigger.jpg b/hub_module/tests/image_dataset/classification/animals/tigger.jpg
deleted file mode 100644
index 472a7c7b4567383dad25420dc00258237d54d3fb..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/classification/animals/tigger.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/classification/dish/tofu.jpeg b/hub_module/tests/image_dataset/classification/dish/tofu.jpeg
deleted file mode 100644
index 10357346d31cbd90d93883d809ed137b64454fa9..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/classification/dish/tofu.jpeg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/classification/dish/tomato.jpeg b/hub_module/tests/image_dataset/classification/dish/tomato.jpeg
deleted file mode 100644
index b895eba2ab86588b7fd99ed0b034cea2a80a7dd8..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/classification/dish/tomato.jpeg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/detection/cat.jpg b/hub_module/tests/image_dataset/detection/cat.jpg
deleted file mode 100644
index 3781b079117e51ac0a763f407e95e1a674d266c0..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/detection/cat.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/detection/giraffe.jpg b/hub_module/tests/image_dataset/detection/giraffe.jpg
deleted file mode 100644
index 4881c799ddb08e3f052b70b93060859ea587a1a5..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/detection/giraffe.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/detection/pascal_voc/airplane.jpg b/hub_module/tests/image_dataset/detection/pascal_voc/airplane.jpg
deleted file mode 100755
index 3e9a3b3db1141ab4e6c9f09a514dedeec6e87bb9..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/detection/pascal_voc/airplane.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/detection/pascal_voc/bike.jpg b/hub_module/tests/image_dataset/detection/pascal_voc/bike.jpg
deleted file mode 100755
index 5b7d9466b1e73fb39884296b950e6d834bb3453d..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/detection/pascal_voc/bike.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/detection/pascal_voc/bird.jpg b/hub_module/tests/image_dataset/detection/pascal_voc/bird.jpg
deleted file mode 100755
index 905c7c6cd195e1af6ce735e4d110bf8e4f703fa5..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/detection/pascal_voc/bird.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/detection/pascal_voc/cowboy.jpg b/hub_module/tests/image_dataset/detection/pascal_voc/cowboy.jpg
deleted file mode 100755
index fd62b3606be423bbdc24aa8017e90cfc1fc18db5..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/detection/pascal_voc/cowboy.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/detection/pascal_voc/sheep.jpg b/hub_module/tests/image_dataset/detection/pascal_voc/sheep.jpg
deleted file mode 100755
index ca7b08ea5b6fecd730699f2be7e1e01e6f307c4f..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/detection/pascal_voc/sheep.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/detection/pascal_voc/train.jpg b/hub_module/tests/image_dataset/detection/pascal_voc/train.jpg
deleted file mode 100755
index be812cbf8905ad74ef30f0ec6fba16cee34e1c32..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/detection/pascal_voc/train.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/detection/zebra.jpg b/hub_module/tests/image_dataset/detection/zebra.jpg
deleted file mode 100644
index 7d9e2d92c1e0f77fd190b5d2746f5875d7d3225a..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/detection/zebra.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/face_detection/aviation_staff.jpg b/hub_module/tests/image_dataset/face_detection/aviation_staff.jpg
deleted file mode 100644
index 241364e7c015cb86d62d12342677dc6746c3abf9..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/face_detection/aviation_staff.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/face_detection/flight_attendants.jpg b/hub_module/tests/image_dataset/face_detection/flight_attendants.jpg
deleted file mode 100644
index ec7f58130263a65cf178ee542345824838654092..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/face_detection/flight_attendants.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/face_detection/queue_mask.jpg b/hub_module/tests/image_dataset/face_detection/queue_mask.jpg
deleted file mode 100644
index 6a93f8bd7dd82a30f781378d8a20712206d8b39c..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/face_detection/queue_mask.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/face_detection/rescue_team.jpg b/hub_module/tests/image_dataset/face_detection/rescue_team.jpg
deleted file mode 100644
index c4ce3e93baf88adbdf4c4a745467862442068565..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/face_detection/rescue_team.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/keypoint_detection/girl2.jpg b/hub_module/tests/image_dataset/keypoint_detection/girl2.jpg
deleted file mode 100644
index b17a15e238d1e27628d79099479d14b234eb95ae..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/keypoint_detection/girl2.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/keypoint_detection/girl3.jpg b/hub_module/tests/image_dataset/keypoint_detection/girl3.jpg
deleted file mode 100644
index 05bb620eda6ab312c3692007ece37047660fb47e..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/keypoint_detection/girl3.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/keypoint_detection/girl4.jpg b/hub_module/tests/image_dataset/keypoint_detection/girl4.jpg
deleted file mode 100644
index c05d4af57f08426a65bfbe5b8074233c1445e504..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/keypoint_detection/girl4.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/keypoint_detection/girl6.jpg b/hub_module/tests/image_dataset/keypoint_detection/girl6.jpg
deleted file mode 100644
index a8f87f1ba7459cf94eed3798da230cbe085f28b9..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/keypoint_detection/girl6.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/keypoint_detection/ten.jpg b/hub_module/tests/image_dataset/keypoint_detection/ten.jpg
deleted file mode 100644
index ec7f58130263a65cf178ee542345824838654092..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/keypoint_detection/ten.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/object_detection/cat.jpg b/hub_module/tests/image_dataset/object_detection/cat.jpg
deleted file mode 100644
index 3781b079117e51ac0a763f407e95e1a674d266c0..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/object_detection/cat.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/object_detection/dog.jpg b/hub_module/tests/image_dataset/object_detection/dog.jpg
deleted file mode 100644
index e8621425374f6383d63998081d90d4209927442c..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/object_detection/dog.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/object_detection/giraffe.jpg b/hub_module/tests/image_dataset/object_detection/giraffe.jpg
deleted file mode 100644
index 4881c799ddb08e3f052b70b93060859ea587a1a5..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/object_detection/giraffe.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/object_detection/pascal_voc/airplane.jpg b/hub_module/tests/image_dataset/object_detection/pascal_voc/airplane.jpg
deleted file mode 100755
index 3e9a3b3db1141ab4e6c9f09a514dedeec6e87bb9..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/object_detection/pascal_voc/airplane.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/object_detection/pascal_voc/bike.jpg b/hub_module/tests/image_dataset/object_detection/pascal_voc/bike.jpg
deleted file mode 100755
index 5b7d9466b1e73fb39884296b950e6d834bb3453d..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/object_detection/pascal_voc/bike.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/object_detection/pascal_voc/bird.jpg b/hub_module/tests/image_dataset/object_detection/pascal_voc/bird.jpg
deleted file mode 100755
index 905c7c6cd195e1af6ce735e4d110bf8e4f703fa5..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/object_detection/pascal_voc/bird.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/object_detection/pascal_voc/cowboy.jpg b/hub_module/tests/image_dataset/object_detection/pascal_voc/cowboy.jpg
deleted file mode 100755
index fd62b3606be423bbdc24aa8017e90cfc1fc18db5..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/object_detection/pascal_voc/cowboy.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/object_detection/pascal_voc/sheep.jpg b/hub_module/tests/image_dataset/object_detection/pascal_voc/sheep.jpg
deleted file mode 100755
index ca7b08ea5b6fecd730699f2be7e1e01e6f307c4f..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/object_detection/pascal_voc/sheep.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/object_detection/pascal_voc/train.jpg b/hub_module/tests/image_dataset/object_detection/pascal_voc/train.jpg
deleted file mode 100755
index be812cbf8905ad74ef30f0ec6fba16cee34e1c32..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/object_detection/pascal_voc/train.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/object_detection/vehicles/001.jpeg b/hub_module/tests/image_dataset/object_detection/vehicles/001.jpeg
deleted file mode 100644
index 8786db5eb6773931c363358bb39462b33db55369..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/object_detection/vehicles/001.jpeg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/object_detection/zebra.jpg b/hub_module/tests/image_dataset/object_detection/zebra.jpg
deleted file mode 100644
index 7d9e2d92c1e0f77fd190b5d2746f5875d7d3225a..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/object_detection/zebra.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/semantic_segmentation/100098_193288.jpg b/hub_module/tests/image_dataset/semantic_segmentation/100098_193288.jpg
deleted file mode 100644
index 0879e14baf4c3520d1a78e998fc81c603cc96c87..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/semantic_segmentation/100098_193288.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/semantic_segmentation/100519_473145.jpg b/hub_module/tests/image_dataset/semantic_segmentation/100519_473145.jpg
deleted file mode 100644
index 9a1e214eea235e477e53211b01d5189453adb081..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/semantic_segmentation/100519_473145.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/style_tranfer/content/chicago.jpg b/hub_module/tests/image_dataset/style_tranfer/content/chicago.jpg
deleted file mode 100644
index 0fcc2f77353adde024a8437874567825b4947d83..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/style_tranfer/content/chicago.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/style_tranfer/content/cornell.jpg b/hub_module/tests/image_dataset/style_tranfer/content/cornell.jpg
deleted file mode 100644
index 2dd270fa029f61b61464dc02c580adbba6c6257f..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/style_tranfer/content/cornell.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/style_tranfer/content/golden_gate.jpg b/hub_module/tests/image_dataset/style_tranfer/content/golden_gate.jpg
deleted file mode 100644
index 4b5f20353b5c6b8ed8f5d02efe924e1cde9aa627..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/style_tranfer/content/golden_gate.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/style_tranfer/content/lenna.jpg b/hub_module/tests/image_dataset/style_tranfer/content/lenna.jpg
deleted file mode 100644
index 0dffa5f852c1528311530e398961ca2a8c7b820c..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/style_tranfer/content/lenna.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/style_tranfer/content/modern.jpg b/hub_module/tests/image_dataset/style_tranfer/content/modern.jpg
deleted file mode 100644
index 088e329cb5df603421b3cd46a12c6af08341b3c8..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/style_tranfer/content/modern.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/style_tranfer/content/nanshan.jpg b/hub_module/tests/image_dataset/style_tranfer/content/nanshan.jpg
deleted file mode 100644
index b175bf01b1deb433aab706ad17ec477c9a158880..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/style_tranfer/content/nanshan.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/style_tranfer/content/sailboat.jpg b/hub_module/tests/image_dataset/style_tranfer/content/sailboat.jpg
deleted file mode 100644
index 2ada328a4ba2806ab01c6f68048c5af568661c4c..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/style_tranfer/content/sailboat.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/style_tranfer/style/antimonocromatismo.jpg b/hub_module/tests/image_dataset/style_tranfer/style/antimonocromatismo.jpg
deleted file mode 100644
index f55a52684befff6f97fc80392cae1a52ab7205e2..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/style_tranfer/style/antimonocromatismo.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/style_tranfer/style/asheville.jpg b/hub_module/tests/image_dataset/style_tranfer/style/asheville.jpg
deleted file mode 100644
index 94df8885989a1dea8fdf48b851f449dc280e19de..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/style_tranfer/style/asheville.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/style_tranfer/style/brushstrokes.jpg b/hub_module/tests/image_dataset/style_tranfer/style/brushstrokes.jpg
deleted file mode 100644
index 564d825c2029f895cee7c9f0c0c2dbe90c3ada49..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/style_tranfer/style/brushstrokes.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/style_tranfer/style/flower_of_life.jpg b/hub_module/tests/image_dataset/style_tranfer/style/flower_of_life.jpg
deleted file mode 100644
index 6a313762217e11df60ae9e77af322c1cc30cd0fc..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/style_tranfer/style/flower_of_life.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/style_tranfer/style/goeritz.jpg b/hub_module/tests/image_dataset/style_tranfer/style/goeritz.jpg
deleted file mode 100644
index 0d9994865909490ec93a0151504a3cee59a52c56..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/style_tranfer/style/goeritz.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/style_tranfer/style/impronte_d_artista.jpg b/hub_module/tests/image_dataset/style_tranfer/style/impronte_d_artista.jpg
deleted file mode 100644
index 9739af1e13ca27f1a45c057f9fe9c7448e9101bd..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/style_tranfer/style/impronte_d_artista.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/style_tranfer/style/mondrian_cropped.jpg b/hub_module/tests/image_dataset/style_tranfer/style/mondrian_cropped.jpg
deleted file mode 100644
index ea4094a0517903a734c3cd752f71c6dfa4de406a..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/style_tranfer/style/mondrian_cropped.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/style_tranfer/style/picasso_self_portrait.jpg b/hub_module/tests/image_dataset/style_tranfer/style/picasso_self_portrait.jpg
deleted file mode 100644
index 3462b70be0bfe4c4272ec3d86e4639a7000b1a16..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/style_tranfer/style/picasso_self_portrait.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/style_tranfer/style/scene_de_rue.jpg b/hub_module/tests/image_dataset/style_tranfer/style/scene_de_rue.jpg
deleted file mode 100644
index a627dffd36a6fb4193e3e44b353bccb6ce10ac5b..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/style_tranfer/style/scene_de_rue.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/style_tranfer/style/sketch.png b/hub_module/tests/image_dataset/style_tranfer/style/sketch.png
deleted file mode 100644
index 95ad03fdbcbd4359ee3073c6a8fa0646d3f7045d..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/style_tranfer/style/sketch.png and /dev/null differ
diff --git a/hub_module/tests/image_dataset/style_tranfer/style/the_resevoir_at_poitiers.jpg b/hub_module/tests/image_dataset/style_tranfer/style/the_resevoir_at_poitiers.jpg
deleted file mode 100644
index 6f0534c3ed931735c855b70770ae195e65848379..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/style_tranfer/style/the_resevoir_at_poitiers.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/style_tranfer/style/woman_with_hat_matisse.jpg b/hub_module/tests/image_dataset/style_tranfer/style/woman_with_hat_matisse.jpg
deleted file mode 100644
index a4675c78d98afb52f4680f5bc151a9f09310f7e8..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/style_tranfer/style/woman_with_hat_matisse.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/text_recognition/11.jpg b/hub_module/tests/image_dataset/text_recognition/11.jpg
deleted file mode 100755
index ed91b8c5ca2a348fe7b138e83114ff81ecb107de..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/text_recognition/11.jpg and /dev/null differ
diff --git a/hub_module/tests/image_dataset/text_recognition/test_image.jpg b/hub_module/tests/image_dataset/text_recognition/test_image.jpg
deleted file mode 100644
index be103f39ec5c2a4e4681ffb82bf8231feef1c048..0000000000000000000000000000000000000000
Binary files a/hub_module/tests/image_dataset/text_recognition/test_image.jpg and /dev/null differ
diff --git a/hub_module/tests/unittests/test_ace2p.py b/hub_module/tests/unittests/test_ace2p.py
deleted file mode 100644
index 9c31c593512ae0091b9ed1b8e55e5dea14fd2b56..0000000000000000000000000000000000000000
--- a/hub_module/tests/unittests/test_ace2p.py
+++ /dev/null
@@ -1,85 +0,0 @@
-# coding=utf-8
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import unittest
-
-import cv2
-import numpy as np
-import paddle.fluid as fluid
-import paddlehub as hub
-
-pic_dir = '../image_dataset/semantic_segmentation/'
-
-
-class TestAce2p(unittest.TestCase):
-    @classmethod
-    def setUpClass(self):
-        """Prepare the environment once before execution of all tests.\n"""
-        self.human_parsing = hub.Module(name="ace2p")
-
-    @classmethod
-    def tearDownClass(self):
-        """clean up the environment after the execution of all tests.\n"""
-        self.human_parsing = None
-
-    def setUp(self):
-        "Call setUp() to prepare environment\n"
-        self.test_prog = fluid.Program()
-
-    def tearDown(self):
-        "Call tearDown to restore environment.\n"
-        self.test_prog = None
-
-    def test_single_pic(self):
-        with fluid.program_guard(self.test_prog):
-            pics_path_list = [
-                os.path.join(pic_dir, f) for f in os.listdir(pic_dir)
-            ]
-            for pic_path in pics_path_list:
-                result = self.human_parsing.segmentation(
-                    paths=[pic_path], use_gpu=True, visualization=True)
-                print(result)
-
-    def test_batch(self):
-        with fluid.program_guard(self.test_prog):
-            pics_path_list = [
-                os.path.join(pic_dir, f) for f in os.listdir(pic_dir)
-            ]
-            result = self.human_parsing.segmentation(
-                paths=pics_path_list,
-                batch_size=5,
-                output_dir='batch_output',
-                use_gpu=True,
-                visualization=True)
-            print(result)
-
-    def test_ndarray(self):
-        with fluid.program_guard(self.test_prog):
-            pics_path_list = [
-                os.path.join(pic_dir, f) for f in os.listdir(pic_dir)
-            ]
-            for pic_path in pics_path_list:
-                im = cv2.imread(pic_path)
-                result = self.human_parsing.segmentation(
-                    images=[im],
-                    output_dir='ndarray_output',
-                    use_gpu=True,
-                    visualization=True)
-
-    def test_save_inference_model(self):
-        with fluid.program_guard(self.test_prog):
-            self.human_parsing.save_inference_model(
-                dirname='ace2p', model_filename='model', combined=True)
-
-
-if __name__ == "__main__":
-    suite = unittest.TestSuite()
-    suite.addTest(TestAce2p('test_single_pic'))
-    suite.addTest(TestAce2p('test_batch'))
-    suite.addTest(TestAce2p('test_ndarray'))
-    suite.addTest(TestAce2p('test_save_inference_model'))
-    runner = unittest.TextTestRunner(verbosity=2)
-    runner.run(suite)
diff --git a/hub_module/tests/unittests/test_bert_chn_base.py b/hub_module/tests/unittests/test_bert_chn_base.py
deleted file mode 100644
index 02790abd34dfb7916c68cd23e0b862ebf66ae8a2..0000000000000000000000000000000000000000
--- a/hub_module/tests/unittests/test_bert_chn_base.py
+++ /dev/null
@@ -1,81 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import os
-from unittest import TestCase, main
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-
-import numpy as np
-import paddlehub as hub
-
-
-class BERTChnBaseTestCase(TestCase):
-    def setUp(self):
-        self.module = hub.Module(name='bert_chinese_L-12_H-768_A-12')
-        self.test_text = [[
-            '飞桨(PaddlePaddle)是国内开源产业级深度学习平台', 'PaddleHub是飞桨生态的预训练模型应用工具'
-        ], ["飞浆PaddleHub"]]
-
-    def test_get_embedding(self):
-        # test batch_size
-        results = self.module.get_embedding(
-            texts=self.test_text, use_gpu=False, batch_size=1)
-        results_2 = self.module.get_embedding(
-            texts=self.test_text, use_gpu=False, batch_size=10)
-        # 2 sample results
-        self.assertEqual(len(results), 2)
-        self.assertEqual(len(results_2), 2)
-        # sequence embedding and token embedding results per sample
-        self.assertEqual(len(results[0]), 2)
-        self.assertEqual(len(results_2[0]), 2)
-        # sequence embedding shape
-        self.assertEqual(results[0][0].shape, (768, ))
-        self.assertEqual(results_2[0][0].shape, (768, ))
-        # token embedding shape, max_seq_len is 512
-        self.assertEqual(results[0][1].shape, (512, 768))
-        self.assertEqual(results_2[0][1].shape, (512, 768))
-
-        # test gpu
-        results_3 = self.module.get_embedding(
-            texts=self.test_text, use_gpu=True, batch_size=1)
-        diff = np.abs(results[0][0] - results_3[0][0])
-        self.assertTrue((diff < 1e-6).all())
-        diff = np.abs(results[0][1] - results_3[0][1])
-        self.assertTrue((diff < 1e-6).all())
-        diff = np.abs(results[1][0] - results_3[1][0])
-        self.assertTrue((diff < 1e-6).all())
-        diff = np.abs(results[1][1] - results_3[1][1])
-        self.assertTrue((diff < 1e-6).all())
-
-    def test_get_params_layer(self):
-        self.module.context()
-        layers = self.module.get_params_layer()
-        layers = list(set(layers.values()))
-        true_layers = [i for i in range(12)]
-        self.assertEqual(layers, true_layers)
-
-    def test_get_spm_path(self):
-        self.assertEqual(self.module.get_spm_path(), None)
test_get_word_dict_path(self): - self.assertEqual(self.module.get_word_dict_path(), None) - - def test_get_vocab_path(self): - vocab_path = self.module.get_vocab_path() - true_vocab_path = os.path.join(self.module.directory, "assets", - "vocab.txt") - self.assertEqual(vocab_path, true_vocab_path) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_chinese_ocr_db_crnn_mobile.py b/hub_module/tests/unittests/test_chinese_ocr_db_crnn_mobile.py deleted file mode 100644 index cb39ad8d0a33b341f40c9521b0a05650af42fa6c..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_chinese_ocr_db_crnn_mobile.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -from unittest import TestCase, main -os.environ['CUDA_VISIBLE_DEVICES'] = '0' - -import cv2 -import paddlehub as hub - - -class ChineseOCRDBCRNNTestCase(TestCase): - def setUp(self): - self.module = hub.Module(name='chinese_ocr_db_crnn_mobile') - self.test_images = [ - "../image_dataset/text_recognition/11.jpg", - "../image_dataset/text_recognition/test_image.jpg" - ] - - def test_detect_text(self): - results_1 = self.module.recognize_text( - paths=self.test_images, use_gpu=True) - results_2 = self.module.recognize_text( - paths=self.test_images, use_gpu=False) - - test_images = [cv2.imread(img) for img in self.test_images] - results_3 = self.module.recognize_text( - images=test_images, use_gpu=False) - for i, res in enumerate(results_1): - self.assertEqual(res['save_path'], '') - - for j, item in enumerate(res['data']): - self.assertEqual(item['confidence'], - results_2[i]['data'][j]['confidence']) - self.assertEqual(item['confidence'], - results_3[i]['data'][j]['confidence']) - self.assertEqual(item['text'], results_2[i]['data'][j]['text']) - self.assertEqual(item['text'], results_3[i]['data'][j]['text']) - self.assertEqual(item['text_box_position'], - results_2[i]['data'][j]['text_box_position']) - self.assertEqual(item['text_box_position'], - results_3[i]['data'][j]['text_box_position']) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_chinese_ocr_db_crnn_server.py b/hub_module/tests/unittests/test_chinese_ocr_db_crnn_server.py deleted file mode 100644 index a3e74d0730b93848d7b65dddee22e2693833c5c2..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_chinese_ocr_db_crnn_server.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -import os -from unittest import TestCase, main -os.environ['CUDA_VISIBLE_DEVICES'] = '0' - -import cv2 -import paddlehub as hub - - -class ChineseOCRDBCRNNTestCase(TestCase): - def setUp(self): - self.module = hub.Module(name='chinese_ocr_db_crnn_server') - self.test_images = [ - "../image_dataset/text_recognition/11.jpg", - "../image_dataset/text_recognition/test_image.jpg" - ] - - def test_detect_text(self): - results_1 = self.module.recognize_text( - paths=self.test_images, use_gpu=True) - results_2 = self.module.recognize_text( - paths=self.test_images, use_gpu=False) - - test_images = [cv2.imread(img) for img in self.test_images] - results_3 = self.module.recognize_text( - images=test_images, use_gpu=False) - for i, res in enumerate(results_1): - self.assertEqual(res['save_path'], '') - - for j, item in enumerate(res['data']): - self.assertEqual(item['confidence'], - results_2[i]['data'][j]['confidence']) - self.assertEqual(item['confidence'], - results_3[i]['data'][j]['confidence']) - self.assertEqual(item['text'], results_2[i]['data'][j]['text']) - self.assertEqual(item['text'], results_3[i]['data'][j]['text']) - self.assertEqual(item['text_box_position'], - results_2[i]['data'][j]['text_box_position']) - self.assertEqual(item['text_box_position'], - results_3[i]['data'][j]['text_box_position']) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_chinese_text_detection_db_mobile.py b/hub_module/tests/unittests/test_chinese_text_detection_db_mobile.py deleted file mode 100644 index 01819f98141f5f399f912e051f7b17ed8e53e40f..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_chinese_text_detection_db_mobile.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
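The mobile and server OCR test cases above are byte-for-byte identical apart from the module name passed to `hub.Module`. Where that pattern recurs, the duplication can be collapsed into one parameterized base class. The sketch below shows one way to do it, exercising only the `recognize_text` API used above; the mixin and its attribute names are illustrative, not part of the original suite.

```python
# Minimal sketch: deduplicate the mobile/server OCR tests with a mixin.
# Only the recognize_text() behavior asserted above is exercised; class
# and attribute names here are illustrative, not from the original suite.
import unittest

import paddlehub as hub


class OCRTestMixin(object):
    module_name = None  # each concrete subclass fills this in

    def setUp(self):
        self.module = hub.Module(name=self.module_name)
        self.test_images = [
            "../image_dataset/text_recognition/11.jpg",
            "../image_dataset/text_recognition/test_image.jpg"
        ]

    def test_detect_text(self):
        results = self.module.recognize_text(
            paths=self.test_images, use_gpu=False)
        for res in results:
            # visualization is off by default, so no file is written
            self.assertEqual(res['save_path'], '')


class MobileOCRTestCase(OCRTestMixin, unittest.TestCase):
    module_name = 'chinese_ocr_db_crnn_mobile'


class ServerOCRTestCase(OCRTestMixin, unittest.TestCase):
    module_name = 'chinese_ocr_db_crnn_server'


if __name__ == '__main__':
    unittest.main()
```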
-import os -from unittest import TestCase, main -os.environ['CUDA_VISIBLE_DEVICES'] = '0' - -import cv2 -import paddlehub as hub - - -class ChineseTextDetectionDBTestCase(TestCase): - def setUp(self): - self.module = hub.Module(name='chinese_text_detection_db_mobile') - self.test_images = [ - "../image_dataset/text_recognition/11.jpg", - "../image_dataset/text_recognition/test_image.jpg" - ] - - def test_detect_text(self): - results_1 = self.module.detect_text( - paths=self.test_images, use_gpu=True) - results_2 = self.module.detect_text( - paths=self.test_images, use_gpu=False) - - test_images = [cv2.imread(img) for img in self.test_images] - results_3 = self.module.detect_text(images=test_images, use_gpu=False) - for index, res in enumerate(results_1): - self.assertEqual(res['save_path'], '') - self.assertEqual(res['data'], results_2[index]['data']) - self.assertEqual(res['data'], results_3[index]['data']) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_chinese_text_detection_db_server.py b/hub_module/tests/unittests/test_chinese_text_detection_db_server.py deleted file mode 100644 index 6f0fd98910284275ade8dab1c5f502fbd5702183..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_chinese_text_detection_db_server.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
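The detection test above only checks that `detect_text` gives consistent results across `paths` and `images` inputs. For reference, here is a hedged sketch of consuming its output to draw the detected boxes, assuming each entry of `res['data']` is one quadrilateral given as four (x, y) vertices; that layout is consistent with the equality checks above, but verify it against the module documentation before relying on it.

```python
# Hedged sketch: draw chinese_text_detection_db_mobile results.
# Assumes each element of res['data'] is a text box given as four
# (x, y) vertices; confirm against the module docs.
import cv2
import numpy as np
import paddlehub as hub

detector = hub.Module(name='chinese_text_detection_db_mobile')
image_path = '../image_dataset/text_recognition/11.jpg'

results = detector.detect_text(paths=[image_path], use_gpu=False)

canvas = cv2.imread(image_path)
for box in results[0]['data']:
    pts = np.array(box, dtype=np.int32).reshape((-1, 1, 2))
    cv2.polylines(canvas, [pts], isClosed=True, color=(0, 255, 0), thickness=2)
cv2.imwrite('boxes.jpg', canvas)
```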
-import os -from unittest import TestCase, main -os.environ['CUDA_VISIBLE_DEVICES'] = '0' - -import cv2 -import paddlehub as hub - - -class ChineseTextDetectionDBTestCase(TestCase): - def setUp(self): - self.module = hub.Module(name='chinese_text_detection_db_server') - self.test_images = [ - "../image_dataset/text_recognition/11.jpg", - "../image_dataset/text_recognition/test_image.jpg" - ] - - def test_detect_text(self): - results_1 = self.module.detect_text( - paths=self.test_images, use_gpu=True) - results_2 = self.module.detect_text( - paths=self.test_images, use_gpu=False) - - test_images = [cv2.imread(img) for img in self.test_images] - results_3 = self.module.detect_text(images=test_images, use_gpu=False) - for index, res in enumerate(results_1): - self.assertEqual(res['save_path'], '') - self.assertEqual(res['data'], results_2[index]['data']) - self.assertEqual(res['data'], results_3[index]['data']) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_darknet53.py b/hub_module/tests/unittests/test_darknet53.py deleted file mode 100644 index 809e40a928439628017a71b690a3d765ead8c460..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_darknet53.py +++ /dev/null @@ -1,66 +0,0 @@ -# coding=utf-8 -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - - -class TestDarkNet(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.""" - self.darknet = hub.Module(name='darknet53_imagenet') - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.""" - self.darknet = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - with fluid.program_guard(self.test_prog): - image = fluid.layers.data( - name='image', shape=[3, 224, 224], dtype='float32') - inputs, outputs, program = self.darknet.context( - input_image=image, - pretrained=False, - trainable=True, - param_prefix='BaiDu') - image = inputs["image"] - body_feats = outputs['body_feats'] - - def test_classification(self): - with fluid.program_guard(self.test_prog): - image_dir = "../image_dataset/pascal_voc/" - airplane = cv2.imread(os.path.join( - image_dir, 'airplane.jpg')).astype('float32') - airplanes = np.array([airplane, airplane]) - classification_results = self.darknet.classification( - paths=[ - os.path.join(image_dir, 'bird.jpg'), - os.path.join(image_dir, 'bike.jpg'), - os.path.join(image_dir, 'cowboy.jpg'), - os.path.join(image_dir, 'sheep.jpg'), - os.path.join(image_dir, 'train.jpg') - ], - images=airplanes, - batch_size=2) - print(classification_results) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestDarkNet('test_context')) - suite.addTest(TestDarkNet('test_classification')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_deeplabv3p_xception65_humanseg.py b/hub_module/tests/unittests/test_deeplabv3p_xception65_humanseg.py deleted file mode 100644 index 3da25d780dded92821c40225b9f1065f3fe58b4c..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_deeplabv3p_xception65_humanseg.py +++ /dev/null @@ -1,88 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division 
-from __future__ import print_function - -import os -import time -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -pic_dir = '../image_dataset/keypoint_detection/' - - -class TestHumanSeg(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - self.human_seg = hub.Module(name="deeplabv3p_xception65_humanseg") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.\n""" - self.human_seg = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_single_pic(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - for pic_path in pics_path_list: - result = self.human_seg.segmentation( - paths=[pic_path], use_gpu=True, visualization=True) - print(result) - - def test_batch(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - result = self.human_seg.segmentation( - paths=pics_path_list, - batch_size=5, - output_dir='batch_output', - use_gpu=True, - visualization=True) - print(result) - - def test_ndarray(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - pics_ndarray = list() - for pic_path in pics_path_list: - result = self.human_seg.segmentation( - images=[cv2.imread(pic_path)], - output_dir='ndarray_output', - use_gpu=True, - visualization=True) - - def test_save_inference_model(self): - with fluid.program_guard(self.test_prog): - self.human_seg.save_inference_model( - dirname='deeplabv3p_xception65_humanseg', - model_filename='model', - combined=True) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestHumanSeg('test_single_pic')) - suite.addTest(TestHumanSeg('test_batch')) - suite.addTest(TestHumanSeg('test_ndarray')) - suite.addTest(TestHumanSeg('test_save_inference_model')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_efficientnetb0_imagenet.py b/hub_module/tests/unittests/test_efficientnetb0_imagenet.py deleted file mode 100644 index abfa3f7f9e00794aba0913c64f7ebaf79ff430b1..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_efficientnetb0_imagenet.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
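The segmentation suites above (ace2p and the humanseg module) hard-code `use_gpu=True`, so they fail outright on a CPU-only machine. A defensive variant derives the flag from the Paddle build instead; the sketch below is a suggested pattern under that assumption, not the original suite's behavior.

```python
# Sketch: derive use_gpu from the Paddle build instead of hard-coding it,
# so the same call works on CPU-only machines. A suggested pattern, not
# part of the original suite.
import paddle.fluid as fluid
import paddlehub as hub

use_gpu = fluid.is_compiled_with_cuda()

human_seg = hub.Module(name='deeplabv3p_xception65_humanseg')
result = human_seg.segmentation(
    paths=['../image_dataset/keypoint_detection/girl2.jpg'],
    use_gpu=use_gpu,
    visualization=False)
print(result)
```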
-import os -from unittest import TestCase, main -os.environ['CUDA_VISIBLE_DEVICES'] = '1' - -import cv2 -import numpy as np -import paddlehub as hub - - -class EfficientNetB0TestCase(TestCase): - def setUp(self): - self.module = hub.Module(name='efficientnetb0_imagenet') - self.test_images = [ - "../image_dataset/classification/animals/dog.jpeg", - "../image_dataset/keypoint_detection/girl2.jpg" - ] - self.true_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3).tolist() - self.true_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3).tolist() - - def test_classifcation(self): - results_1 = self.module.classify(paths=self.test_images, use_gpu=True) - results_2 = self.module.classify(paths=self.test_images, use_gpu=False) - for index, res in enumerate(results_1): - self.assertTrue(res.keys(), results_2[index].keys()) - diff = list(res.values())[0] - list(results_2[index].values())[0] - self.assertTrue((diff < 1e-5)) - - test_images = [cv2.imread(img) for img in self.test_images] - results_3 = self.module.classify(images=test_images, use_gpu=False) - for index, res in enumerate(results_1): - self.assertTrue(res.keys(), results_3[index].keys()) - - results_4 = self.module.classify( - images=test_images, use_gpu=True, top_k=2) - for res in results_4: - self.assertEqual(len(res.keys()), 2) - - def test_common_apis(self): - width = self.module.get_expected_image_width() - height = self.module.get_expected_image_height() - mean = self.module.get_pretrained_images_mean() - std = self.module.get_pretrained_images_std() - - self.assertEqual(width, 224) - self.assertEqual(height, 224) - self.assertEqual(mean.tolist(), self.true_mean) - self.assertEqual(std.tolist(), self.true_std) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_efficientnetb0_small_imagenet.py b/hub_module/tests/unittests/test_efficientnetb0_small_imagenet.py deleted file mode 100644 index 3713b907a6aa8ddf40b7367037ea00e7ef4c929b..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_efficientnetb0_small_imagenet.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
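Two defects in the classification test above recur verbatim in every EfficientNet variant that follows (b0-small through b7): the method name `test_classifcation` drops an "i", and `assertTrue(a, b)` treats its second argument as the failure *message*, so `self.assertTrue(res.keys(), results_2[index].keys())` only checks that `res.keys()` is non-empty and never compares the two result sets. A corrected drop-in for that method:

```python
# Drop-in replacement for the method above (note the corrected spelling).
# assertTrue(a, b) uses b as the failure message, so the original line
# never compared the key sets; assertEqual performs the intended check.
def test_classification(self):
    results_1 = self.module.classify(paths=self.test_images, use_gpu=True)
    results_2 = self.module.classify(paths=self.test_images, use_gpu=False)
    for index, res in enumerate(results_1):
        self.assertEqual(sorted(res.keys()), sorted(results_2[index].keys()))
        diff = list(res.values())[0] - list(results_2[index].values())[0]
        self.assertLess(abs(diff), 1e-5)
```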
-import os -from unittest import TestCase, main -os.environ['CUDA_VISIBLE_DEVICES'] = '1' - -import cv2 -import numpy as np -import paddlehub as hub - - -class EfficientNetB0SmallTestCase(TestCase): - def setUp(self): - self.module = hub.Module(name='efficientnetb0_small_imagenet') - self.test_images = [ - "../image_dataset/classification/animals/dog.jpeg", - "../image_dataset/keypoint_detection/girl2.jpg" - ] - self.true_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3).tolist() - self.true_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3).tolist() - - def test_classifcation(self): - results_1 = self.module.classify(paths=self.test_images, use_gpu=True) - results_2 = self.module.classify(paths=self.test_images, use_gpu=False) - for index, res in enumerate(results_1): - self.assertTrue(res.keys(), results_2[index].keys()) - diff = list(res.values())[0] - list(results_2[index].values())[0] - self.assertTrue((diff < 1e-5)) - - test_images = [cv2.imread(img) for img in self.test_images] - results_3 = self.module.classify(images=test_images, use_gpu=False) - for index, res in enumerate(results_1): - self.assertTrue(res.keys(), results_3[index].keys()) - - results_4 = self.module.classify( - images=test_images, use_gpu=True, top_k=2) - for res in results_4: - self.assertEqual(len(res.keys()), 2) - - def test_common_apis(self): - width = self.module.get_expected_image_width() - height = self.module.get_expected_image_height() - mean = self.module.get_pretrained_images_mean() - std = self.module.get_pretrained_images_std() - - self.assertEqual(width, 224) - self.assertEqual(height, 224) - self.assertEqual(mean.tolist(), self.true_mean) - self.assertEqual(std.tolist(), self.true_std) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_efficientnetb1_imagenet.py b/hub_module/tests/unittests/test_efficientnetb1_imagenet.py deleted file mode 100644 index 7762019eb528737779fc2b268478a9f89f959100..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_efficientnetb1_imagenet.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -from unittest import TestCase, main -os.environ['CUDA_VISIBLE_DEVICES'] = '1' - -import cv2 -import numpy as np -import paddlehub as hub - - -class EfficientNetB1TestCase(TestCase): - def setUp(self): - self.module = hub.Module(name='efficientnetb1_imagenet') - self.test_images = [ - "../image_dataset/classification/animals/dog.jpeg", - "../image_dataset/keypoint_detection/girl2.jpg" - ] - self.true_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3).tolist() - self.true_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3).tolist() - - def test_classifcation(self): - results_1 = self.module.classify(paths=self.test_images, use_gpu=True) - results_2 = self.module.classify(paths=self.test_images, use_gpu=False) - for index, res in enumerate(results_1): - self.assertTrue(res.keys(), results_2[index].keys()) - diff = list(res.values())[0] - list(results_2[index].values())[0] - self.assertTrue((diff < 1e-5)) - - test_images = [cv2.imread(img) for img in self.test_images] - results_3 = self.module.classify(images=test_images, use_gpu=False) - for index, res in enumerate(results_1): - self.assertTrue(res.keys(), results_3[index].keys()) - - results_4 = self.module.classify( - images=test_images, use_gpu=True, top_k=2) - for res in results_4: - self.assertEqual(len(res.keys()), 2) - - def test_common_apis(self): - width = self.module.get_expected_image_width() - height = self.module.get_expected_image_height() - mean = self.module.get_pretrained_images_mean() - std = self.module.get_pretrained_images_std() - - self.assertEqual(width, 224) - self.assertEqual(height, 224) - self.assertEqual(mean.tolist(), self.true_mean) - self.assertEqual(std.tolist(), self.true_std) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_efficientnetb2_imagenet.py b/hub_module/tests/unittests/test_efficientnetb2_imagenet.py deleted file mode 100644 index 2159f6791e3bc9244909acfb2659a43218124c24..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_efficientnetb2_imagenet.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -from unittest import TestCase, main -os.environ['CUDA_VISIBLE_DEVICES'] = '1' - -import cv2 -import numpy as np -import paddlehub as hub - - -class EfficientNetB2TestCase(TestCase): - def setUp(self): - self.module = hub.Module(name='efficientnetb2_imagenet') - self.test_images = [ - "../image_dataset/classification/animals/dog.jpeg", - "../image_dataset/keypoint_detection/girl2.jpg" - ] - self.true_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3).tolist() - self.true_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3).tolist() - - def test_classifcation(self): - results_1 = self.module.classify(paths=self.test_images, use_gpu=True) - results_2 = self.module.classify(paths=self.test_images, use_gpu=False) - for index, res in enumerate(results_1): - self.assertTrue(res.keys(), results_2[index].keys()) - diff = list(res.values())[0] - list(results_2[index].values())[0] - self.assertTrue((diff < 1e-5)) - - test_images = [cv2.imread(img) for img in self.test_images] - results_3 = self.module.classify(images=test_images, use_gpu=False) - for index, res in enumerate(results_1): - self.assertTrue(res.keys(), results_3[index].keys()) - - results_4 = self.module.classify( - images=test_images, use_gpu=True, top_k=2) - for res in results_4: - self.assertEqual(len(res.keys()), 2) - - def test_common_apis(self): - width = self.module.get_expected_image_width() - height = self.module.get_expected_image_height() - mean = self.module.get_pretrained_images_mean() - std = self.module.get_pretrained_images_std() - - self.assertEqual(width, 224) - self.assertEqual(height, 224) - self.assertEqual(mean.tolist(), self.true_mean) - self.assertEqual(std.tolist(), self.true_std) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_efficientnetb3_imagenet.py b/hub_module/tests/unittests/test_efficientnetb3_imagenet.py deleted file mode 100644 index b1bbce8cf01a33dd2a42c81025044c1b3e24c372..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_efficientnetb3_imagenet.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -from unittest import TestCase, main -os.environ['CUDA_VISIBLE_DEVICES'] = '1' - -import cv2 -import numpy as np -import paddlehub as hub - - -class EfficientNetB3TestCase(TestCase): - def setUp(self): - self.module = hub.Module(name='efficientnetb3_imagenet') - self.test_images = [ - "../image_dataset/classification/animals/dog.jpeg", - "../image_dataset/keypoint_detection/girl2.jpg" - ] - self.true_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3).tolist() - self.true_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3).tolist() - - def test_classifcation(self): - results_1 = self.module.classify(paths=self.test_images, use_gpu=True) - results_2 = self.module.classify(paths=self.test_images, use_gpu=False) - for index, res in enumerate(results_1): - self.assertTrue(res.keys(), results_2[index].keys()) - diff = list(res.values())[0] - list(results_2[index].values())[0] - self.assertTrue((diff < 1e-5)) - - test_images = [cv2.imread(img) for img in self.test_images] - results_3 = self.module.classify(images=test_images, use_gpu=False) - for index, res in enumerate(results_1): - self.assertTrue(res.keys(), results_3[index].keys()) - - results_4 = self.module.classify( - images=test_images, use_gpu=True, top_k=2) - for res in results_4: - self.assertEqual(len(res.keys()), 2) - - def test_common_apis(self): - width = self.module.get_expected_image_width() - height = self.module.get_expected_image_height() - mean = self.module.get_pretrained_images_mean() - std = self.module.get_pretrained_images_std() - - self.assertEqual(width, 224) - self.assertEqual(height, 224) - self.assertEqual(mean.tolist(), self.true_mean) - self.assertEqual(std.tolist(), self.true_std) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_efficientnetb4_imagenet.py b/hub_module/tests/unittests/test_efficientnetb4_imagenet.py deleted file mode 100644 index c321f39f215ef58dced3a99e188e4e6924c716ef..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_efficientnetb4_imagenet.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -from unittest import TestCase, main -os.environ['CUDA_VISIBLE_DEVICES'] = '1' - -import cv2 -import numpy as np -import paddlehub as hub - - -class EfficientNetB4TestCase(TestCase): - def setUp(self): - self.module = hub.Module(name='efficientnetb4_imagenet') - self.test_images = [ - "../image_dataset/classification/animals/dog.jpeg", - "../image_dataset/keypoint_detection/girl2.jpg" - ] - self.true_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3).tolist() - self.true_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3).tolist() - - def test_classifcation(self): - results_1 = self.module.classify(paths=self.test_images, use_gpu=True) - results_2 = self.module.classify(paths=self.test_images, use_gpu=False) - for index, res in enumerate(results_1): - self.assertTrue(res.keys(), results_2[index].keys()) - diff = list(res.values())[0] - list(results_2[index].values())[0] - self.assertTrue((diff < 1e-5)) - - test_images = [cv2.imread(img) for img in self.test_images] - results_3 = self.module.classify(images=test_images, use_gpu=False) - for index, res in enumerate(results_1): - self.assertTrue(res.keys(), results_3[index].keys()) - - results_4 = self.module.classify( - images=test_images, use_gpu=True, top_k=2) - for res in results_4: - self.assertEqual(len(res.keys()), 2) - - def test_common_apis(self): - width = self.module.get_expected_image_width() - height = self.module.get_expected_image_height() - mean = self.module.get_pretrained_images_mean() - std = self.module.get_pretrained_images_std() - - self.assertEqual(width, 224) - self.assertEqual(height, 224) - self.assertEqual(mean.tolist(), self.true_mean) - self.assertEqual(std.tolist(), self.true_std) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_efficientnetb5_imagenet.py b/hub_module/tests/unittests/test_efficientnetb5_imagenet.py deleted file mode 100644 index fab65efd843c34e4450c8f9f8ea42e22cf733b02..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_efficientnetb5_imagenet.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -from unittest import TestCase, main -os.environ['CUDA_VISIBLE_DEVICES'] = '1' - -import cv2 -import numpy as np -import paddlehub as hub - - -class EfficientNetB5TestCase(TestCase): - def setUp(self): - self.module = hub.Module(name='efficientnetb5_imagenet') - self.test_images = [ - "../image_dataset/classification/animals/dog.jpeg", - "../image_dataset/keypoint_detection/girl2.jpg" - ] - self.true_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3).tolist() - self.true_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3).tolist() - - def test_classifcation(self): - results_1 = self.module.classify(paths=self.test_images, use_gpu=True) - results_2 = self.module.classify(paths=self.test_images, use_gpu=False) - for index, res in enumerate(results_1): - self.assertTrue(res.keys(), results_2[index].keys()) - diff = list(res.values())[0] - list(results_2[index].values())[0] - self.assertTrue((diff < 1e-5)) - - test_images = [cv2.imread(img) for img in self.test_images] - results_3 = self.module.classify(images=test_images, use_gpu=False) - for index, res in enumerate(results_1): - self.assertTrue(res.keys(), results_3[index].keys()) - - results_4 = self.module.classify( - images=test_images, use_gpu=True, top_k=2) - for res in results_4: - self.assertEqual(len(res.keys()), 2) - - def test_common_apis(self): - width = self.module.get_expected_image_width() - height = self.module.get_expected_image_height() - mean = self.module.get_pretrained_images_mean() - std = self.module.get_pretrained_images_std() - - self.assertEqual(width, 224) - self.assertEqual(height, 224) - self.assertEqual(mean.tolist(), self.true_mean) - self.assertEqual(std.tolist(), self.true_std) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_efficientnetb6_imagenet.py b/hub_module/tests/unittests/test_efficientnetb6_imagenet.py deleted file mode 100644 index 9fd407745c142995f395500cd8afa8e4b8ab91f9..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_efficientnetb6_imagenet.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -from unittest import TestCase, main -os.environ['CUDA_VISIBLE_DEVICES'] = '1' - -import cv2 -import numpy as np -import paddlehub as hub - - -class EfficientNetB6TestCase(TestCase): - def setUp(self): - self.module = hub.Module(name='efficientnetb6_imagenet') - self.test_images = [ - "../image_dataset/classification/animals/dog.jpeg", - "../image_dataset/keypoint_detection/girl2.jpg" - ] - self.true_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3).tolist() - self.true_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3).tolist() - - def test_classifcation(self): - results_1 = self.module.classify(paths=self.test_images, use_gpu=True) - results_2 = self.module.classify(paths=self.test_images, use_gpu=False) - for index, res in enumerate(results_1): - self.assertTrue(res.keys(), results_2[index].keys()) - diff = list(res.values())[0] - list(results_2[index].values())[0] - self.assertTrue((diff < 1e-5)) - - test_images = [cv2.imread(img) for img in self.test_images] - results_3 = self.module.classify(images=test_images, use_gpu=False) - for index, res in enumerate(results_1): - self.assertTrue(res.keys(), results_3[index].keys()) - - results_4 = self.module.classify( - images=test_images, use_gpu=True, top_k=2) - for res in results_4: - self.assertEqual(len(res.keys()), 2) - - def test_common_apis(self): - width = self.module.get_expected_image_width() - height = self.module.get_expected_image_height() - mean = self.module.get_pretrained_images_mean() - std = self.module.get_pretrained_images_std() - - self.assertEqual(width, 224) - self.assertEqual(height, 224) - self.assertEqual(mean.tolist(), self.true_mean) - self.assertEqual(std.tolist(), self.true_std) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_efficientnetb7_imagenet.py b/hub_module/tests/unittests/test_efficientnetb7_imagenet.py deleted file mode 100644 index 325b7bee35a7843826252e3ba1d308d26d48c68a..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_efficientnetb7_imagenet.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -from unittest import TestCase, main -os.environ['CUDA_VISIBLE_DEVICES'] = '1' - -import cv2 -import numpy as np -import paddlehub as hub - - -class EfficientNetB7TestCase(TestCase): - def setUp(self): - self.module = hub.Module(name='efficientnetb7_imagenet') - self.test_images = [ - "../image_dataset/classification/animals/dog.jpeg", - "../image_dataset/keypoint_detection/girl2.jpg" - ] - self.true_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3).tolist() - self.true_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3).tolist() - - def test_classifcation(self): - results_1 = self.module.classify(paths=self.test_images, use_gpu=True) - results_2 = self.module.classify(paths=self.test_images, use_gpu=False) - for index, res in enumerate(results_1): - self.assertTrue(res.keys(), results_2[index].keys()) - diff = list(res.values())[0] - list(results_2[index].values())[0] - self.assertTrue((diff < 1e-5)) - - test_images = [cv2.imread(img) for img in self.test_images] - results_3 = self.module.classify(images=test_images, use_gpu=False) - for index, res in enumerate(results_1): - self.assertTrue(res.keys(), results_3[index].keys()) - - results_4 = self.module.classify( - images=test_images, use_gpu=True, top_k=2) - for res in results_4: - self.assertEqual(len(res.keys()), 2) - - def test_common_apis(self): - width = self.module.get_expected_image_width() - height = self.module.get_expected_image_height() - mean = self.module.get_pretrained_images_mean() - std = self.module.get_pretrained_images_std() - - self.assertEqual(width, 224) - self.assertEqual(height, 224) - self.assertEqual(mean.tolist(), self.true_mean) - self.assertEqual(std.tolist(), self.true_std) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_emotion_detection_textcnn.py b/hub_module/tests/unittests/test_emotion_detection_textcnn.py deleted file mode 100644 index aadb4a92dd25bec7dee2b2703a49d0a5c9b07c84..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_emotion_detection_textcnn.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
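The nine EfficientNet test files above (b0, b0-small, and b1 through b7) differ only in the module name, so the whole family can run from one test case. The sketch below covers just the image-metadata assertions shared by all of them; the class name is illustrative and `subTest` requires Python 3.4+.

```python
# Sketch: one TestCase over all EfficientNet variants instead of nine
# near-identical files. Only the shared metadata assertions are shown;
# the class name is illustrative.
import unittest

import paddlehub as hub

VARIANTS = [
    'efficientnetb0_imagenet', 'efficientnetb0_small_imagenet',
    'efficientnetb1_imagenet', 'efficientnetb2_imagenet',
    'efficientnetb3_imagenet', 'efficientnetb4_imagenet',
    'efficientnetb5_imagenet', 'efficientnetb6_imagenet',
    'efficientnetb7_imagenet',
]


class EfficientNetCommonAPIsTest(unittest.TestCase):
    def test_common_apis(self):
        for name in VARIANTS:
            with self.subTest(module=name):
                module = hub.Module(name=name)
                self.assertEqual(module.get_expected_image_width(), 224)
                self.assertEqual(module.get_expected_image_height(), 224)


if __name__ == '__main__':
    unittest.main()
```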
-import os -from unittest import TestCase, main -os.environ['CUDA_VISIBLE_DEVICES'] = '0' - -import paddlehub as hub - - -class EmotionDetectionTextCNNTestCase(TestCase): - def setUp(self): - self.module = hub.Module(name='emotion_detection_textcnn') - self.test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] - self.results = [{ - 'text': '这家餐厅很好吃', - 'emotion_label': 1, - 'emotion_key': 'neutral', - 'positive_probs': 0.3839, - 'negative_probs': 0.0414, - 'neutral_probs': 0.5747 - }, - { - 'text': '这部电影真的很差劲', - 'emotion_label': 1, - 'emotion_key': 'neutral', - 'positive_probs': 0.3875, - 'negative_probs': 0.1259, - 'neutral_probs': 0.4866 - }] - self.labels = {"positive": 2, "negative": 0, "neutral": 1} - - def test_emotion_classify(self): - # test batch_size - results = self.module.emotion_classify( - texts=self.test_text, use_gpu=False, batch_size=1) - self.assertEqual(results, self.results) - results = self.module.emotion_classify( - texts=self.test_text, use_gpu=False, batch_size=10) - self.assertEqual(results, self.results) - - # test gpu - results = self.module.emotion_classify( - texts=self.test_text, use_gpu=True, batch_size=1) - self.assertEqual(results, self.results) - - def test_get_vocab_path(self): - true_vocab_path = os.path.join(self.module.directory, "assets", - "vocab.txt") - vocab_path = self.module.get_vocab_path() - self.assertEqual(vocab_path, true_vocab_path) - - def test_get_labels(self): - labels = self.module.get_labels() - self.assertEqual(labels, self.labels) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_ernie.py b/hub_module/tests/unittests/test_ernie.py deleted file mode 100644 index 71115093f94db4ab7c396b54108355c5a391e69a..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_ernie.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
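The emotion test above compares predicted probabilities with exact `assertEqual` against hard-coded floats, which is brittle: the values can drift across Paddle versions or hardware. A more tolerant helper, meant as a method added to the test case above, is sketched below; the three-decimal tolerance is a judgment call, not from the original suite.

```python
# Sketch: compare labels exactly but probabilities approximately.
# The tolerance (places=3) is a judgment call, not from the suite.
def assert_emotion_results(self, results, expected):
    for res, exp in zip(results, expected):
        self.assertEqual(res['text'], exp['text'])
        self.assertEqual(res['emotion_label'], exp['emotion_label'])
        self.assertEqual(res['emotion_key'], exp['emotion_key'])
        for key in ('positive_probs', 'negative_probs', 'neutral_probs'):
            self.assertAlmostEqual(res[key], exp[key], places=3)
```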
-import os -from unittest import TestCase, main -os.environ['CUDA_VISIBLE_DEVICES'] = '0' - -import numpy as np -import paddlehub as hub - - -class ERNIETestCase(TestCase): - def setUp(self): - self.module = hub.Module(name='ernie') - self.test_text = [[ - '飞桨(PaddlePaddle)是国内开源产业级深度学习平台', 'PaddleHub是飞桨生态的预训练模型应用工具' - ], ["飞浆PaddleHub"]] - - def test_get_embedding(self): - # test batch_size - results = self.module.get_embedding( - texts=self.test_text, use_gpu=False, batch_size=1) - results_2 = self.module.get_embedding( - texts=self.test_text, use_gpu=False, batch_size=10) - # 2 sample results - self.assertEqual(len(results), 2) - self.assertEqual(len(results_2), 2) - # sequence embedding and token embedding results per sample - self.assertEqual(len(results[0]), 2) - self.assertEqual(len(results_2[0]), 2) - # sequence embedding shape - self.assertEqual(results[0][0].shape, (768, )) - self.assertEqual(results_2[0][0].shape, (768, )) - # token embedding shape, max_seq_len is 512 - self.assertEqual(results[0][1].shape, (512, 768)) - self.assertEqual(results_2[0][1].shape, (512, 768)) - - # test gpu - results_3 = self.module.get_embedding( - texts=self.test_text, use_gpu=True, batch_size=1) - diff = np.abs(results[0][0] - results_3[0][0]) - self.assertTrue((diff < 1e-6).all) - diff = np.abs(results[0][1] - results_3[0][1]) - self.assertTrue((diff < 1e-6).all) - diff = np.abs(results[1][0] - results_3[1][0]) - self.assertTrue((diff < 1e-6).all) - diff = np.abs(results[1][1] - results_3[1][1]) - self.assertTrue((diff < 1e-6).all) - - def test_get_params_layer(self): - self.module.context() - layers = self.module.get_params_layer() - layers = list(set(layers.values())) - true_layers = [i for i in range(12)] - self.assertEqual(layers, true_layers) - - def test_get_spm_path(self): - self.assertEqual(self.module.get_spm_path(), None) - - def test_get_word_dict_path(self): - self.assertEqual(self.module.get_word_dict_path(), None) - - def test_get_vocab_path(self): - vocab_path = self.module.get_vocab_path() - true_vocab_path = os.path.join(self.module.directory, "assets", - "vocab.txt") - self.assertEqual(vocab_path, true_vocab_path) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_ernie_gen_couplet.py b/hub_module/tests/unittests/test_ernie_gen_couplet.py deleted file mode 100644 index 0b34898eed5fa9ad8f88b6085301066766593f01..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_ernie_gen_couplet.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
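In the embedding tests above (here, in the earlier BERT case, and in the ERNIE-skep case below), `self.assertTrue((diff < 1e-6).all)` is missing the call parentheses: `.all` is a bound method object, which is always truthy, so the assertion can never fail. Either call the method or use NumPy's testing helper; the helper function below is a sketch, not part of the original suite.

```python
# Sketch of the fixed CPU-vs-GPU embedding comparison. Note that .all()
# is actually called; the original passed the uncalled method, which is
# always truthy, so the check could never fail.
import numpy as np

def assert_embeddings_close(test, cpu_result, gpu_result, atol=1e-6):
    seq_cpu, tok_cpu = cpu_result
    seq_gpu, tok_gpu = gpu_result
    test.assertTrue((np.abs(seq_cpu - seq_gpu) < atol).all())
    # equivalent, with a much better failure message:
    np.testing.assert_allclose(tok_cpu, tok_gpu, atol=atol)
```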
-from unittest import TestCase, main -import paddlehub as hub - - -class ErnieGenCoupletTestCase(TestCase): - def setUp(self): - self.module = hub.Module(name='ernie_gen_couplet') - self.left = ["风吹云乱天垂泪", "若有经心风过耳"] - - def test_predict(self): - rights = self.module.generate(self.left) - self.assertEqual(len(rights), 2) - self.assertEqual(len(rights[0]), 5) - self.assertEqual(len(rights[0][0]), 7) - self.assertEqual(len(rights[1][0]), 7) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_ernie_gen_poetry.py b/hub_module/tests/unittests/test_ernie_gen_poetry.py deleted file mode 100644 index ae03d16e98907b14c5d647c0fa3628c177b6873a..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_ernie_gen_poetry.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from unittest import TestCase, main -import paddlehub as hub - - -class ErnieGenPoetryTestCase(TestCase): - def setUp(self): - self.module = hub.Module(name='ernie_gen_poetry') - self.left = ["昔年旅南服,始识王荆州。", "高名出汉阴,禅阁跨香岑。"] - - def test_predict(self): - rights = self.module.generate(self.left) - self.assertEqual(len(rights), 2) - self.assertEqual(len(rights[0]), 5) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_ernie_skep_sentiment_analysis.py b/hub_module/tests/unittests/test_ernie_skep_sentiment_analysis.py deleted file mode 100644 index bcceb2382f1d3daea413928321c915dc8130715f..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_ernie_skep_sentiment_analysis.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
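The couplet test above implicitly documents the output shape of `ernie_gen_couplet`: one list per input, five candidates each, with every candidate matching the input's character count. A usage sketch follows; the `beam_width` keyword is an assumption inferred from the five candidates asserted above and should be checked against the module documentation.

```python
# Sketch: ernie_gen_couplet usage. beam_width is assumed from the five
# candidates per input asserted in the test above; verify it against
# the module documentation.
import paddlehub as hub

module = hub.Module(name='ernie_gen_couplet')
lefts = ["风吹云乱天垂泪"]
rights = module.generate(texts=lefts, use_gpu=False, beam_width=5)
for left, candidates in zip(lefts, rights):
    print(left)
    for right in candidates:
        print('  ->', right)  # each candidate matches the input's length
```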
-import os -from unittest import TestCase, main -os.environ['CUDA_VISIBLE_DEVICES'] = '1' - -import numpy as np -import paddlehub as hub - - -class ErnieSkepSentimentAnalysisTestCase(TestCase): - def setUp(self): - self.module = hub.Module(name='ernie_skep_sentiment_analysis') - self.test_text = [[ - '飞桨(PaddlePaddle)是国内开源产业级深度学习平台', 'PaddleHub是飞桨生态的预训练模型应用工具' - ], ["飞浆PaddleHub"]] - self.test_data = ['你不是不聪明,而是不认真', '虽然小明很努力,但是他还是没有考100分'] - self.results = [{ - 'text': '你不是不聪明,而是不认真', - 'sentiment_label': 'negative', - 'positive_probs': 0.10738213360309601, - 'negative_probs': 0.8926178216934204 - }, - { - 'text': '虽然小明很努力,但是他还是没有考100分', - 'sentiment_label': 'negative', - 'positive_probs': 0.053915347903966904, - 'negative_probs': 0.9460846185684204 - }] - - def test_predict_sentiment(self): - results_1 = self.module.predict_sentiment(self.test_data, use_gpu=False) - results_2 = self.module.predict_sentiment(self.test_data, use_gpu=True) - - for index, res in enumerate(results_1): - self.assertEqual(res['text'], self.results[index]['text']) - self.assertEqual(res['sentiment_label'], - self.results[index]['sentiment_label']) - self.assertTrue( - abs(res['positive_probs'] - - self.results[index]['positive_probs']) < 1e-6) - self.assertTrue( - abs(res['negative_probs'] - - self.results[index]['negative_probs']) < 1e-6) - - self.assertEqual(res['text'], results_2[index]['text']) - self.assertEqual(res['sentiment_label'], - results_2[index]['sentiment_label']) - self.assertTrue( - abs(res['positive_probs'] - - results_2[index]['positive_probs']) < 1e-6) - self.assertTrue( - abs(res['negative_probs'] - - results_2[index]['negative_probs']) < 1e-6) - - def test_get_embedding(self): - # test batch_size - max_seq_len = 128 - results = self.module.get_embedding( - texts=self.test_text, - use_gpu=False, - batch_size=1, - max_seq_len=max_seq_len) - results_2 = self.module.get_embedding( - texts=self.test_text, - use_gpu=False, - batch_size=10, - max_seq_len=max_seq_len) - # 2 sample results - self.assertEqual(len(results), 2) - self.assertEqual(len(results_2), 2) - # sequence embedding and token embedding results per sample - self.assertEqual(len(results[0]), 2) - self.assertEqual(len(results_2[0]), 2) - # sequence embedding shape - self.assertEqual(results[0][0].shape, (1024, )) - self.assertEqual(results_2[0][0].shape, (1024, )) - # token embedding shape - self.assertEqual(results[0][1].shape, (max_seq_len, 1024)) - self.assertEqual(results_2[0][1].shape, (max_seq_len, 1024)) - - # test gpu - results_3 = self.module.get_embedding( - texts=self.test_text, - use_gpu=True, - batch_size=1, - max_seq_len=max_seq_len) - diff = np.abs(results[0][0] - results_3[0][0]) - self.assertTrue((diff < 1e-6).all) - diff = np.abs(results[0][1] - results_3[0][1]) - self.assertTrue((diff < 1e-6).all) - diff = np.abs(results[1][0] - results_3[1][0]) - self.assertTrue((diff < 1e-6).all) - diff = np.abs(results[1][1] - results_3[1][1]) - self.assertTrue((diff < 1e-6).all) - - def test_get_params_layer(self): - self.module.context() - layers = self.module.get_params_layer() - layers = list(set(layers.values())) - true_layers = [i for i in range(24)] - self.assertEqual(layers, true_layers) - - def test_get_spm_path(self): - self.assertEqual(self.module.get_spm_path(), None) - - def test_get_word_dict_path(self): - self.assertEqual(self.module.get_word_dict_path(), None) - - def test_get_vocab_path(self): - vocab_path = self.module.get_vocab_path() - true_vocab_path = os.path.join(self.module.directory, "assets", - 
"ernie_1.0_large_ch.vocab.txt") - self.assertEqual(vocab_path, true_vocab_path) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_ernie_tiny_couplet.py b/hub_module/tests/unittests/test_ernie_tiny_couplet.py deleted file mode 100644 index 1c3637cec9c2b33936f2a86c81707b3e82bca411..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_ernie_tiny_couplet.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from unittest import TestCase, main -import paddlehub as hub - - -class ErnieTinyCoupletTestCase(TestCase): - def setUp(self): - self.module = hub.Module(name='ernie_tiny_couplet') - self.left = ["风吹云乱天垂泪", "若有经心风过耳"] - - def test_predict(self): - rights = self.module.predict(self.left) - self.assertEqual(len(rights), 2) - self.assertEqual(len(rights[0]), 10) - self.assertEqual(len(rights[0][0]), 7) - self.assertEqual(len(rights[1][0]), 7) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_face_landmark_localization.py b/hub_module/tests/unittests/test_face_landmark_localization.py deleted file mode 100644 index ef9820aee0bea019a84b1f5d13303a4faecdce5b..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_face_landmark_localization.py +++ /dev/null @@ -1,70 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -pic_dir = '../image_dataset/keypoint_detection' - - -class TestFaceLocate(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - self.face_locate = hub.Module(name="face_landmark_localization") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.\n""" - self.face_locate = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - self.face_locate.context() - - def test_single_pic(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - for pic_path in pics_path_list: - result = self.face_locate.keypoint_detection( - paths=[pic_path], use_gpu=True, visualization=True) - print(result) - - def test_ndarray(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - pics_ndarray = list() - for pic_path in pics_path_list: - im = cv2.imread(pic_path) - result = self.face_locate.keypoint_detection( - images=np.expand_dims(im, axis=0), - output_dir='ndarray_output', - use_gpu=True, - visualization=True) - print(result) - - -if __name__ == 
"__main__": - suite = unittest.TestSuite() - suite.addTest(TestFaceLocate('test_single_pic')) - suite.addTest(TestFaceLocate('test_ndarray')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_faster_rcnn_resnet50.py b/hub_module/tests/unittests/test_faster_rcnn_resnet50.py deleted file mode 100644 index 2ec22a54e37992e17a73430d2cfd3b6dc4136983..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_faster_rcnn_resnet50.py +++ /dev/null @@ -1,70 +0,0 @@ -# coding=utf-8 -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -image_dir = '../image_dataset/object_detection/' - - -class TestFasterRCNNResNet50(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.""" - self.faster_rcnn_r50 = hub.Module(name="faster_rcnn_resnet50_coco2017") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.""" - self.faster_rcnn_r50 = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - with fluid.program_guard(self.test_prog): - inputs, outputs, program = self.faster_rcnn_r50.context( - pretrained=False, trainable=True, phase='train') - image = inputs['image'] - im_info = inputs['im_info'] - im_shape = inputs['im_shape'] - gt_class = inputs['gt_class'] - gt_bbox = inputs['gt_bbox'] - is_crowd = inputs['is_crowd'] - head_feat = outputs['head_feat'] - rpn_cls_loss = outputs['rpn_cls_loss'] - rpn_reg_loss = outputs['rpn_reg_loss'] - generate_proposal_labels = outputs['generate_proposal_labels'] - - def test_object_detection(self): - with fluid.program_guard(self.test_prog): - zebra = cv2.imread(os.path.join(image_dir, - 'zebra.jpg')).astype('float32') - zebras = [zebra, zebra] - detection_results = self.faster_rcnn_r50.object_detection( - paths=[ - os.path.join(image_dir, 'cat.jpg'), - os.path.join(image_dir, 'dog.jpg'), - os.path.join(image_dir, 'giraffe.jpg') - ], - images=zebras, - batch_size=2, - use_gpu=True, - score_thresh=0.5) - print(detection_results) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestFasterRCNNResNet50('test_object_detection')) - suite.addTest(TestFasterRCNNResNet50('test_context')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_faster_rcnn_resnet50_fpn.py b/hub_module/tests/unittests/test_faster_rcnn_resnet50_fpn.py deleted file mode 100644 index 7e267c5ed3bb778d01230c1a8ceade1cc53cdba7..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_faster_rcnn_resnet50_fpn.py +++ /dev/null @@ -1,72 +0,0 @@ -# coding=utf-8 -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -image_dir = '../image_dataset/object_detection/' - - -class TestFasterRCNNR50FPN(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.""" - self.faster_rcnn_r50_fpn = hub.Module( - name="faster_rcnn_resnet50_fpn_coco2017") - # self.faster_rcnn_r50_fpn = hub.Module(directory='') - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.""" - self.faster_rcnn_r50_fpn = None - - def 
setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - with fluid.program_guard(self.test_prog): - inputs, outputs, program = self.faster_rcnn_r50_fpn.context( - pretrained=False, trainable=True, phase='train') - image = inputs['image'] - im_info = inputs['im_info'] - im_shape = inputs['im_shape'] - gt_class = inputs['gt_class'] - gt_bbox = inputs['gt_bbox'] - is_crowd = inputs['is_crowd'] - head_feat = outputs['head_feat'] - rpn_cls_loss = outputs['rpn_cls_loss'] - rpn_reg_loss = outputs['rpn_reg_loss'] - generate_proposal_labels = outputs['generate_proposal_labels'] - - def test_object_detection(self): - with fluid.program_guard(self.test_prog): - zebra = cv2.imread(os.path.join(image_dir, - 'zebra.jpg')).astype('float32') - zebras = [zebra, zebra] - detection_results = self.faster_rcnn_r50_fpn.object_detection( - paths=[ - os.path.join(image_dir, 'cat.jpg'), - os.path.join(image_dir, 'dog.jpg'), - os.path.join(image_dir, 'giraffe.jpg') - ], - images=zebras, - batch_size=2, - use_gpu=False, - score_thresh=0.5) - print(detection_results) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestFasterRCNNR50FPN('test_object_detection')) - suite.addTest(TestFasterRCNNR50FPN('test_context')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_fix_resnext101_32x48d_wsl_imagenet.py b/hub_module/tests/unittests/test_fix_resnext101_32x48d_wsl_imagenet.py deleted file mode 100644 index 71c962a6b7b1865cee305553a2a399d3b702cf5a..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_fix_resnext101_32x48d_wsl_imagenet.py +++ /dev/null @@ -1,92 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -pic_dir = '../image_dataset/classification/animals/' - - -class TestFixResnext10132x48dwslImagenet(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - # self.classifier = hub.Module(name="resnet50_vd_animals") - self.classifier = hub.Module( - directory='fix_resnext101_32x48d_wsl_imagenet') - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.\n""" - self.classifier = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - self.classifier.context(pretrained=True) - - def test_single_pic(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - for pic_path in pics_path_list: - print(pic_path) - result = self.classifier.classification( - paths=[pic_path], use_gpu=False) - print(result) - - def test_batch(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - result = self.classifier.classification( - paths=pics_path_list, batch_size=3, use_gpu=False, top_k=2) - print(result) - - def test_ndarray(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - 
os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - pics_ndarray = list() - print('\n') - for pic_path in pics_path_list: - im = cv2.cvtColor(cv2.imread(pic_path), cv2.COLOR_BGR2RGB) - result = self.classifier.classification( - images=[im], use_gpu=True, top_k=5) - print(result) - - def test_save_inference_model(self): - with fluid.program_guard(self.test_prog): - self.classifier.save_inference_model( - dirname='fix_resnext101_32x48d_wsl_imagenet', - model_filename='model', - combined=False) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestFixResnext10132x48dwslImagenet('test_single_pic')) - suite.addTest(TestFixResnext10132x48dwslImagenet('test_batch')) - suite.addTest(TestFixResnext10132x48dwslImagenet('test_ndarray')) - suite.addTest( - TestFixResnext10132x48dwslImagenet('test_save_inference_model')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_human_pose_estimation_resnet50_mpii.py b/hub_module/tests/unittests/test_human_pose_estimation_resnet50_mpii.py deleted file mode 100644 index a226049c6c30752c862beee15a4356dae3828abc..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_human_pose_estimation_resnet50_mpii.py +++ /dev/null @@ -1,86 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -pic_dir = '../image_dataset/keypoint_detection/' - - -class TestHumanPoseEstimation(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - self.pose = hub.Module(name="human_pose_estimation_resnet50_mpii") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.\n""" - self.pose = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_single_pic(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - for pic_path in pics_path_list: - result = self.pose.keypoint_detection( - paths=[pic_path], use_gpu=True, visualization=True) - print(result) - - def test_batch(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - result = self.pose.keypoint_detection( - paths=pics_path_list, - batch_size=3, - output_dir='batch_output', - use_gpu=True, - visualization=True) - print(result) - - def test_ndarray(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - for pic_path in pics_path_list: - result = self.pose.keypoint_detection( - images=[cv2.imread(pic_path)], - output_dir='ndarray_output', - use_gpu=True, - visualization=True) - - def test_save_inference_model(self): - with fluid.program_guard(self.test_prog): - self.pose.save_inference_model( - dirname='human_pose_estimation_resnet50_mpii', - model_filename='model', - combined=True) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestHumanPoseEstimation('test_single_pic')) - suite.addTest(TestHumanPoseEstimation('test_batch')) - suite.addTest(TestHumanPoseEstimation('test_ndarray')) - 
suite.addTest(TestHumanPoseEstimation('test_save_inference_model')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_lac.py b/hub_module/tests/unittests/test_lac.py deleted file mode 100644 index 7e78f7e7d1092ae760da6f96e6797fb3eb49b98b..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_lac.py +++ /dev/null @@ -1,161 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -from unittest import TestCase, main -os.environ['CUDA_VISIBLE_DEVICES'] = '0' - -import paddlehub as hub - - -class LacTestCase(TestCase): - # yapf: disable - def setUp(self): - self.module = hub.Module(name='lac') - self.user_dict_path = '../user.dict' - self.test_text = ["今天是个好日子", "春天的花开秋天的风以及冬天的落阳"] - self.results_tag = [ - { - 'word': ['今天', '是', '个', '好日子'], - 'tag': ['TIME', 'v', 'q', 'n'] - }, - { - 'word': ['春天', '的', '花开', '秋天', '的', '风', '以及', '冬天', '的', '落阳'], - 'tag': ['TIME', 'u', 'v', 'TIME', 'u', 'n', 'c', 'TIME', 'u', 'vn'] - } - ] - self.results_notag = [ - { - 'word': ['今天', '是', '个', '好日子'] - }, - { - 'word': ['春天', '的', '花开', '秋天', '的', '风', '以及', '冬天', '的', '落阳'] - } - ] - self.results_notag_userdict = [ - { - 'word': ['今天', '是', '个', '好日子'] - }, - { - 'word': ['春天', '的', '花', '开', '秋天的风', '以及', '冬天', '的', '落', '阳'] - } - ] - self.tags = { - 'n': '普通名词', - 'f': '方位名词', - 's': '处所名词', - 't': '时间', - 'nr': '人名', - 'ns': '地名', - 'nt': '机构名', - 'nw': '作品名', - 'nz': '其他专名', - 'v': '普通动词', - 'vd': '动副词', - 'vn': '名动词', - 'a': '形容词', - 'ad': '副形词', - 'an': '名形词', - 'd': '副词', - 'm': '数量词', - 'q': '量词', - 'r': '代词', - 'p': '介词', - 'c': '连词', - 'u': '助词', - 'xc': '其他虚词', - 'w': '标点符号', - 'PER': '人名', - 'LOC': '地名', - 'ORG': '机构名', - 'TIME': '时间' - } - # yapf: enable. 
- - def test_set_user_dict(self): - self.module.set_user_dict(self.user_dict_path) - self.assertNotEqual(self.module.custom, None) - - def test_del_user_dict(self): - self.module.set_user_dict(self.user_dict_path) - self.assertNotEqual(self.module.custom, None) - self.module.del_user_dict() - self.assertEqual(self.module.custom, None) - - def test_lexical_analysis(self): - self.module.del_user_dict() - - # test batch_size - results = self.module.lexical_analysis( - texts=self.test_text, use_gpu=False, batch_size=1, return_tag=False) - self.assertEqual(results, self.results_notag) - results = self.module.lexical_analysis( - texts=self.test_text, - use_gpu=False, - batch_size=10, - return_tag=False) - self.assertEqual(results, self.results_notag) - - # test return_tag - results = self.module.lexical_analysis( - texts=self.test_text, use_gpu=False, batch_size=1, return_tag=True) - self.assertEqual(results, self.results_tag) - - # test gpu - results = self.module.lexical_analysis( - texts=self.test_text, use_gpu=True, batch_size=1, return_tag=False) - self.assertEqual(results, self.results_notag) - - # test results to add user_dict - self.module.set_user_dict(self.user_dict_path) - results = self.module.lexical_analysis( - texts=self.test_text, use_gpu=False, batch_size=1, return_tag=False) - self.assertEqual(results, self.results_notag_userdict) - - def test_cut(self): - # test batch_size - results = self.module.cut( - text=self.test_text, use_gpu=False, batch_size=1, return_tag=False) - self.assertEqual(results, self.results_notag) - results = self.module.cut( - text=self.test_text, use_gpu=False, batch_size=10, return_tag=False) - self.assertEqual(results, self.results_notag) - - # test return_tag - results = self.module.cut( - text=self.test_text, use_gpu=False, batch_size=1, return_tag=True) - self.assertEqual(results, self.results_tag) - - # test gpu - results = self.module.cut( - text=self.test_text, use_gpu=True, batch_size=1, return_tag=False) - self.assertEqual(results, self.results_notag) - - # test results to add user_dict - self.module.set_user_dict(self.user_dict_path) - results = self.module.cut( - text=self.test_text, use_gpu=False, batch_size=1, return_tag=False) - self.assertEqual(results, self.results_notag_userdict) - - # test single sentence - results = self.module.cut( - text="今天是个好日子", use_gpu=False, batch_size=1, return_tag=False) - self.assertEqual(results, ['今天', '是', '个', '好日子']) - - def test_get_tags(self): - tags = self.module.get_tags() - self.assertEqual(tags, self.tags) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_mobilenet_v1.py b/hub_module/tests/unittests/test_mobilenet_v1.py deleted file mode 100644 index 1c24d7d75a65ffcbf8e0f69661b386d2f710a0e0..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_mobilenet_v1.py +++ /dev/null @@ -1,67 +0,0 @@ -# coding=utf-8 -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - - -class TestMobileNetV1(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.""" - # self.mobilenet_v1 = hub.Module(name="mobilenet_v1") - self.mobilenet_v1 = hub.Module(name='mobilenet_v1_imagenet') - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.""" - self.mobilenet_v1 = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - 
"Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - with fluid.program_guard(self.test_prog): - image = fluid.layers.data( - name='image', shape=[3, 224, 224], dtype='float32') - inputs, outputs, program = self.mobilenet_v1.context( - input_image=image, - pretrained=False, - trainable=True, - param_prefix='BaiDu') - image = inputs["image"] - body_feats = outputs['body_feats'] - - def test_classification(self): - with fluid.program_guard(self.test_prog): - image_dir = "../image_dataset/pascal_voc" - airplane = cv2.imread(os.path.join( - image_dir, 'airplane.jpg')).astype('float32') - airplanes = np.array([airplane, airplane]) - classification_results = self.mobilenet_v1.classification( - paths=[ - os.path.join(image_dir, 'bird.jpg'), - os.path.join(image_dir, 'bike.jpg'), - os.path.join(image_dir, 'cowboy.jpg'), - os.path.join(image_dir, 'sheep.jpg'), - os.path.join(image_dir, 'train.jpg') - ], - images=airplanes, - batch_size=2) - print(classification_results) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestMobileNetV1('test_context')) - suite.addTest(TestMobileNetV1('test_classification')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_mobilenet_v2_animals.py b/hub_module/tests/unittests/test_mobilenet_v2_animals.py deleted file mode 100644 index 2f65d15ff82b7c5bddb21a841f507bbb991158d1..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_mobilenet_v2_animals.py +++ /dev/null @@ -1,90 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -pic_dir = '../image_dataset/classification/animals/' - - -class TestMobileNetV2Animal(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - self.animal_classify = hub.Module(name="mobilenet_v2_animals") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.\n""" - self.animal_classify = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - self.animal_classify.context(pretrained=True) - - def test_single_pic(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - for pic_path in pics_path_list: - print(pic_path) - result = self.animal_classify.classification( - paths=[pic_path], use_gpu=False) - print(result) - - def test_batch(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - result = self.animal_classify.classification( - paths=pics_path_list, batch_size=3, use_gpu=False) - print(result) - - def test_ndarray(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - pics_ndarray = list() - print('\n') - for pic_path in pics_path_list: - im = cv2.imread(pic_path) - result = self.animal_classify.classification( - images=np.expand_dims(im, axis=0), use_gpu=False) - print(result) - - def test_save_inference_model(self): - with 
fluid.program_guard(self.test_prog): - self.animal_classify.save_inference_model( - dirname='mobilenet_v2_animals', - model_filename='model', - combined=True) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestMobileNetV2Animal('test_context')) - suite.addTest(TestMobileNetV2Animal('test_single_pic')) - suite.addTest(TestMobileNetV2Animal('test_batch')) - suite.addTest(TestMobileNetV2Animal('test_ndarray')) - suite.addTest(TestMobileNetV2Animal('test_save_inference_model')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_mobilenet_v2_dishes.py b/hub_module/tests/unittests/test_mobilenet_v2_dishes.py deleted file mode 100644 index 18bf7ba23e3ed0052e0bae887d4546446cc212aa..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_mobilenet_v2_dishes.py +++ /dev/null @@ -1,90 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -pic_dir = '../image_dataset/classification/dish/' - - -class TestMobileNetV2Dish(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - self.dish_classify = hub.Module(name="mobilenet_v2_dishes") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.\n""" - self.dish_classify = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - self.dish_classify.context(pretrained=True) - - def test_single_pic(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - for pic_path in pics_path_list: - print(pic_path) - result = self.dish_classify.classification( - paths=[pic_path], use_gpu=False) - print(result) - - def test_batch(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - result = self.dish_classify.classification( - paths=pics_path_list, batch_size=3, use_gpu=False) - print(result) - - def test_ndarray(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - for pic_path in pics_path_list: - im = cv2.cvtColor(cv2.imread(pic_path), cv2.COLOR_BGR2RGB) - print(pic_path) - result = self.dish_classify.classification( - images=np.expand_dims(im, axis=0), use_gpu=False) - print(result) - - def test_save_inference_model(self): - with fluid.program_guard(self.test_prog): - self.dish_classify.save_inference_model( - dirname='mobilenet_v2_dishes', - model_filename='model', - combined=True) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestMobileNetV2Dish('test_context')) - suite.addTest(TestMobileNetV2Dish('test_single_pic')) - suite.addTest(TestMobileNetV2Dish('test_batch')) - suite.addTest(TestMobileNetV2Dish('test_ndarray')) - suite.addTest(TestMobileNetV2Dish('test_save_inference_model')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_mobilenet_v2_imagenet_ssld.py 
b/hub_module/tests/unittests/test_mobilenet_v2_imagenet_ssld.py deleted file mode 100644 index d6c018e6e895f6cc6f58be4ea19a38fef53af416..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_mobilenet_v2_imagenet_ssld.py +++ /dev/null @@ -1,90 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -pic_dir = '../image_dataset/classification/animals/' - - -class TestMobileNetV2SSLD(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - self.animal_classify = hub.Module(name="mobilenet_v2_imagenet_ssld") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.\n""" - self.animal_classify = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - self.animal_classify.context(pretrained=True) - - def test_single_pic(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - for pic_path in pics_path_list: - print(pic_path) - result = self.animal_classify.classification( - paths=[pic_path], use_gpu=False) - print(result) - - def test_batch(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - result = self.animal_classify.classification( - paths=pics_path_list, batch_size=3, use_gpu=False) - print(result) - - def test_ndarray(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - pics_ndarray = list() - print('\n') - for pic_path in pics_path_list: - im = cv2.imread(pic_path) - result = self.animal_classify.classification( - images=np.expand_dims(im, axis=0), use_gpu=False) - print(result) - - def test_save_inference_model(self): - with fluid.program_guard(self.test_prog): - self.animal_classify.save_inference_model( - dirname='mobilenet_v2_imagenet_ssld', - model_filename='model', - combined=True) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestMobileNetV2SSLD('test_context')) - suite.addTest(TestMobileNetV2SSLD('test_single_pic')) - suite.addTest(TestMobileNetV2SSLD('test_batch')) - suite.addTest(TestMobileNetV2SSLD('test_ndarray')) - suite.addTest(TestMobileNetV2SSLD('test_save_inference_model')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_mobilenet_v3_large_imagenet_ssld.py b/hub_module/tests/unittests/test_mobilenet_v3_large_imagenet_ssld.py deleted file mode 100644 index 41d672b5fb69fa22cd38aeed21153fb9e250d50a..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_mobilenet_v3_large_imagenet_ssld.py +++ /dev/null @@ -1,91 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -pic_dir = '../image_dataset/classification/animals/' - - -class TestMobileNetV3LargeSSLD(unittest.TestCase): - 
@classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - self.animal_classify = hub.Module( - name="mobilenet_v3_large_imagenet_ssld") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.\n""" - self.animal_classify = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - self.animal_classify.context(pretrained=True) - - def test_single_pic(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - for pic_path in pics_path_list: - print(pic_path) - result = self.animal_classify.classification( - paths=[pic_path], use_gpu=False) - print(result) - - def test_batch(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - result = self.animal_classify.classification( - paths=pics_path_list, batch_size=3, use_gpu=False) - print(result) - - def test_ndarray(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - pics_ndarray = list() - print('\n') - for pic_path in pics_path_list: - im = cv2.imread(pic_path) - result = self.animal_classify.classification( - images=np.expand_dims(im, axis=0), use_gpu=False) - print(result) - - def test_save_inference_model(self): - with fluid.program_guard(self.test_prog): - self.animal_classify.save_inference_model( - dirname='mobilenet_v3_large_imagenet_ssld', - model_filename='model', - combined=True) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestMobileNetV3LargeSSLD('test_context')) - suite.addTest(TestMobileNetV3LargeSSLD('test_single_pic')) - suite.addTest(TestMobileNetV3LargeSSLD('test_batch')) - suite.addTest(TestMobileNetV3LargeSSLD('test_ndarray')) - suite.addTest(TestMobileNetV3LargeSSLD('test_save_inference_model')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_mobilenet_v3_small_imagenet_ssld.py b/hub_module/tests/unittests/test_mobilenet_v3_small_imagenet_ssld.py deleted file mode 100644 index 9fd2de7ebdd0d373d2c109920ee8870d37402baf..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_mobilenet_v3_small_imagenet_ssld.py +++ /dev/null @@ -1,91 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -pic_dir = '../image_dataset/classification/animals/' - - -class TestMobileNetV3SmallSSLD(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - self.animal_classify = hub.Module( - name="mobilenet_v3_small_imagenet_ssld") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.\n""" - self.animal_classify = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - self.animal_classify.context(pretrained=True) - - 
def test_single_pic(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - for pic_path in pics_path_list: - print(pic_path) - result = self.animal_classify.classification( - paths=[pic_path], use_gpu=False) - print(result) - - def test_batch(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - result = self.animal_classify.classification( - paths=pics_path_list, batch_size=3, use_gpu=False) - print(result) - - def test_ndarray(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - pics_ndarray = list() - print('\n') - for pic_path in pics_path_list: - im = cv2.imread(pic_path) - result = self.animal_classify.classification( - images=np.expand_dims(im, axis=0), use_gpu=False) - print(result) - - def test_save_inference_model(self): - with fluid.program_guard(self.test_prog): - self.animal_classify.save_inference_model( - dirname='mobilenet_v3_small_imagenet_ssld', - model_filename='model', - combined=True) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestMobileNetV3SmallSSLD('test_context')) - suite.addTest(TestMobileNetV3SmallSSLD('test_single_pic')) - suite.addTest(TestMobileNetV3SmallSSLD('test_batch')) - suite.addTest(TestMobileNetV3SmallSSLD('test_ndarray')) - suite.addTest(TestMobileNetV3SmallSSLD('test_save_inference_model')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_porn_detection_gru.py b/hub_module/tests/unittests/test_porn_detection_gru.py deleted file mode 100644 index 538e8b759b2a599fe486719aaa6d41214dc32ad8..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_porn_detection_gru.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -from unittest import TestCase, main -os.environ['CUDA_VISIBLE_DEVICES'] = '0' - -import paddlehub as hub - - -class PornDetectionGRUTestCase(TestCase): - def setUp(self): - self.module = hub.Module(name='porn_detection_gru') - self.test_text = ["黄片下载", "打击黄牛党"] - self.results = [{ - 'text': '黄片下载', - 'porn_detection_label': 1, - 'porn_detection_key': 'porn', - 'porn_probs': 0.9751, - 'not_porn_probs': 0.0249 - }, - { - 'text': '打击黄牛党', - 'porn_detection_label': 0, - 'porn_detection_key': 'not_porn', - 'porn_probs': 0.0003, - 'not_porn_probs': 0.9997 - }] - self.labels = {"porn": 1, "not_porn": 0} - - def test_detection(self): - # test batch_size - results = self.module.detection( - texts=self.test_text, use_gpu=False, batch_size=1) - self.assertEqual(results, self.results) - results = self.module.detection( - texts=self.test_text, use_gpu=False, batch_size=10) - self.assertEqual(results, self.results) - - # test gpu - results = self.module.detection( - texts=self.test_text, use_gpu=True, batch_size=1) - self.assertEqual(results, self.results) - - def test_get_vocab_path(self): - true_vocab_path = os.path.join(self.module.directory, "assets", - "word_dict.txt") - vocab_path = self.module.get_vocab_path() - self.assertEqual(vocab_path, true_vocab_path) - - def test_get_labels(self): - labels = self.module.get_labels() - self.assertEqual(labels, self.labels) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_pyramidbox_face_detection.py b/hub_module/tests/unittests/test_pyramidbox_face_detection.py deleted file mode 100644 index 523c8447e3dc2f4d4f719520ba3a894c9c4bbd5e..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_pyramidbox_face_detection.py +++ /dev/null @@ -1,68 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -pic_dir = '../image_dataset/face_detection' - - -class TestFaceDetector320(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - self.face_detector = hub.Module(name="pyramidbox_face_detection") - # self.face_detector = hub.Module(directory="pyramidbox_face_detection") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.\n""" - self.face_detector = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_single_pic(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - for pic_path in pics_path_list: - print(pic_path, '\n') - result = self.face_detector.face_detection( - paths=[pic_path], use_gpu=True, visualization=True) - print(result) - - def test_ndarray(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - pics_ndarray = list() - for pic_path in pics_path_list: - result = self.face_detector.face_detection( - images=[cv2.imread(pic_path)], - output_dir='ndarray_output', - use_gpu=True, - visualization=True) - print(result) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestFaceDetector320('test_single_pic')) - 
suite.addTest(TestFaceDetector320('test_ndarray')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_pyramidbox_lite_mobile.py b/hub_module/tests/unittests/test_pyramidbox_lite_mobile.py deleted file mode 100644 index 1e433ceb981fa1db41de5877da7053cbbf1986cf..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_pyramidbox_lite_mobile.py +++ /dev/null @@ -1,82 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import unittest - -import cv2 -import paddle.fluid as fluid -import paddlehub as hub - -pic_dir = '../image_dataset/face_detection/' - - -class TestPyramidBoxLiteMobile(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - self.face_detector = hub.Module(name="pyramidbox_lite_mobile") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.\n""" - self.face_detector = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_single_pic(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - for pic_path in pics_path_list: - result = self.face_detector.face_detection( - paths=[pic_path], - use_gpu=True, - visualization=True, - shrink=0.5, - confs_threshold=0.6) - print(result) - - def test_ndarray(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - pics_ndarray = list() - im_list = list() - for pic_path in pics_path_list: - im = cv2.imread(pic_path) - im_list.append(im) - result = self.face_detector.face_detection( - images=im_list, - output_dir='ndarray_output', - shrink=1, - confs_threshold=0.6, - use_gpu=True, - visualization=True) - print(result) - - def test_save_inference_model(self): - with fluid.program_guard(self.test_prog): - self.face_detector.save_inference_model( - dirname='pyramidbox_lite_mobile', - model_filename='model', - combined=True) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestPyramidBoxLiteMobile('test_single_pic')) - suite.addTest(TestPyramidBoxLiteMobile('test_ndarray')) - suite.addTest(TestPyramidBoxLiteMobile('test_save_inference_model')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_pyramidbox_lite_mobile_mask.py b/hub_module/tests/unittests/test_pyramidbox_lite_mobile_mask.py deleted file mode 100644 index a6ee5a3c22f42a2aad82eb8fbaa92879155acc11..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_pyramidbox_lite_mobile_mask.py +++ /dev/null @@ -1,96 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import unittest - -import cv2 -import paddle.fluid as fluid -import paddlehub as hub - -pic_dir = '../image_dataset/face_detection/' - - -class TestPyramidBoxLiteMobileMask(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - self.mask_detector = hub.Module(name="pyramidbox_lite_mobile_mask") - - @classmethod - def tearDownClass(self): - 
"""clean up the environment after the execution of all tests.\n""" - self.mask_detector = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_single_pic(self): - with fluid.program_guard(self.test_prog): - paths_list = [os.path.join(pic_dir, f) for f in os.listdir(pic_dir)] - print('\n') - for pic_path in paths_list: - print(pic_path) - result = self.mask_detector.face_detection( - paths=[pic_path], - use_gpu=True, - visualization=True, - use_multi_scale=True, - shrink=0.5, - confs_threshold=0.6) - print(result) - - def test_batch(self): - with fluid.program_guard(self.test_prog): - paths_list = [os.path.join(pic_dir, f) for f in os.listdir(pic_dir)] - result = self.mask_detector.face_detection( - paths=paths_list, - batch_size=5, - use_gpu=True, - visualization=True, - output_dir='batch_out', - use_multi_scale=True, - shrink=0.5, - confs_threshold=0.6) - print(result) - - def test_ndarray(self): - with fluid.program_guard(self.test_prog): - paths_list = [os.path.join(pic_dir, f) for f in os.listdir(pic_dir)] - pics_ndarray = list() - im_list = list() - for pic_path in paths_list: - im = cv2.imread(pic_path) - im_list.append(im) - result = self.mask_detector.face_detection( - images=im_list, - output_dir='ndarray_output', - shrink=1, - confs_threshold=0.6, - use_gpu=True, - visualization=True) - print(result) - - def test_save_inference_model(self): - with fluid.program_guard(self.test_prog): - self.mask_detector.save_inference_model( - dirname='pyramidbox_lite_mobile_mask_model', - model_filename='model', - combined=True) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestPyramidBoxLiteMobileMask('test_single_pic')) - suite.addTest(TestPyramidBoxLiteMobileMask('test_batch')) - suite.addTest(TestPyramidBoxLiteMobileMask('test_ndarray')) - suite.addTest(TestPyramidBoxLiteMobileMask('test_save_inference_model')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_pyramidbox_lite_server.py b/hub_module/tests/unittests/test_pyramidbox_lite_server.py deleted file mode 100644 index a5a2a2580b5773f5c5a5e58884ee8e3311920d64..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_pyramidbox_lite_server.py +++ /dev/null @@ -1,79 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import unittest - -import cv2 -import paddle.fluid as fluid -import paddlehub as hub - -pic_dir = '../image_dataset/face_detection/' - - -class TestPyramidBoxLiteServer(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - self.face_detector = hub.Module(name='pyramidbox_lite_server') - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.\n""" - self.face_detector = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_single_pic(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - for pic_path in pics_path_list: - result = self.face_detector.face_detection( - paths=[pic_path], - use_gpu=True, - 
visualization=True, - shrink=0.5, - confs_threshold=0.6) - print(result) - - def test_ndarray(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - pics_ndarray = list() - for pic_path in pics_path_list: - im = cv2.imread(pic_path) - result = self.face_detector.face_detection( - images=[im], - output_dir='ndarray_output', - shrink=1, - confs_threshold=0.6, - use_gpu=True, - visualization=True) - - def test_save_inference_model(self): - with fluid.program_guard(self.test_prog): - self.face_detector.save_inference_model( - dirname='pyramidbox_lite_server', - model_filename='model', - combined=True) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestPyramidBoxLiteServer('test_single_pic')) - suite.addTest(TestPyramidBoxLiteServer('test_ndarray')) - suite.addTest(TestPyramidBoxLiteServer('test_save_inference_model')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_pyramidbox_lite_server_mask.py b/hub_module/tests/unittests/test_pyramidbox_lite_server_mask.py deleted file mode 100644 index e209366c6606457486aa2fb2038ac49dfff63401..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_pyramidbox_lite_server_mask.py +++ /dev/null @@ -1,96 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import unittest - -import cv2 -import paddle.fluid as fluid -import paddlehub as hub - -pic_dir = '../image_dataset/face_detection/' - - -class TestPyramidBoxLiteServerMask(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - self.mask_detector = hub.Module(name="pyramidbox_lite_server_mask") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.\n""" - self.mask_detector = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_single_pic(self): - with fluid.program_guard(self.test_prog): - paths_list = [os.path.join(pic_dir, f) for f in os.listdir(pic_dir)] - print('\n') - for pic_path in paths_list: - print(pic_path) - result = self.mask_detector.face_detection( - paths=[pic_path], - use_gpu=True, - visualization=True, - use_multi_scale=True, - shrink=0.5, - confs_threshold=0.6) - print(result) - - def test_batch(self): - with fluid.program_guard(self.test_prog): - paths_list = [os.path.join(pic_dir, f) for f in os.listdir(pic_dir)] - result = self.mask_detector.face_detection( - paths=paths_list, - batch_size=5, - use_gpu=True, - visualization=True, - output_dir='batch_out', - use_multi_scale=True, - shrink=0.5, - confs_threshold=0.6) - print(result) - - def test_ndarray(self): - with fluid.program_guard(self.test_prog): - paths_list = [os.path.join(pic_dir, f) for f in os.listdir(pic_dir)] - pics_ndarray = list() - im_list = list() - for pic_path in paths_list: - im = cv2.imread(pic_path) - im_list.append(im) - result = self.mask_detector.face_detection( - images=im_list, - output_dir='ndarray_output', - shrink=1, - confs_threshold=0.6, - use_gpu=True, - visualization=True) - print(result) - - def test_save_inference_model(self): - with fluid.program_guard(self.test_prog): - self.mask_detector.save_inference_model( - 
dirname='pyramidbox_lite_server_mask_model', - model_filename='model', - combined=True) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestPyramidBoxLiteServerMask('test_single_pic')) - suite.addTest(TestPyramidBoxLiteServerMask('test_batch')) - suite.addTest(TestPyramidBoxLiteServerMask('test_ndarray')) - suite.addTest(TestPyramidBoxLiteServerMask('test_save_inference_model')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_res2net101_vd_26w_4s_imagenet.py b/hub_module/tests/unittests/test_res2net101_vd_26w_4s_imagenet.py deleted file mode 100644 index 52f3daef91cc02acdd2196cd0d8925650dd2554c..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_res2net101_vd_26w_4s_imagenet.py +++ /dev/null @@ -1,90 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -pic_dir = '../image_dataset/classification/animals/' - - -class TestResNet50vdAnimal(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - self.classifier = hub.Module(name="res2net101_vd_26w_4s_imagenet") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.\n""" - self.classifier = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - self.classifier.context(pretrained=True) - - def test_single_pic(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - for pic_path in pics_path_list: - print(pic_path) - result = self.classifier.classification( - paths=[pic_path], use_gpu=False) - print(result) - - def test_batch(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - result = self.classifier.classification( - paths=pics_path_list, batch_size=3, use_gpu=False, top_k=2) - print(result) - - def test_ndarray(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - pics_ndarray = list() - print('\n') - for pic_path in pics_path_list: - im = cv2.cvtColor(cv2.imread(pic_path), cv2.COLOR_BGR2RGB) - result = self.classifier.classification( - images=[im], use_gpu=True, top_k=5) - print(result) - - def test_save_inference_model(self): - with fluid.program_guard(self.test_prog): - self.classifier.save_inference_model( - dirname='res2net101_vd_26w_4s_imagenet_model', - model_filename='__model__', - combined=False) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestResNet50vdAnimal('test_context')) - suite.addTest(TestResNet50vdAnimal('test_single_pic')) - suite.addTest(TestResNet50vdAnimal('test_batch')) - suite.addTest(TestResNet50vdAnimal('test_ndarray')) - suite.addTest(TestResNet50vdAnimal('test_save_inference_model')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_resnet18_vd_imagenet.py b/hub_module/tests/unittests/test_resnet18_vd_imagenet.py deleted file mode 100644 index 
54fa57e41f8794b1cfebb9542be12f72286c50c7..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_resnet18_vd_imagenet.py +++ /dev/null @@ -1,90 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -pic_dir = '../image_dataset/classification/animals/' - - -class TestResnet18vdImagenet(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - self.classifier = hub.Module(name='resnet18_vd_imagenet') - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.\n""" - self.classifier = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - self.classifier.context(pretrained=True) - - def test_single_pic(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - for pic_path in pics_path_list: - print(pic_path) - result = self.classifier.classification( - paths=[pic_path], use_gpu=False) - print(result) - - def test_batch(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - result = self.classifier.classification( - paths=pics_path_list, batch_size=3, use_gpu=False, top_k=2) - print(result) - - def test_ndarray(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - pics_ndarray = list() - print('\n') - for pic_path in pics_path_list: - im = cv2.cvtColor(cv2.imread(pic_path), cv2.COLOR_BGR2RGB) - result = self.classifier.classification( - images=[im], use_gpu=True, top_k=5) - print(result) - - def test_save_inference_model(self): - with fluid.program_guard(self.test_prog): - self.classifier.save_inference_model( - dirname='resnet18_vd_imagenet_model', - model_filename='model', - combined=False) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestResnet18vdImagenet('test_context')) - suite.addTest(TestResnet18vdImagenet('test_single_pic')) - suite.addTest(TestResnet18vdImagenet('test_batch')) - suite.addTest(TestResnet18vdImagenet('test_ndarray')) - suite.addTest(TestResnet18vdImagenet('test_save_inference_model')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_resnet34.py b/hub_module/tests/unittests/test_resnet34.py deleted file mode 100644 index b7c45113d31fa0f56dc82165bcda7955cfe1644a..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_resnet34.py +++ /dev/null @@ -1,68 +0,0 @@ -# coding=utf-8 -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - - -class TestResNet(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.""" - # self.mobilenet_v1 = hub.Module(name="mobilenet_v1") - self.resnet = hub.Module(name='resnet34_v2_imagenet') - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.""" - self.resnet = None - - def 
setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - with fluid.program_guard(self.test_prog): - image = fluid.layers.data( - name='image', shape=[3, 224, 224], dtype='float32') - inputs, outputs, program = self.resnet.context( - input_image=image, - pretrained=False, - trainable=True, - param_prefix='BaiDu') - image = inputs["image"] - body_feats = outputs['body_feats'] - - def test_classification(self): - with fluid.program_guard(self.test_prog): - image_dir = "../image_dataset/pascal_voc/" - airplane = cv2.imread(os.path.join( - image_dir, 'airplane.jpg')).astype('float32') - airplanes = np.array([airplane, airplane]) - print(airplanes) - classification_results = self.resnet.classification( - paths=[ - os.path.join(image_dir, 'bird.jpg'), - os.path.join(image_dir, 'bike.jpg'), - os.path.join(image_dir, 'cowboy.jpg'), - os.path.join(image_dir, 'sheep.jpg'), - os.path.join(image_dir, 'train.jpg') - ], - images=airplanes, - batch_size=2) - print(classification_results) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestResNet('test_context')) - suite.addTest(TestResNet('test_classification')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_resnet50.py b/hub_module/tests/unittests/test_resnet50.py deleted file mode 100644 index 69e8b9ed1b9b3d6537ce4edd57c6fc6910299f70..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_resnet50.py +++ /dev/null @@ -1,67 +0,0 @@ -# coding=utf-8 -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - - -class TestResNet(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.""" - # self.mobilenet_v1 = hub.Module(name="mobilenet_v1") - self.resnet = hub.Module(name='resnet50_v2_imagenet') - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.""" - self.resnet = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - with fluid.program_guard(self.test_prog): - image = fluid.layers.data( - name='image', shape=[3, 224, 224], dtype='float32') - inputs, outputs, program = self.resnet.context( - input_image=image, - pretrained=False, - trainable=True, - param_prefix='BaiDu') - image = inputs["image"] - body_feats = outputs['body_feats'] - - def test_classification(self): - with fluid.program_guard(self.test_prog): - image_dir = "../image_dataset/pascal_voc/" - airplane = cv2.imread(os.path.join( - image_dir, 'airplane.jpg')).astype('float32') - airplanes = np.array([airplane, airplane]) - classification_results = self.resnet.classification( - paths=[ - os.path.join(image_dir, 'bird.jpg'), - os.path.join(image_dir, 'bike.jpg'), - os.path.join(image_dir, 'cowboy.jpg'), - os.path.join(image_dir, 'sheep.jpg'), - os.path.join(image_dir, 'train.jpg') - ], - images=airplanes, - batch_size=2) - print(classification_results) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestResNet('test_context')) - suite.addTest(TestResNet('test_classification')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) 
diff --git a/hub_module/tests/unittests/test_resnet50_vd_animals.py b/hub_module/tests/unittests/test_resnet50_vd_animals.py deleted file mode 100644 index 1854d9cf512d76e21c9c80de348c9f345afc0485..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_resnet50_vd_animals.py +++ /dev/null @@ -1,90 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -pic_dir = '../image_dataset/classification/animals/' - - -class TestResNet50vdAnimal(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - self.animal_classify = hub.Module(name="resnet50_vd_animals") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.\n""" - self.animal_classify = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - self.animal_classify.context(pretrained=True) - - def test_single_pic(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - for pic_path in pics_path_list: - print(pic_path) - result = self.animal_classify.classification( - paths=[pic_path], use_gpu=False) - print(result) - - def test_batch(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - result = self.animal_classify.classification( - paths=pics_path_list, batch_size=3, use_gpu=False, top_k=2) - print(result) - - def test_ndarray(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - pics_ndarray = list() - print('\n') - for pic_path in pics_path_list: - im = cv2.cvtColor(cv2.imread(pic_path), cv2.COLOR_BGR2RGB) - result = self.animal_classify.classification( - images=np.expand_dims(im, axis=0), use_gpu=False, top_k=5) - print(result) - - def test_save_inference_model(self): - with fluid.program_guard(self.test_prog): - self.animal_classify.save_inference_model( - dirname='resnet50_vd_animals', - model_filename='model', - combined=False) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestResNet50vdAnimal('test_context')) - suite.addTest(TestResNet50vdAnimal('test_single_pic')) - suite.addTest(TestResNet50vdAnimal('test_batch')) - suite.addTest(TestResNet50vdAnimal('test_ndarray')) - suite.addTest(TestResNet50vdAnimal('test_save_inference_model')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_resnet50_vd_dishes.py b/hub_module/tests/unittests/test_resnet50_vd_dishes.py deleted file mode 100644 index 16dc372577be75762e5ad3f0fbad07b9e7527a77..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_resnet50_vd_dishes.py +++ /dev/null @@ -1,90 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -pic_dir = '../image_dataset/classification/dish/' - - -class 
TestResNet50vdDish(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - self.dish_classify = hub.Module(name="resnet50_vd_dishes") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.\n""" - self.dish_classify = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - self.dish_classify.context(pretrained=True) - - def test_single_pic(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - for pic_path in pics_path_list: - print(pic_path) - result = self.dish_classify.classification( - paths=[pic_path], use_gpu=False) - print(result) - - def test_batch(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - result = self.dish_classify.classification( - paths=pics_path_list, batch_size=3, use_gpu=False) - print(result) - - def test_ndarray(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - pics_ndarray = list() - print('\n') - for pic_path in pics_path_list: - im = cv2.cvtColor(cv2.imread(pic_path), cv2.COLOR_BGR2RGB) - result = self.dish_classify.classification( - images=np.expand_dims(im, axis=0), use_gpu=False) - print(result) - - def test_save_inference_model(self): - with fluid.program_guard(self.test_prog): - self.dish_classify.save_inference_model( - dirname='resnet50_vd_dishes', - model_filename='model', - combined=True) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestResNet50vdDish('test_context')) - suite.addTest(TestResNet50vdDish('test_single_pic')) - suite.addTest(TestResNet50vdDish('test_batch')) - suite.addTest(TestResNet50vdDish('test_ndarray')) - suite.addTest(TestResNet50vdDish('test_save_inference_model')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_resnet50_vd_imagenet_ssld.py b/hub_module/tests/unittests/test_resnet50_vd_imagenet_ssld.py deleted file mode 100644 index 8b0f880878746bbefeedb3c2e7f07b57381902fd..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_resnet50_vd_imagenet_ssld.py +++ /dev/null @@ -1,90 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -pic_dir = '../image_dataset/classification/animals/' - - -class TestResNet50vdSSLD(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - self.animal_classify = hub.Module(name="resnet50_vd_imagenet_ssld") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.\n""" - self.animal_classify = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - self.animal_classify.context(pretrained=True) - - def test_single_pic(self): - with 
fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - for pic_path in pics_path_list: - print(pic_path) - result = self.animal_classify.classification( - paths=[pic_path], use_gpu=False) - print(result) - - def test_batch(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - result = self.animal_classify.classification( - paths=pics_path_list, batch_size=3, use_gpu=False, top_k=2) - print(result) - - def test_ndarray(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - pics_ndarray = list() - print('\n') - for pic_path in pics_path_list: - im = cv2.cvtColor(cv2.imread(pic_path), cv2.COLOR_BGR2RGB) - result = self.animal_classify.classification( - images=np.expand_dims(im, axis=0), use_gpu=False, top_k=5) - print(result) - - def test_save_inference_model(self): - with fluid.program_guard(self.test_prog): - self.animal_classify.save_inference_model( - dirname='resnet50_vd_imagenet_ssld', - model_filename='model', - combined=True) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestResNet50vdSSLD('test_context')) - suite.addTest(TestResNet50vdSSLD('test_single_pic')) - suite.addTest(TestResNet50vdSSLD('test_batch')) - suite.addTest(TestResNet50vdSSLD('test_ndarray')) - suite.addTest(TestResNet50vdSSLD('test_save_inference_model')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_resnet50_vd_wildanimals.py b/hub_module/tests/unittests/test_resnet50_vd_wildanimals.py deleted file mode 100644 index e5fc9c01ff916a65c3df3270ada6d9ac1b0873b5..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_resnet50_vd_wildanimals.py +++ /dev/null @@ -1,90 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -pic_dir = '../image_dataset/classification/animals/' - - -class TestResNet50vdWildAnimals(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - self.wildanimals_classify = hub.Module(name="resnet50_vd_wildanimals") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.\n""" - self.wildanimals_classify = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - self.wildanimals_classify.context(pretrained=True) - - def test_single_pic(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - for pic_path in pics_path_list: - print(pic_path) - result = self.wildanimals_classify.classification( - paths=[pic_path], use_gpu=False) - print(result) - - def test_batch(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - result = self.wildanimals_classify.classification( - paths=pics_path_list, batch_size=3, use_gpu=False) - print(result) - - def test_ndarray(self): - with 
fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - pics_ndarray = list() - print('\n') - for pic_path in pics_path_list: - im = cv2.cvtColor(cv2.imread(pic_path), cv2.COLOR_BGR2RGB) - result = self.wildanimals_classify.classification( - images=[im], use_gpu=False) - print(result) - - def test_save_inference_model(self): - with fluid.program_guard(self.test_prog): - self.wildanimals_classify.save_inference_model( - dirname='resnet50_vd_wildanimals', - model_filename=None, - combined=True) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestResNet50vdWildAnimals('test_context')) - suite.addTest(TestResNet50vdWildAnimals('test_single_pic')) - suite.addTest(TestResNet50vdWildAnimals('test_batch')) - suite.addTest(TestResNet50vdWildAnimals('test_ndarray')) - suite.addTest(TestResNet50vdWildAnimals('test_save_inference_model')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_retinanet_resnet50_fpn.py b/hub_module/tests/unittests/test_retinanet_resnet50_fpn.py deleted file mode 100644 index 60aeeda2fb94fb6c34c3b14000c6ce38d56fc31e..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_retinanet_resnet50_fpn.py +++ /dev/null @@ -1,60 +0,0 @@ -# coding=utf-8 -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - - -class TestRetinaNet(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.""" - self.retinanet = hub.Module(name="retinanet_resnet50_fpn_coco2017") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.""" - self.retinanet = None - - def setUp(self): - self.test_prog = fluid.Program() - "Call setUp() to prepare environment\n" - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - with fluid.program_guard(self.test_prog): - inputs, outputs, program = self.retinanet.context( - pretrained=False, trainable=True) - image = inputs["image"] - im_info = inputs["im_info"] - - def test_object_detection(self): - with fluid.program_guard(self.test_prog): - image_dir = '../image_dataset/object_detection' - zebra = cv2.imread(os.path.join(image_dir, - 'zebra.jpg')).astype('float32') - zebras = [zebra, zebra] - detection_results = self.retinanet.object_detection( - paths=[ - os.path.join(image_dir, 'cat.jpg'), - os.path.join(image_dir, 'dog.jpg'), - os.path.join(image_dir, 'giraffe.jpg') - ], - images=zebras, - batch_size=2, - use_gpu=True) - print(detection_results) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestRetinaNet('test_object_detection')) - suite.addTest(TestRetinaNet('test_context')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_roberta_large.py b/hub_module/tests/unittests/test_roberta_large.py deleted file mode 100644 index 0110c2555d522ebfad55c2a522f1d2952a1af870..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_roberta_large.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -from unittest import TestCase, main -os.environ['CUDA_VISIBLE_DEVICES'] = '0' - -import numpy as np -import paddlehub as hub - - -class RoBERTaChnLargeTestCase(TestCase): - def setUp(self): - self.module = hub.Module(name='chinese-roberta-wwm-ext-large') - self.test_text = [[ - '飞桨(PaddlePaddle)是国内开源产业级深度学习平台', 'PaddleHub是飞桨生态的预训练模型应用工具' - ], ["飞浆PaddleHub"]] - - def test_get_embedding(self): - # test batch_size - results = self.module.get_embedding( - texts=self.test_text, use_gpu=False, batch_size=1) - results_2 = self.module.get_embedding( - texts=self.test_text, use_gpu=False, batch_size=10) - # 2 sample results - self.assertEqual(len(results), 2) - self.assertEqual(len(results_2), 2) - # sequence embedding and token embedding results per sample - self.assertEqual(len(results[0]), 2) - self.assertEqual(len(results_2[0]), 2) - # sequence embedding shape - self.assertEqual(results[0][0].shape, (1024, )) - self.assertEqual(results_2[0][0].shape, (1024, )) - # token embedding shape, max_seq_len is 512 - self.assertEqual(results[0][1].shape, (512, 1024)) - self.assertEqual(results_2[0][1].shape, (512, 1024)) - - # test gpu - results_3 = self.module.get_embedding( - texts=self.test_text, use_gpu=True, batch_size=1) - diff = np.abs(results[0][0] - results_3[0][0]) - self.assertTrue((diff < 1e-6).all()) - diff = np.abs(results[0][1] - results_3[0][1]) - self.assertTrue((diff < 1e-6).all()) - diff = np.abs(results[1][0] - results_3[1][0]) - self.assertTrue((diff < 1e-6).all()) - diff = np.abs(results[1][1] - results_3[1][1]) - self.assertTrue((diff < 1e-6).all()) - - def test_get_params_layer(self): - self.module.context() - layers = self.module.get_params_layer() - layers = list(set(layers.values())) - true_layers = [i for i in range(24)] - self.assertEqual(layers, true_layers) - - def test_get_spm_path(self): - self.assertEqual(self.module.get_spm_path(), None) - - def test_get_word_dict_path(self): - self.assertEqual(self.module.get_word_dict_path(), None) - - def test_get_vocab_path(self): - vocab_path = self.module.get_vocab_path() - true_vocab_path = os.path.join(self.module.directory, "assets", - "vocab.txt") - self.assertEqual(vocab_path, true_vocab_path) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_se_resnet18_vd_imagenet.py b/hub_module/tests/unittests/test_se_resnet18_vd_imagenet.py deleted file mode 100644 index 36bf299f4b8a4059eacd6485c9bd159ffe02521f..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_se_resnet18_vd_imagenet.py +++ /dev/null @@ -1,89 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -pic_dir = '../image_dataset/classification/animals/' - - -class TestSEResnet18vdImagenet(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - self.classifier = hub.Module(name='se_resnet18_vd_imagenet') - - @classmethod - def
tearDownClass(self): - """clean up the environment after the execution of all tests.\n""" - self.classifier = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - self.classifier.context(pretrained=True) - - def test_single_pic(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - for pic_path in pics_path_list: - print(pic_path) - result = self.classifier.classification( - paths=[pic_path], use_gpu=False) - print(result) - - def test_batch(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - print('\n') - result = self.classifier.classification( - paths=pics_path_list, batch_size=3, use_gpu=False, top_k=2) - print(result) - - def test_ndarray(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - pics_ndarray = list() - print('\n') - for pic_path in pics_path_list: - im = cv2.cvtColor(cv2.imread(pic_path), cv2.COLOR_BGR2RGB) - result = self.classifier.classification( - images=[im], use_gpu=True, top_k=5) - print(result) - - def test_save_inference_model(self): - with fluid.program_guard(self.test_prog): - self.classifier.save_inference_model( - dirname='se_resnet18_vd_imagenet_model', - model_filename='model', - combined=False) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestSEResnet18vdImagenet('test_single_pic')) - suite.addTest(TestSEResnet18vdImagenet('test_batch')) - suite.addTest(TestSEResnet18vdImagenet('test_ndarray')) - suite.addTest(TestSEResnet18vdImagenet('test_save_inference_model')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_senta_bilstm.py b/hub_module/tests/unittests/test_senta_bilstm.py deleted file mode 100644 index b3f1cd2e75f540dd693fb71cb87f763d15d7e6de..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_senta_bilstm.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -from unittest import TestCase, main -os.environ['CUDA_VISIBLE_DEVICES'] = '0' - -import paddlehub as hub - - -class SentaTestCase(TestCase): - def setUp(self): - self.module = hub.Module(name='senta_bilstm') - self.test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] - self.results = [{ - 'text': '这家餐厅很好吃', - 'sentiment_label': 1, - 'sentiment_key': 'positive', - 'positive_probs': 0.9407, - 'negative_probs': 0.0593 - }, - { - 'text': '这部电影真的很差劲', - 'sentiment_label': 0, - 'sentiment_key': 'negative', - 'positive_probs': 0.02, - 'negative_probs': 0.98 - }] - self.labels = {"positive": 1, "negative": 0} - - def test_sentiment_classify(self): - # test batch_size - results = self.module.sentiment_classify( - texts=self.test_text, use_gpu=False, batch_size=1) - self.assertEqual(results, self.results) - results = self.module.sentiment_classify( - texts=self.test_text, use_gpu=False, batch_size=10) - self.assertEqual(results, self.results) - - # test gpu - results = self.module.sentiment_classify( - texts=self.test_text, use_gpu=True, batch_size=1) - self.assertEqual(results, self.results) - - def test_get_vocab_path(self): - true_vocab_path = os.path.join(self.module.directory, "assets", - "vocab.txt") - vocab_path = self.module.get_vocab_path() - self.assertEqual(vocab_path, true_vocab_path) - - def test_get_labels(self): - labels = self.module.get_labels() - self.assertEqual(labels, self.labels) - - -if __name__ == '__main__': - main() diff --git a/hub_module/tests/unittests/test_ssd_mobilenet_v1_pascal.py b/hub_module/tests/unittests/test_ssd_mobilenet_v1_pascal.py deleted file mode 100644 index 45e13da682015a9aa3e6f65cc2e9f9a9a20be51a..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_ssd_mobilenet_v1_pascal.py +++ /dev/null @@ -1,67 +0,0 @@ -# coding=utf-8 -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -image_dir = '../image_dataset/object_detection/pascal_voc/' - - -class TestSSDMobileNet(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.""" - self.ssd = hub.Module(name="ssd_mobilenet_v1_pascal") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.""" - self.ssd = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - with fluid.program_guard(self.test_prog): - get_prediction = True - inputs, outputs, program = self.ssd.context( - pretrained=True, trainable=True, get_prediction=get_prediction) - image = inputs["image"] - im_size = inputs["im_size"] - if get_prediction: - bbox_out = outputs['bbox_out'] - else: - body_features = outputs['body_features'] - - def test_object_detection(self): - with fluid.program_guard(self.test_prog): - airplane = cv2.imread(os.path.join( - image_dir, 'airplane.jpg')).astype('float32') - airplanes = [airplane, airplane] - detection_results = self.ssd.object_detection( - paths=[ - os.path.join(image_dir, 'bird.jpg'), - os.path.join(image_dir, 'bike.jpg'), - os.path.join(image_dir, 'cowboy.jpg'), - os.path.join(image_dir, 'sheep.jpg'), - os.path.join(image_dir, 'train.jpg') - ], - images=airplanes, - batch_size=1) - print(detection_results) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestSSDMobileNet('test_context')) - 
suite.addTest(TestSSDMobileNet('test_object_detection')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_ssd_vgg16_300_coco2017.py b/hub_module/tests/unittests/test_ssd_vgg16_300_coco2017.py deleted file mode 100644 index 6e9cb8826b15996d631cef7426788ff06095f4a1..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_ssd_vgg16_300_coco2017.py +++ /dev/null @@ -1,73 +0,0 @@ -# coding=utf-8 -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -image_dir = '../image_dataset/object_detection/' - - -class TestSSDVGG300(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.""" - self.ssd = hub.Module(name="ssd_vgg16_300_coco2017") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.""" - self.ssd = None - - def setUp(self): - self.test_prog = fluid.Program() - "Call setUp() to prepare environment\n" - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - with fluid.program_guard(self.test_prog): - get_prediction = True - inputs, outputs, program = self.ssd.context( - pretrained=True, trainable=True, get_prediction=get_prediction) - image = inputs["image"] - im_size = inputs["im_size"] - if get_prediction: - bbox_out = outputs['bbox_out'] - else: - body_features = outputs['body_features'] - - def test_object_detection(self): - with fluid.program_guard(self.test_prog): - zebra = cv2.imread(os.path.join(image_dir, - 'zebra.jpg')).astype('float32') - zebras = [zebra, zebra] - ## only paths - print( - self.ssd.object_detection( - paths=[os.path.join(image_dir, 'cat.jpg')])) - ## only images - print(self.ssd.object_detection(images=zebras)) - ## paths and images - print( - self.ssd.object_detection( - paths=[ - os.path.join(image_dir, 'cat.jpg'), - os.path.join(image_dir, 'dog.jpg'), - os.path.join(image_dir, 'giraffe.jpg') - ], - images=zebras, - batch_size=2, - score_thresh=0.5)) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestSSDVGG300('test_object_detection')) - suite.addTest(TestSSDVGG300('test_context')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_ssd_vgg16_512_coco2017.py b/hub_module/tests/unittests/test_ssd_vgg16_512_coco2017.py deleted file mode 100644 index b47ee7bf695ac488921393614876e449b9555ebe..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_ssd_vgg16_512_coco2017.py +++ /dev/null @@ -1,73 +0,0 @@ -# coding=utf-8 -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -image_dir = '../image_dataset/object_detection/' - - -class TestSSDVGG512(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.""" - self.ssd = hub.Module(name="ssd_vgg16_512_coco2017") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.""" - self.ssd = None - - def setUp(self): - self.test_prog = fluid.Program() - "Call setUp() to prepare environment\n" - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - with fluid.program_guard(self.test_prog): - get_prediction = True - inputs, outputs, program 
= self.ssd.context( - pretrained=True, trainable=True, get_prediction=get_prediction) - image = inputs["image"] - im_size = inputs["im_size"] - if get_prediction: - bbox_out = outputs['bbox_out'] - else: - body_features = outputs['body_features'] - - def test_object_detection(self): - with fluid.program_guard(self.test_prog): - zebra = cv2.imread(os.path.join(image_dir, - 'zebra.jpg')).astype('float32') - zebras = [zebra, zebra] - ## only paths - print( - self.ssd.object_detection( - paths=[os.path.join(image_dir, 'cat.jpg')])) - ## only images - print(self.ssd.object_detection(images=zebras)) - ## paths and images - print( - self.ssd.object_detection( - paths=[ - os.path.join(image_dir, 'cat.jpg'), - os.path.join(image_dir, 'dog.jpg'), - os.path.join(image_dir, 'giraffe.jpg') - ], - images=zebras, - batch_size=2, - score_thresh=0.5)) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestSSDVGG512('test_object_detection')) - suite.addTest(TestSSDVGG512('test_context')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_stylepro_artistic.py b/hub_module/tests/unittests/test_stylepro_artistic.py deleted file mode 100644 index 6357e504eafbfad5958775b61362169fd3d68628..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_stylepro_artistic.py +++ /dev/null @@ -1,116 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -content_dir = '../image_dataset/style_tranfer/content/' -style_dir = '../image_dataset/style_tranfer/style/' - - -class TestStyleProjection(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - self.style_projection = hub.Module(name="stylepro_artistic") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.\n""" - self.style_projection = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_single_style(self): - with fluid.program_guard(self.test_prog): - content_paths = [ - os.path.join(content_dir, f) for f in os.listdir(content_dir) - ] - style_paths = [ - os.path.join(style_dir, f) for f in os.listdir(style_dir) - ] - for style_path in style_paths: - t1 = time.time() - self.style_projection.style_transfer( - paths=[{ - 'content': content_paths[0], - 'styles': [style_path] - }], - alpha=0.8, - use_gpu=True) - t2 = time.time() - print('\nCost time: {}'.format(t2 - t1)) - - def test_multiple_styles(self): - with fluid.program_guard(self.test_prog): - content_path = os.path.join(content_dir, 'chicago.jpg') - style_paths = [ - os.path.join(style_dir, f) for f in os.listdir(style_dir) - ] - for j in range(len(style_paths) - 1): - res = self.style_projection.style_transfer( - paths=[{ - 'content': content_path, - 'styles': [style_paths[j], style_paths[j + 1]], - 'weights': [1, 2] - }], - alpha=0.8, - use_gpu=True, - visualization=True) - print('#' * 100) - print(res) - print('#' * 100) - - def test_input_ndarray(self): - with fluid.program_guard(self.test_prog): - content_arr = cv2.imread(os.path.join(content_dir, 'chicago.jpg')) - content_arr = cv2.cvtColor(content_arr, 
cv2.COLOR_BGR2RGB) - style_arrs_BGR = [ - cv2.imread(os.path.join(style_dir, f)) - for f in os.listdir(style_dir) - ] - style_arrs_list = [ - cv2.cvtColor(arr, cv2.COLOR_BGR2RGB) for arr in style_arrs_BGR - ] - for j in range(len(style_arrs_list) - 1): - self.style_projection.style_transfer( - images=[{ - 'content': - content_arr, - 'styles': [style_arrs_list[j], style_arrs_list[j + 1]] - }], - alpha=0.8, - use_gpu=True, - output_dir='transfer_out', - visualization=True) - - def test_save_inference_model(self): - with fluid.program_guard(self.test_prog): - self.style_projection.save_inference_model( - dirname='stylepro_artistic', - model_filename='model', - combined=True) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestStyleProjection('test_single_style')) - suite.addTest(TestStyleProjection('test_multiple_styles')) - suite.addTest(TestStyleProjection('test_input_ndarray')) - suite.addTest(TestStyleProjection('test_save_inference_model')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_ultra_light_fast_generic_face_detector_1mb_320.py b/hub_module/tests/unittests/test_ultra_light_fast_generic_face_detector_1mb_320.py deleted file mode 100644 index 8972243898c308305ef1f2adaa77d6a29dd6d918..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_ultra_light_fast_generic_face_detector_1mb_320.py +++ /dev/null @@ -1,88 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -pic_dir = '../image_dataset/face_detection' - - -class TestFaceDetector320(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - self.face_detector = hub.Module( - name="ultra_light_fast_generic_face_detector_1mb_320") - self.face_detector._initialize() - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.\n""" - self.face_detector = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_single_pic(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - t1 = time.time() - for pic_path in pics_path_list: - result = self.face_detector.face_detection( - paths=[pic_path], use_gpu=True, visualization=True) - t2 = time.time() - print('It cost {} seconds when batch_size=1.'.format(t2 - t1)) - - def test_batch(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - t1 = time.time() - result = self.face_detector.face_detection( - paths=pics_path_list, - batch_size=5, - output_dir='batch_output', - use_gpu=True, - visualization=True) - t2 = time.time() - print('It cost {} seconds when batch_size=5.'.format(t2 - t1)) - print(result) - - def test_ndarray(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - pics_ndarray = list() - for pic_path in pics_path_list: - im = cv2.imread(pic_path) - result = self.face_detector.face_detection( - images=np.expand_dims(im, axis=0), - 
output_dir='ndarray_output', - use_gpu=True, - visualization=True) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestFaceDetector320('test_single_pic')) - suite.addTest(TestFaceDetector320('test_batch')) - suite.addTest(TestFaceDetector320('test_ndarray')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_ultra_light_fast_generic_face_detector_1mb_640.py b/hub_module/tests/unittests/test_ultra_light_fast_generic_face_detector_1mb_640.py deleted file mode 100644 index 81f7a6fb3caad4ca1f226542becd6525f4c8b670..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_ultra_light_fast_generic_face_detector_1mb_640.py +++ /dev/null @@ -1,88 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -pic_dir = '../image_dataset/face_detection' - - -class TestFaceDetector640(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.\n""" - self.face_detector = hub.Module( - name="ultra_light_fast_generic_face_detector_1mb_640") - self.face_detector._initialize() - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.\n""" - self.face_detector = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_single_pic(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - t1 = time.time() - for pic_path in pics_path_list: - result = self.face_detector.face_detection( - paths=[pic_path], use_gpu=True, visualization=True) - t2 = time.time() - print('It cost {} seconds when batch_size=1.'.format(t2 - t1)) - - def test_batch(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - t1 = time.time() - result = self.face_detector.face_detection( - paths=pics_path_list, - batch_size=5, - output_dir='batch_output', - use_gpu=True, - visualization=True) - t2 = time.time() - print('It cost {} seconds when batch_size=5.'.format(t2 - t1)) - print(result) - - def test_ndarray(self): - with fluid.program_guard(self.test_prog): - pics_path_list = [ - os.path.join(pic_dir, f) for f in os.listdir(pic_dir) - ] - pics_ndarray = list() - for pic_path in pics_path_list: - im = cv2.imread(pic_path) - result = self.face_detector.face_detection( - images=np.expand_dims(im, axis=0), - output_dir='ndarray_output', - use_gpu=True, - visualization=True) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestFaceDetector640('test_single_pic')) - suite.addTest(TestFaceDetector640('test_batch')) - suite.addTest(TestFaceDetector640('test_ndarray')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_vgg16.py b/hub_module/tests/unittests/test_vgg16.py deleted file mode 100644 index 153fb6d7229eada7f29a57b5fcaa55ea63b8983a..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_vgg16.py +++ /dev/null @@ -1,67 +0,0 @@ -# coding=utf-8 -import os -import unittest - -import cv2 -import numpy 
as np -import paddle.fluid as fluid -import paddlehub as hub - - -class TestVGG16(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.""" - # self.mobilenet_v1 = hub.Module(name="mobilenet_v1") - self.vgg16 = hub.Module(name='vgg16_imagenet') - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.""" - self.vgg16 = None - - def setUp(self): - "Call setUp() to prepare environment\n" - self.test_prog = fluid.Program() - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - with fluid.program_guard(self.test_prog): - image = fluid.layers.data( - name='image', shape=[3, 300, 300], dtype='float32') - inputs, outputs, program = self.vgg16.context( - input_image=image, - pretrained=False, - trainable=True, - param_prefix='BaiDu') - image = inputs["image"] - body_feats = outputs['body_feats'] - - def test_classification(self): - with fluid.program_guard(self.test_prog): - image_dir = "../image_dataset/pascal_voc/" - airplane = cv2.imread(os.path.join( - image_dir, 'airplane.jpg')).astype('float32') - airplanes = np.array([airplane, airplane]) - classification_results = self.vgg16.classification( - paths=[ - os.path.join(image_dir, 'bird.jpg'), - os.path.join(image_dir, 'bike.jpg'), - os.path.join(image_dir, 'cowboy.jpg'), - os.path.join(image_dir, 'sheep.jpg'), - os.path.join(image_dir, 'train.jpg') - ], - images=airplanes, - batch_size=2) - print(classification_results) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - #suite.addTest(TestVGG16('test_context')) - suite.addTest(TestVGG16('test_classification')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_videotag_tsn_lstm.py b/hub_module/tests/unittests/test_videotag_tsn_lstm.py deleted file mode 100644 index 0c28a8ea8de3775d49b5690dc567bb253c610d91..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_videotag_tsn_lstm.py +++ /dev/null @@ -1,51 +0,0 @@ -# coding=utf-8 -import unittest -import paddlehub as hub - - -class TestVideoTag(unittest.TestCase): - def setUp(self): - "Call setUp() to prepare environment\n" - self.module = hub.Module(name='videotag_tsn_lstm') - self.test_video = [ - "../video_dataset/classification/1.mp4", - "../video_dataset/classification/2.mp4" - ] - - def test_classification(self): - default_expect1 = { - '训练': 0.9771281480789185, - '蹲': 0.9389840960502625, - '杠铃': 0.8554490804672241, - '健身房': 0.8479971885681152 - } - default_expect2 = {'舞蹈': 0.8504238724708557} - for use_gpu in [True, False]: - for threshold in [0.5, 0.9]: - for top_k in [10, 1]: - expect1 = {} - expect2 = {} - for key, value in default_expect1.items(): - if value >= threshold: - expect1[key] = value - if len(expect1.keys()) >= top_k: - break - for key, value in default_expect2.items(): - if value >= threshold: - expect2[key] = value - if len(expect2.keys()) >= top_k: - break - results = self.module.classify( - paths=self.test_video, - use_gpu=use_gpu, - threshold=threshold, - top_k=top_k) - for result in results: - if '1.mp4' in result['path']: - self.assertEqual(result['prediction'], expect1) - else: - self.assertEqual(result['prediction'], expect2) - - -if __name__ == "__main__": - unittest.main() diff --git a/hub_module/tests/unittests/test_yolov3_darknet53_coco2017.py b/hub_module/tests/unittests/test_yolov3_darknet53_coco2017.py deleted file mode 100644
index ff08b8c04caf32df06056bc1b4b3ee827f6ef7c5..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_yolov3_darknet53_coco2017.py +++ /dev/null @@ -1,65 +0,0 @@ -# coding=utf-8 -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -image_dir = '../image_dataset/object_detection/' - - -class TestYoloV3DarkNet53(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.""" - self.yolov3 = hub.Module(name="yolov3_darknet53_coco2017") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.""" - self.yolov3 = None - - def setUp(self): - self.test_prog = fluid.Program() - "Call setUp() to prepare environment\n" - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - with fluid.program_guard(self.test_prog): - get_prediction = True - inputs, outputs, program = self.yolov3.context( - pretrained=True, trainable=True, get_prediction=get_prediction) - image = inputs["image"] - im_size = inputs["im_size"] - if get_prediction: - bbox_out = outputs['bbox_out'] - else: - head_features = outputs['head_features'] - - def test_object_detection(self): - with fluid.program_guard(self.test_prog): - zebra = cv2.imread(os.path.join(image_dir, - 'zebra.jpg')).astype('float32') - zebras = [zebra, zebra] - detection_results = self.yolov3.object_detection( - paths=[ - os.path.join(image_dir, 'cat.jpg'), - os.path.join(image_dir, 'dog.jpg'), - os.path.join(image_dir, 'giraffe.jpg') - ], - images=zebras, - batch_size=2) - print(detection_results) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestYoloV3DarkNet53('test_object_detection')) - suite.addTest(TestYoloV3DarkNet53('test_context')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_yolov3_darknet53_pedestrian.py b/hub_module/tests/unittests/test_yolov3_darknet53_pedestrian.py deleted file mode 100644 index 7f686091cdfe9b8782b598cd026d73583d579ed4..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_yolov3_darknet53_pedestrian.py +++ /dev/null @@ -1,61 +0,0 @@ -# coding=utf-8 -import os -import unittest - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -image_dir = '../image_dataset/face_detection/' - - -class TestYOLOv3DarkNet53Pedestrian(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.""" - self.yolov3_pedestrian_detect = hub.Module( - name="yolov3_darknet53_pedestrian") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.""" - self.yolov3_pedestrian_detect = None - - def setUp(self): - self.test_prog = fluid.Program() - "Call setUp() to prepare environment\n" - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - with fluid.program_guard(self.test_prog): - get_prediction = True - inputs, outputs, program = self.yolov3_pedestrian_detect.context( - pretrained=True, trainable=True, get_prediction=get_prediction) - - image = inputs["image"] - im_size = inputs["im_size"] - if get_prediction: - bbox_out = outputs['bbox_out'] - else: - head_features = outputs['head_features'] - - def test_object_detection(self): - with fluid.program_guard(self.test_prog): 
- paths = list() - for file_path in os.listdir(image_dir): - paths.append(os.path.join(image_dir, file_path)) - - detection_results = self.yolov3_pedestrian_detect.object_detection( - paths=paths, batch_size=3, visualization=True) - print(detection_results) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestYOLOv3DarkNet53Pedestrian('test_object_detection')) - suite.addTest(TestYOLOv3DarkNet53Pedestrian('test_context')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_yolov3_darknet53_vehicles.py b/hub_module/tests/unittests/test_yolov3_darknet53_vehicles.py deleted file mode 100644 index 1798a828cb9a67f7e2d13c6a89ce92775250da0a..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_yolov3_darknet53_vehicles.py +++ /dev/null @@ -1,61 +0,0 @@ -# coding=utf-8 -import os -import unittest - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -image_dir = '../image_dataset/object_detection/vehicles/' - - -class TestYOLOv3DarkNet53Vehicles(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.""" - self.yolov3_vehicles_detect = hub.Module( - name="yolov3_darknet53_vehicles") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.""" - self.yolov3_vehicles_detect = None - - def setUp(self): - self.test_prog = fluid.Program() - "Call setUp() to prepare environment\n" - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - with fluid.program_guard(self.test_prog): - get_prediction = True - inputs, outputs, program = self.yolov3_vehicles_detect.context( - pretrained=True, trainable=True, get_prediction=get_prediction) - - image = inputs["image"] - im_size = inputs["im_size"] - if get_prediction: - bbox_out = outputs['bbox_out'] - else: - head_features = outputs['head_features'] - - def test_object_detection(self): - with fluid.program_guard(self.test_prog): - paths = list() - for file_path in os.listdir(image_dir): - paths.append(os.path.join(image_dir, file_path)) - - detection_results = self.yolov3_vehicles_detect.object_detection( - paths=paths, batch_size=3, visualization=True) - print(detection_results) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestYOLOv3DarkNet53Vehicles('test_object_detection')) - suite.addTest(TestYOLOv3DarkNet53Vehicles('test_context')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_yolov3_mobilenet_v1_coco2017.py b/hub_module/tests/unittests/test_yolov3_mobilenet_v1_coco2017.py deleted file mode 100644 index b9ab17806256ca1e0e3e96257947de538c195eef..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_yolov3_mobilenet_v1_coco2017.py +++ /dev/null @@ -1,65 +0,0 @@ -# coding=utf-8 -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -image_dir = '../image_dataset/object_detection/' - - -class TestYoloV3MoobileNetV1(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.""" - self.yolov3 = hub.Module(name="yolov3_mobilenet_v1_coco2017") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.""" - self.yolov3 = None - - def setUp(self): - 
self.test_prog = fluid.Program() - "Call setUp() to prepare environment\n" - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - with fluid.program_guard(self.test_prog): - get_prediction = True - inputs, outputs, program = self.yolov3.context( - pretrained=True, trainable=True, get_prediction=get_prediction) - image = inputs["image"] - im_size = inputs["im_size"] - if get_prediction: - bbox_out = outputs['bbox_out'] - else: - head_features = outputs['head_features'] - - def test_object_detection(self): - with fluid.program_guard(self.test_prog): - zebra = cv2.imread(os.path.join(image_dir, - 'zebra.jpg')).astype('float32') - zebras = [zebra, zebra] - detection_results = self.yolov3.object_detection( - paths=[ - os.path.join(image_dir, 'cat.jpg'), - os.path.join(image_dir, 'dog.jpg'), - os.path.join(image_dir, 'giraffe.jpg') - ], - images=zebras, - batch_size=2) - print(detection_results) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestYoloV3MoobileNetV1('test_object_detection')) - suite.addTest(TestYoloV3MoobileNetV1('test_context')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/unittests/test_yolov3_resnet34_coco2017.py b/hub_module/tests/unittests/test_yolov3_resnet34_coco2017.py deleted file mode 100644 index 58cf6df62a8751ebe3fee6a4303117429921c80f..0000000000000000000000000000000000000000 --- a/hub_module/tests/unittests/test_yolov3_resnet34_coco2017.py +++ /dev/null @@ -1,65 +0,0 @@ -# coding=utf-8 -import os -import unittest - -import cv2 -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - -image_dir = '../image_dataset/object_detection/' - - -class TestYoloV3ResNet34(unittest.TestCase): - @classmethod - def setUpClass(self): - """Prepare the environment once before execution of all tests.""" - self.yolov3 = hub.Module(name="yolov3_resnet34_coco2017") - - @classmethod - def tearDownClass(self): - """clean up the environment after the execution of all tests.""" - self.yolov3 = None - - def setUp(self): - self.test_prog = fluid.Program() - "Call setUp() to prepare environment\n" - - def tearDown(self): - "Call tearDown to restore environment.\n" - self.test_prog = None - - def test_context(self): - with fluid.program_guard(self.test_prog): - get_prediction = True - inputs, outputs, program = self.yolov3.context( - pretrained=True, trainable=True, get_prediction=get_prediction) - image = inputs["image"] - im_size = inputs["im_size"] - if get_prediction: - bbox_out = outputs['bbox_out'] - else: - head_features = outputs['head_features'] - - def test_object_detection(self): - with fluid.program_guard(self.test_prog): - zebra = cv2.imread(os.path.join(image_dir, - 'zebra.jpg')).astype('float32') - zebras = [zebra, zebra] - detection_results = self.yolov3.object_detection( - paths=[ - os.path.join(image_dir, 'cat.jpg'), - os.path.join(image_dir, 'dog.jpg'), - os.path.join(image_dir, 'giraffe.jpg') - ], - images=zebras, - batch_size=2) - print(detection_results) - - -if __name__ == "__main__": - suite = unittest.TestSuite() - suite.addTest(TestYoloV3ResNet34('test_object_detection')) - suite.addTest(TestYoloV3ResNet34('test_context')) - runner = unittest.TextTestRunner(verbosity=2) - runner.run(suite) diff --git a/hub_module/tests/video_dataset/classification/1.mp4 b/hub_module/tests/video_dataset/classification/1.mp4 deleted file mode 100644 index 
391b057e3bd99c0bb0c1bf8ed95194dd984fd23a..0000000000000000000000000000000000000000 Binary files a/hub_module/tests/video_dataset/classification/1.mp4 and /dev/null differ diff --git a/hub_module/tests/video_dataset/classification/2.mp4 b/hub_module/tests/video_dataset/classification/2.mp4 deleted file mode 100644 index e276117a207a860769b8c5a4b5f207e743a45ccc..0000000000000000000000000000000000000000 Binary files a/hub_module/tests/video_dataset/classification/2.mp4 and /dev/null differ diff --git a/hub_module/modules/README.md b/modules/README.md similarity index 100% rename from hub_module/modules/README.md rename to modules/README.md diff --git a/hub_module/modules/audio/README.md b/modules/audio/README.md similarity index 100% rename from hub_module/modules/audio/README.md rename to modules/audio/README.md diff --git a/hub_module/modules/audio/tts/deepvoice3_ljspeech/README.md b/modules/audio/tts/deepvoice3_ljspeech/README.md similarity index 100% rename from hub_module/modules/audio/tts/deepvoice3_ljspeech/README.md rename to modules/audio/tts/deepvoice3_ljspeech/README.md diff --git a/autodl/DELTA/models/__init__.py b/modules/audio/tts/deepvoice3_ljspeech/__init__.py similarity index 100% rename from autodl/DELTA/models/__init__.py rename to modules/audio/tts/deepvoice3_ljspeech/__init__.py diff --git a/modules/audio/tts/deepvoice3_ljspeech/module.py b/modules/audio/tts/deepvoice3_ljspeech/module.py new file mode 100644 index 0000000000000000000000000000000000000000..580b8580381bd5112dac42e8bb8bc24be215cd60 --- /dev/null +++ b/modules/audio/tts/deepvoice3_ljspeech/module.py @@ -0,0 +1,324 @@ +# coding:utf-8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import argparse +import ast +import importlib.util + +import nltk +import numpy as np +import paddle.fluid as fluid +import paddle.fluid.dygraph as dg +import paddlehub as hub +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo, serving +from paddlehub.common.dir import THIRD_PARTY_HOME +from paddlehub.common.utils import mkdir +from paddlehub.common.downloader import default_downloader +from paddlehub.module.module import runnable +from paddlehub.module.nlp_module import DataFormatError + +lack_dependency = [] +for dependency in ["ruamel", "parakeet", "soundfile", "librosa"]: + if not importlib.util.find_spec(dependency): + lack_dependency.append(dependency) + +# Accelerate NLTK package download via paddlehub. 'import parakeet' will use the package.
+_PUNKT_URL = "https://paddlehub.bj.bcebos.com/paddlehub-thirdparty/punkt.tar.gz" +_CMUDICT_URL = "https://paddlehub.bj.bcebos.com/paddlehub-thirdparty/cmudict.tar.gz" +nltk_path = os.path.join(THIRD_PARTY_HOME, "nltk_data") +tokenizers_path = os.path.join(nltk_path, "tokenizers") +corpora_path = os.path.join(nltk_path, "corpora") +punkt_path = os.path.join(tokenizers_path, "punkt") +cmudict_path = os.path.join(corpora_path, "cmudict") + +if not os.path.exists(punkt_path): + default_downloader.download_file_and_uncompress(url=_PUNKT_URL, save_path=tokenizers_path, print_progress=True) +if not os.path.exists(cmudict_path): + default_downloader.download_file_and_uncompress(url=_CMUDICT_URL, save_path=corpora_path, print_progress=True) +nltk.data.path.append(nltk_path) + +if not lack_dependency: + import soundfile as sf + import librosa + import ruamel.yaml + from parakeet.utils import io + from parakeet.g2p import en + from parakeet.models.deepvoice3 import Encoder, Decoder, PostNet, SpectraNet + from parakeet.models.waveflow import WaveFlowModule + from parakeet.models.deepvoice3.weight_norm_hook import remove_weight_norm +else: + raise ImportError( + "The module requires additional dependencies: %s. You can install parakeet via 'git clone https://github.com/PaddlePaddle/Parakeet && cd Parakeet && pip install -e .' and others via pip install" + % ", ".join(lack_dependency)) + + +class AttrDict(dict): + def __init__(self, *args, **kwargs): + super(AttrDict, self).__init__(*args, **kwargs) + self.__dict__ = self + + +class WaveflowVocoder(object): + def __init__(self, config_path, checkpoint_path): + with open(config_path, 'rt') as f: + config = ruamel.yaml.safe_load(f) + ns = argparse.Namespace() + for k, v in config.items(): + setattr(ns, k, v) + ns.use_fp16 = False + + self.model = WaveFlowModule(ns) + io.load_parameters(self.model, checkpoint_path=checkpoint_path) + + def __call__(self, mel): + with dg.no_grad(): + self.model.eval() + audio = self.model.synthesize(mel) + self.model.train() + return audio + + +class GriffinLimVocoder(object): + def __init__(self, sharpening_factor=1.4, sample_rate=22050, n_fft=1024, win_length=1024, hop_length=256): + self.sample_rate = sample_rate + self.n_fft = n_fft + self.sharpening_factor = sharpening_factor + self.win_length = win_length + self.hop_length = hop_length + + def __call__(self, mel): + spec = librosa.feature.inverse.mel_to_stft( + np.exp(mel), sr=self.sample_rate, n_fft=self.n_fft, fmin=0, fmax=8000.0, power=1.0) + audio = librosa.core.griffinlim( + spec**self.sharpening_factor, win_length=self.win_length, hop_length=self.hop_length) + return audio + + +@moduleinfo( + name="deepvoice3_ljspeech", + version="1.0.0", + summary="Deep Voice 3, a fully-convolutional attention-based neural text-to-speech (TTS) system.", + author="paddlepaddle", + author_email="", + type="nlp/tts", +) +class DeepVoice3(hub.NLPPredictionModule): + def _initialize(self): + """ + initialize with the necessary elements + """ + self.tts_checkpoint_path = os.path.join(self.directory, "assets", "tts", "step-1780000") + self.waveflow_checkpoint_path = os.path.join(self.directory, "assets", "vocoder", "step-2000000") + self.waveflow_config_path = os.path.join(self.directory, "assets", "vocoder", "waveflow_ljspeech.yaml") + tts_checkpoint_path = os.path.join(self.directory, "assets", "tts", "ljspeech.yaml") + with open(tts_checkpoint_path) as f: + self.tts_config = ruamel.yaml.safe_load(f) + + with fluid.dygraph.guard(fluid.CPUPlace()): + char_embedding = 
dg.Embedding((en.n_vocab, self.tts_config["char_dim"])) + multi_speaker = self.tts_config["n_speakers"] > 1 + speaker_embedding = dg.Embedding((self.tts_config["n_speakers"], self.tts_config["speaker_dim"])) \ + if multi_speaker else None + encoder = Encoder( + self.tts_config["encoder_layers"], + self.tts_config["char_dim"], + self.tts_config["encoder_dim"], + self.tts_config["kernel_size"], + has_bias=multi_speaker, + bias_dim=self.tts_config["speaker_dim"], + keep_prob=1.0 - self.tts_config["dropout"]) + decoder = Decoder( + self.tts_config["n_mels"], + self.tts_config["reduction_factor"], + list(self.tts_config["prenet_sizes"]) + [self.tts_config["char_dim"]], + self.tts_config["decoder_layers"], + self.tts_config["kernel_size"], + self.tts_config["attention_dim"], + position_encoding_weight=self.tts_config["position_weight"], + omega=self.tts_config["position_rate"], + has_bias=multi_speaker, + bias_dim=self.tts_config["speaker_dim"], + keep_prob=1.0 - self.tts_config["dropout"]) + postnet = PostNet( + self.tts_config["postnet_layers"], + self.tts_config["char_dim"], + self.tts_config["postnet_dim"], + self.tts_config["kernel_size"], + self.tts_config["n_mels"], + self.tts_config["reduction_factor"], + has_bias=multi_speaker, + bias_dim=self.tts_config["speaker_dim"], + keep_prob=1.0 - self.tts_config["dropout"]) + self.tts_model = SpectraNet(char_embedding, speaker_embedding, encoder, decoder, postnet) + io.load_parameters(model=self.tts_model, checkpoint_path=self.tts_checkpoint_path) + for name, layer in self.tts_model.named_sublayers(): + try: + remove_weight_norm(layer) + except ValueError: + # this layer has no weight-norm hook + pass + + self.waveflow = WaveflowVocoder( + config_path=self.waveflow_config_path, checkpoint_path=self.waveflow_checkpoint_path) + self.griffin = GriffinLimVocoder( + sharpening_factor=self.tts_config["sharpening_factor"], + sample_rate=self.tts_config["sample_rate"], + n_fft=self.tts_config["n_fft"], + win_length=self.tts_config["win_length"], + hop_length=self.tts_config["hop_length"]) + + def synthesize(self, texts, use_gpu=False, vocoder="griffin-lim"): + """ + Get the synthesized wavs from the texts. + + Args: + texts(list): the input texts to be predicted. + use_gpu(bool): whether to use GPU for prediction or not + vocoder(str): the vocoder name, "griffin-lim" or "waveflow" + + Returns: + wavs(list): the synthesized audio waveforms. You can use soundfile.write to save them. + sample_rate(int): the audio sample rate.
+ """ + if use_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ: + use_gpu = False + logger.warning( + "use_gpu has been set False as you didn't set the environment variable CUDA_VISIBLE_DEVICES while using use_gpu=True" + ) + + place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() + + if texts and isinstance(texts, list): + predicted_data = texts + else: + raise ValueError("The input data is inconsistent with expectations.") + + wavs = [] + with fluid.dygraph.guard(place): + self.tts_model.eval() + self.waveflow.model.eval() + monotonic_layers = [4] + for text in predicted_data: + # init input + logger.info("Processing sentence: %s" % text) + text = en.text_to_sequence(text, p=1.0) + text = np.expand_dims(np.array(text, dtype="int64"), 0) + lengths = np.array([text.size], dtype=np.int64) + text_seqs = dg.to_variable(text) + text_lengths = dg.to_variable(lengths) + + decoder_layers = self.tts_config["decoder_layers"] + force_monotonic_attention = [False] * decoder_layers + for i in monotonic_layers: + force_monotonic_attention[i] = True + + outputs = self.tts_model( + text_seqs, + text_lengths, + speakers=None, + force_monotonic_attention=force_monotonic_attention, + window=(self.tts_config["backward_step"], self.tts_config["forward_step"])) + decoded, refined, attentions = outputs + if vocoder == 'griffin-lim': + # synthesis use griffin-lim + wav = self.griffin(refined.numpy()[0].T) + elif vocoder == 'waveflow': + # synthesis use waveflow + wav = self.waveflow(fluid.layers.transpose(refined, [0, 2, 1])).numpy()[0] + else: + raise ValueError( + 'vocoder error, we only support griffinlim and waveflow, but recevied %s.' % vocoder) + wavs.append(wav) + return wavs, self.tts_config["sample_rate"] + + @serving + def serving_method(self, texts, use_gpu=False, vocoder="griffin-lim"): + """ + Run as a service. + """ + wavs, sample_rate = self.synthesize(texts, use_gpu, vocoder) + wavs = [wav.tolist() for wav in wavs] + result = {"wavs": wavs, "sample_rate": sample_rate} + return result + + def add_module_config_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU for prediction") + + self.arg_config_group.add_argument( + '--vocoder', type=str, default="griffin-lim", choices=['griffin-lim', 'waveflow'], help="the vocoder name") + + def add_module_output_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--output_path', + type=str, + default=os.path.abspath(os.path.join(os.path.curdir, f"{self.name}_prediction")), + help="path to save experiment results") + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + self.parser = argparse.ArgumentParser( + description='Run the %s module.' % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_input_group = self.parser.add_argument_group( + title="Ouput options", description="Ouput path. 
Optional.") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, optional.") + + self.add_module_config_arg() + self.add_module_input_arg() + self.add_module_output_arg() + + args = self.parser.parse_args(argvs) + + try: + input_data = self.check_input_data(args) + except DataFormatError and RuntimeError: + self.parser.print_help() + return None + + mkdir(args.output_path) + wavs, sample_rate = self.synthesize(texts=input_data, use_gpu=args.use_gpu, vocoder=args.vocoder) + + for index, wav in enumerate(wavs): + sf.write(os.path.join(args.output_path, f"{index}.wav"), wav, sample_rate) + + ret = f"The synthesized wav files have been saved in {args.output_path}" + return ret + + +if __name__ == "__main__": + module = DeepVoice3() + test_text = [ + "Simple as this proposition is, it is necessary to be stated", + "Parakeet stands for Paddle PARAllel text-to-speech toolkit.", + ] + wavs, sample_rate = module.synthesize(texts=test_text, vocoder="waveflow") + for index, wav in enumerate(wavs): + sf.write(f"{index}.wav", wav, sample_rate) diff --git a/hub_module/modules/audio/tts/fastspeech_ljspeech/README.md b/modules/audio/tts/fastspeech_ljspeech/README.md similarity index 100% rename from hub_module/modules/audio/tts/fastspeech_ljspeech/README.md rename to modules/audio/tts/fastspeech_ljspeech/README.md diff --git a/hub_module/modules/audio/tts/deepvoice3_ljspeech/__init__.py b/modules/audio/tts/fastspeech_ljspeech/__init__.py similarity index 100% rename from hub_module/modules/audio/tts/deepvoice3_ljspeech/__init__.py rename to modules/audio/tts/fastspeech_ljspeech/__init__.py diff --git a/modules/audio/tts/fastspeech_ljspeech/module.py b/modules/audio/tts/fastspeech_ljspeech/module.py new file mode 100644 index 0000000000000000000000000000000000000000..f0b7a8d9fb4f0e1776166c3490d8408a832c38d6 --- /dev/null +++ b/modules/audio/tts/fastspeech_ljspeech/module.py @@ -0,0 +1,265 @@ +# coding:utf-8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import ast +import argparse +import importlib.util + +import nltk +import paddle.fluid as fluid +import paddle.fluid.dygraph as dg +import paddlehub as hub +from paddlehub.module.module import runnable +from paddlehub.common.utils import mkdir +from paddlehub.module.nlp_module import DataFormatError +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo, serving +from paddlehub.common.dir import THIRD_PARTY_HOME +from paddlehub.common.downloader import default_downloader + +lack_dependency = [] +for dependency in ["ruamel", "parakeet", "soundfile", "librosa"]: + if not importlib.util.find_spec(dependency): + lack_dependency.append(dependency) + +# Accelerate NLTK package download via paddlehub. 'import parakeet' will use the package. 
+_PUNKT_URL = "https://paddlehub.bj.bcebos.com/paddlehub-thirdparty/punkt.tar.gz" +_CMUDICT_URL = "https://paddlehub.bj.bcebos.com/paddlehub-thirdparty/cmudict.tar.gz" +nltk_path = os.path.join(THIRD_PARTY_HOME, "nltk_data") +tokenizers_path = os.path.join(nltk_path, "tokenizers") +corpora_path = os.path.join(nltk_path, "corpora") +punkt_path = os.path.join(tokenizers_path, "punkt") +cmudict_path = os.path.join(corpora_path, "cmudict") + +if not os.path.exists(punkt_path): + default_downloader.download_file_and_uncompress(url=_PUNKT_URL, save_path=tokenizers_path, print_progress=True) +if not os.path.exists(cmudict_path): + default_downloader.download_file_and_uncompress(url=_CMUDICT_URL, save_path=corpora_path, print_progress=True) +nltk.data.path.append(nltk_path) + +if not lack_dependency: + import soundfile as sf + import librosa + from ruamel import yaml + from parakeet.models.fastspeech.fastspeech import FastSpeech as FastSpeechModel + from parakeet.g2p.en import text_to_sequence + from parakeet.models.transformer_tts.utils import * + from parakeet.utils import io + from parakeet.modules.weight_norm import WeightNormWrapper + from parakeet.models.waveflow import WaveFlowModule +else: + raise ImportError( + "The module requires additional dependencies: %s. You can install parakeet via 'git clone https://github.com/PaddlePaddle/Parakeet && cd Parakeet && pip install -e .' and others via pip install" + % ", ".join(lack_dependency)) + + +class AttrDict(dict): + def __init__(self, *args, **kwargs): + super(AttrDict, self).__init__(*args, **kwargs) + self.__dict__ = self + + +@moduleinfo( + name="fastspeech_ljspeech", + version="1.0.0", + summary= + "FastSpeech proposes a novel feed-forward network based on Transformer to generate mel-spectrogram in parallel for TTS. See https://arxiv.org/abs/1905.09263 for details.", + author="baidu-nlp", + author_email="", + type="nlp/tts", +) +class FastSpeech(hub.NLPPredictionModule): + def _initialize(self): + """ + initialize with the necessary elements + """ + self.tts_checkpoint_path = os.path.join(self.directory, "assets", "tts", "step-162000") + self.waveflow_checkpoint_path = os.path.join(self.directory, "assets", "vocoder", "step-2000000") + self.waveflow_config_path = os.path.join(self.directory, "assets", "vocoder", "waveflow_ljspeech.yaml") + + tts_config_path = os.path.join(self.directory, "assets", "tts", "ljspeech.yaml") + with open(tts_config_path) as f: + self.tts_config = yaml.load(f, Loader=yaml.Loader) + with fluid.dygraph.guard(fluid.CPUPlace()): + self.tts_model = FastSpeechModel(self.tts_config['network'], num_mels=self.tts_config['audio']['num_mels']) + io.load_parameters(model=self.tts_model, checkpoint_path=self.tts_checkpoint_path) + + # Build vocoder. + args = AttrDict() + args.config = self.waveflow_config_path + args.use_fp16 = False + self.waveflow_config = io.add_yaml_config_to_args(args) + self.waveflow = WaveFlowModule(self.waveflow_config) + io.load_parameters(model=self.waveflow, checkpoint_path=self.waveflow_checkpoint_path) + + def synthesize(self, texts, use_gpu=False, speed=1.0, vocoder="griffin-lim"): + """ + Get the synthetic wavs from the texts. + + Args: + texts(list): the input texts to be predicted. + use_gpu(bool): whether use gpu to predict or not. Default False. + speed(float): Controlling the voice speed. Default 1.0. + vocoder(str): the vocoder name, "griffin-lim" or "waveflow". + + Returns: + wavs(str): the audio wav with sample rate . You can use soundfile.write to save it. 
+            sample_rate(int): the audio sample rate.
+        """
+        if use_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ:
+            use_gpu = False
+            logger.warning(
+                "use_gpu has been set to False: the environment variable CUDA_VISIBLE_DEVICES is not set while use_gpu=True"
+            )
+        if use_gpu:
+            place = fluid.CUDAPlace(0)
+        else:
+            place = fluid.CPUPlace()
+
+        if texts and isinstance(texts, list):
+            predicted_data = texts
+        else:
+            raise ValueError("The input data is inconsistent with expectations.")
+
+        wavs = []
+        with fluid.dygraph.guard(place):
+            self.tts_model.eval()
+            self.waveflow.eval()
+            for text in predicted_data:
+                # init input
+                logger.info("Processing sentence: %s" % text)
+                text = np.asarray(text_to_sequence(text))
+                text = np.expand_dims(text, axis=0)
+                pos_text = np.arange(1, text.shape[1] + 1)
+                pos_text = np.expand_dims(pos_text, axis=0)
+
+                text = dg.to_variable(text).astype(np.int64)
+                pos_text = dg.to_variable(pos_text).astype(np.int64)
+
+                _, mel_output_postnet = self.tts_model(text, pos_text, alpha=1 / speed)
+
+                if vocoder == 'griffin-lim':
+                    # synthesize with griffin-lim
+                    wav = self.synthesis_with_griffinlim(mel_output_postnet, self.tts_config['audio'])
+                elif vocoder == 'waveflow':
+                    wav = self.synthesis_with_waveflow(mel_output_postnet, self.waveflow_config.sigma)
+                else:
+                    raise ValueError(
+                        "unsupported vocoder: only 'griffin-lim' and 'waveflow' are supported, but received %s." % vocoder)
+                wavs.append(wav)
+        return wavs, self.tts_config['audio']['sr']
+
+    def synthesis_with_griffinlim(self, mel_output, cfg):
+        # synthesis with griffin-lim
+        mel_output = fluid.layers.transpose(fluid.layers.squeeze(mel_output, [0]), [1, 0])
+        mel_output = np.exp(mel_output.numpy())
+        basis = librosa.filters.mel(cfg['sr'], cfg['n_fft'], cfg['num_mels'], fmin=cfg['fmin'], fmax=cfg['fmax'])
+        inv_basis = np.linalg.pinv(basis)
+        spec = np.maximum(1e-10, np.dot(inv_basis, mel_output))
+
+        wav = librosa.core.griffinlim(spec**cfg['power'], hop_length=cfg['hop_length'], win_length=cfg['win_length'])
+
+        return wav
+
+    def synthesis_with_waveflow(self, mel_output, sigma):
+        mel_spectrogram = fluid.layers.transpose(fluid.layers.squeeze(mel_output, [0]), [1, 0])
+        mel_spectrogram = fluid.layers.unsqueeze(mel_spectrogram, [0])
+
+        for layer in self.waveflow.sublayers():
+            if isinstance(layer, WeightNormWrapper):
+                layer.remove_weight_norm()
+
+        # Run model inference.
+        wav = self.waveflow.synthesize(mel_spectrogram, sigma=sigma)
+        return wav.numpy()[0]
+
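+    # Sketch of the speed control above: the model is called with
+    # alpha=1/speed, which scales the predicted durations, so e.g.
+    # module.synthesize(["Hello"], speed=1.25) yields roughly 25% faster speech.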
+ """ + wavs, sample_rate = self.synthesize(texts, use_gpu, speed, vocoder) + wavs = [wav.tolist() for wav in wavs] + result = {"wavs": wavs, "sample_rate": sample_rate} + return result + + def add_module_config_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU for prediction") + + self.arg_config_group.add_argument( + '--vocoder', type=str, default="griffin-lim", choices=['griffin-lim', 'waveflow'], help="the vocoder name") + + def add_module_output_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--output_path', + type=str, + default=os.path.abspath(os.path.join(os.path.curdir, f"{self.name}_prediction")), + help="path to save experiment results") + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + self.parser = argparse.ArgumentParser( + description='Run the %s module.' % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_input_group = self.parser.add_argument_group( + title="Ouput options", description="Ouput path. Optional.") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, optional.") + + self.add_module_config_arg() + self.add_module_input_arg() + self.add_module_output_arg() + + args = self.parser.parse_args(argvs) + + try: + input_data = self.check_input_data(args) + except DataFormatError and RuntimeError: + self.parser.print_help() + return None + + mkdir(args.output_path) + wavs, sample_rate = self.synthesize(texts=input_data, use_gpu=args.use_gpu, vocoder=args.vocoder) + + for index, wav in enumerate(wavs): + sf.write(os.path.join(args.output_path, f"{index}.wav"), wav, sample_rate) + + ret = f"The synthesized wav files have been saved in {args.output_path}" + return ret + + +if __name__ == "__main__": + + module = FastSpeech() + test_text = [ + "Simple as this proposition is, it is necessary to be stated", + ] + wavs, sample_rate = module.synthesize(texts=test_text, speed=1, vocoder="waveflow") + for index, wav in enumerate(wavs): + sf.write(f"{index}.wav", wav, sample_rate) diff --git a/hub_module/modules/audio/tts/transformer_tts_ljspeech/README.md b/modules/audio/tts/transformer_tts_ljspeech/README.md similarity index 100% rename from hub_module/modules/audio/tts/transformer_tts_ljspeech/README.md rename to modules/audio/tts/transformer_tts_ljspeech/README.md diff --git a/hub_module/modules/audio/tts/fastspeech_ljspeech/__init__.py b/modules/audio/tts/transformer_tts_ljspeech/__init__.py similarity index 100% rename from hub_module/modules/audio/tts/fastspeech_ljspeech/__init__.py rename to modules/audio/tts/transformer_tts_ljspeech/__init__.py diff --git a/modules/audio/tts/transformer_tts_ljspeech/module.py b/modules/audio/tts/transformer_tts_ljspeech/module.py new file mode 100644 index 0000000000000000000000000000000000000000..ee8e68cde97eaf755e457902a73ba8e82847a243 --- /dev/null +++ b/modules/audio/tts/transformer_tts_ljspeech/module.py @@ -0,0 +1,282 @@ +# coding:utf-8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import ast +import argparse +import importlib.util + +import nltk +import paddle.fluid as fluid +import paddle.fluid.dygraph as dg +import paddlehub as hub +from paddlehub.module.module import runnable +from paddlehub.common.utils import mkdir +from paddlehub.module.nlp_module import DataFormatError +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo, serving +from paddlehub.common.dir import THIRD_PARTY_HOME +from paddlehub.common.downloader import default_downloader + +lack_dependency = [] +for dependency in ["ruamel", "parakeet", "scipy", "soundfile", "librosa"]: + if not importlib.util.find_spec(dependency): + lack_dependency.append(dependency) + +# Accelerate NLTK package download via paddlehub. 'import parakeet' will use the package. +_PUNKT_URL = "https://paddlehub.bj.bcebos.com/paddlehub-thirdparty/punkt.tar.gz" +_CMUDICT_URL = "https://paddlehub.bj.bcebos.com/paddlehub-thirdparty/cmudict.tar.gz" +nltk_path = os.path.join(THIRD_PARTY_HOME, "nltk_data") +tokenizers_path = os.path.join(nltk_path, "tokenizers") +corpora_path = os.path.join(nltk_path, "corpora") +punkt_path = os.path.join(tokenizers_path, "punkt") +cmudict_path = os.path.join(corpora_path, "cmudict") + +if not os.path.exists(punkt_path): + default_downloader.download_file_and_uncompress(url=_PUNKT_URL, save_path=tokenizers_path, print_progress=True) +if not os.path.exists(cmudict_path): + default_downloader.download_file_and_uncompress(url=_CMUDICT_URL, save_path=corpora_path, print_progress=True) +nltk.data.path.append(nltk_path) + +if not lack_dependency: + import soundfile as sf + import librosa + from ruamel import yaml + from scipy.io.wavfile import write + from parakeet.g2p.en import text_to_sequence + from parakeet.models.transformer_tts.utils import * + from parakeet.models.transformer_tts import TransformerTTS as TransformerTTSModel + from parakeet.models.waveflow import WaveFlowModule + from parakeet.utils import io + from parakeet.modules.weight_norm import WeightNormWrapper +else: + raise ImportError( + "The module requires additional dependencies: %s. You can install parakeet via 'git clone https://github.com/PaddlePaddle/Parakeet && cd Parakeet && pip install -e .' and others via pip install" + % ", ".join(lack_dependency)) + + +class AttrDict(dict): + def __init__(self, *args, **kwargs): + super(AttrDict, self).__init__(*args, **kwargs) + self.__dict__ = self + + +@moduleinfo( + name="transformer_tts_ljspeech", + version="1.0.0", + summary= + "Transformer TTS introduces and adapts the multi-head attention mechanism to replace the RNN structures and also the original attention mechanism in Tacotron2. 
See https://arxiv.org/abs/1809.08895 for details.",
+    author="baidu-nlp",
+    author_email="",
+    type="nlp/tts",
+)
+class TransformerTTS(hub.NLPPredictionModule):
+    def _initialize(self):
+        """
+        initialize with the necessary elements
+        """
+        self.tts_checkpoint_path = os.path.join(self.directory, "assets", "tts", "step-120000")
+        self.waveflow_checkpoint_path = os.path.join(self.directory, "assets", "vocoder", "step-2000000")
+        self.waveflow_config_path = os.path.join(self.directory, "assets", "vocoder", "waveflow_ljspeech.yaml")
+
+        tts_config_path = os.path.join(self.directory, "assets", "tts", "ljspeech.yaml")
+        with open(tts_config_path) as f:
+            self.tts_config = yaml.load(f, Loader=yaml.Loader)
+
+        # The maximum number of mel frames to generate during synthesis.
+        self.max_len = 1000
+        # The stop-token threshold: spectrogram generation stops once the stop
+        # prediction for the current time step exceeds this value.
+        self.stop_threshold = 0.5
+
+        with fluid.dygraph.guard(fluid.CPUPlace()):
+            # Build TTS.
+            with fluid.unique_name.guard():
+                network_cfg = self.tts_config['network']
+                self.tts_model = TransformerTTSModel(
+                    network_cfg['embedding_size'], network_cfg['hidden_size'], network_cfg['encoder_num_head'],
+                    network_cfg['encoder_n_layers'], self.tts_config['audio']['num_mels'],
+                    network_cfg['outputs_per_step'], network_cfg['decoder_num_head'], network_cfg['decoder_n_layers'])
+                io.load_parameters(model=self.tts_model, checkpoint_path=self.tts_checkpoint_path)
+
+            # Build vocoder.
+            args = AttrDict()
+            args.config = self.waveflow_config_path
+            args.use_fp16 = False
+            self.waveflow_config = io.add_yaml_config_to_args(args)
+            self.waveflow = WaveFlowModule(self.waveflow_config)
+            io.load_parameters(model=self.waveflow, checkpoint_path=self.waveflow_checkpoint_path)
+
+    def synthesize(self, texts, use_gpu=False, vocoder="griffin-lim"):
+        """
+        Get the synthesized wavs from the texts.
+
+        Args:
+            texts(list): the input texts to be predicted.
+            use_gpu(bool): whether to use GPU for prediction or not.
+            vocoder(str): the vocoder name, "griffin-lim" or "waveflow".
+
+        Returns:
+            wavs(list): the synthesized audio waveforms. You can use soundfile.write to save them.
+            sample_rate(int): the audio sample rate.
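+
+        Example (a sketch; ``module`` is an installed instance of this class):
+            wavs, sample_rate = module.synthesize(["Hello world"])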
+ """ + if use_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ: + use_gpu = False + logger.warning( + "use_gpu has been set False as you didn't set the environment variable CUDA_VISIBLE_DEVICES while using use_gpu=True" + ) + if use_gpu: + place = fluid.CUDAPlace(0) + else: + place = fluid.CPUPlace() + + if texts and isinstance(texts, list): + predicted_data = texts + else: + raise ValueError("The input data is inconsistent with expectations.") + + wavs = [] + with fluid.dygraph.guard(place): + self.tts_model.eval() + self.waveflow.eval() + for text in predicted_data: + # init input + logger.info("Processing sentence: %s" % text) + text = np.asarray(text_to_sequence(text)) + text = fluid.layers.unsqueeze(dg.to_variable(text).astype(np.int64), [0]) + mel_input = dg.to_variable(np.zeros([1, 1, 80])).astype(np.float32) + pos_text = np.arange(1, text.shape[1] + 1) + pos_text = fluid.layers.unsqueeze(dg.to_variable(pos_text).astype(np.int64), [0]) + + for i in range(self.max_len): + pos_mel = np.arange(1, mel_input.shape[1] + 1) + pos_mel = fluid.layers.unsqueeze(dg.to_variable(pos_mel).astype(np.int64), [0]) + mel_pred, postnet_pred, attn_probs, stop_preds, attn_enc, attn_dec = self.tts_model( + text, mel_input, pos_text, pos_mel) + if stop_preds.numpy()[0, -1] > self.stop_threshold: + break + mel_input = fluid.layers.concat([mel_input, postnet_pred[:, -1:, :]], axis=1) + if vocoder == 'griffin-lim': + # synthesis use griffin-lim + wav = self.synthesis_with_griffinlim(postnet_pred, self.tts_config['audio']) + elif vocoder == 'waveflow': + # synthesis use waveflow + wav = self.synthesis_with_waveflow(postnet_pred, self.waveflow_config.sigma) + else: + raise ValueError( + 'vocoder error, we only support griffinlim and waveflow, but recevied %s.' % vocoder) + wavs.append(wav) + return wavs, self.tts_config['audio']['sr'] + + def synthesis_with_griffinlim(self, mel_output, cfg): + # synthesis with griffin-lim + mel_output = fluid.layers.transpose(fluid.layers.squeeze(mel_output, [0]), [1, 0]) + mel_output = np.exp(mel_output.numpy()) + basis = librosa.filters.mel(cfg['sr'], cfg['n_fft'], cfg['num_mels'], fmin=cfg['fmin'], fmax=cfg['fmax']) + inv_basis = np.linalg.pinv(basis) + spec = np.maximum(1e-10, np.dot(inv_basis, mel_output)) + + wav = librosa.core.griffinlim(spec**cfg['power'], hop_length=cfg['hop_length'], win_length=cfg['win_length']) + + return wav + + def synthesis_with_waveflow(self, mel_output, sigma): + mel_spectrogram = fluid.layers.transpose(fluid.layers.squeeze(mel_output, [0]), [1, 0]) + mel_spectrogram = fluid.layers.unsqueeze(mel_spectrogram, [0]) + + for layer in self.waveflow.sublayers(): + if isinstance(layer, WeightNormWrapper): + layer.remove_weight_norm() + + # Run model inference. + wav = self.waveflow.synthesize(mel_spectrogram, sigma=sigma) + return wav.numpy()[0] + + @serving + def serving_method(self, texts, use_gpu=False, vocoder="griffin-lim"): + """ + Run as a service. 
+ """ + wavs, sample_rate = self.synthesize(texts, use_gpu, vocoder) + wavs = [wav.tolist() for wav in wavs] + result = {"wavs": wavs, "sample_rate": sample_rate} + return result + + def add_module_config_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU for prediction") + + self.arg_config_group.add_argument( + '--vocoder', type=str, default="griffin-lim", choices=['griffin-lim', 'waveflow'], help="the vocoder name") + + def add_module_output_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--output_path', + type=str, + default=os.path.abspath(os.path.join(os.path.curdir, f"{self.name}_prediction")), + help="path to save experiment results") + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + self.parser = argparse.ArgumentParser( + description='Run the %s module.' % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_input_group = self.parser.add_argument_group( + title="Ouput options", description="Ouput path. Optional.") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, optional.") + + self.add_module_config_arg() + self.add_module_input_arg() + self.add_module_output_arg() + + args = self.parser.parse_args(argvs) + + try: + input_data = self.check_input_data(args) + except DataFormatError and RuntimeError: + self.parser.print_help() + return None + + mkdir(args.output_path) + wavs, sample_rate = self.synthesize(texts=input_data, use_gpu=args.use_gpu, vocoder=args.vocoder) + + for index, wav in enumerate(wavs): + sf.write(os.path.join(args.output_path, f"{index}.wav"), wav, sample_rate) + + ret = f"The synthesized wav files have been saved in {args.output_path}" + return ret + + +if __name__ == "__main__": + + module = TransformerTTS() + test_text = [ + "Life was like a box of chocolates, you never know what you're gonna get.", + ] + wavs, sample_rate = module.synthesize(texts=test_text, vocoder="waveflow") + for index, wav in enumerate(wavs): + sf.write(f"{index}.wav", wav, sample_rate) diff --git a/hub_module/modules/demo/README.md b/modules/demo/README.md similarity index 100% rename from hub_module/modules/demo/README.md rename to modules/demo/README.md diff --git a/hub_module/modules/audio/tts/transformer_tts_ljspeech/__init__.py b/modules/demo/senta_test/__init__.py similarity index 100% rename from hub_module/modules/audio/tts/transformer_tts_ljspeech/__init__.py rename to modules/demo/senta_test/__init__.py diff --git a/modules/demo/senta_test/module.py b/modules/demo/senta_test/module.py new file mode 100644 index 0000000000000000000000000000000000000000..03f82d6372295b5cff4f6cf153442d310529d901 --- /dev/null +++ b/modules/demo/senta_test/module.py @@ -0,0 +1,45 @@ +import argparse +import os + +import paddlehub as hub +from paddlehub.module.module import runnable, moduleinfo + +from senta_test.processor import load_vocab + + +@moduleinfo( + name="senta_test", + version="1.0.0", + summary="This is a PaddleHub Module. 
Just for test.", + author="anonymous", + author_email="", + type="nlp/sentiment_analysis", +) +class SentaTest(hub.Module): + def _initialize(self): + # add arg parser + self.parser = argparse.ArgumentParser( + description="Run the senta_test module.", prog='hub run senta_test', usage='%(prog)s', add_help=True) + self.parser.add_argument('--input_text', type=str, default=None, help="text to predict") + + # load word dict + vocab_path = os.path.join(self.directory, "vocab.list") + self.vocab = load_vocab(vocab_path) + + def sentiment_classify(self, texts): + results = [] + for text in texts: + sentiment = "positive" + for word in self.vocab: + if word in text: + sentiment = "negative" + break + results.append({"text": text, "sentiment": sentiment}) + + return results + + @runnable + def run_cmd(self, argvs): + args = self.parser.parse_args(argvs) + texts = [args.input_text] + return self.sentiment_classify(texts) diff --git a/hub_module/modules/demo/senta_test/processor.py b/modules/demo/senta_test/processor.py similarity index 100% rename from hub_module/modules/demo/senta_test/processor.py rename to modules/demo/senta_test/processor.py diff --git a/hub_module/modules/demo/senta_test/vocab.list b/modules/demo/senta_test/vocab.list similarity index 100% rename from hub_module/modules/demo/senta_test/vocab.list rename to modules/demo/senta_test/vocab.list diff --git a/hub_module/modules/demo/test.py b/modules/demo/test.py similarity index 100% rename from hub_module/modules/demo/test.py rename to modules/demo/test.py diff --git a/hub_module/modules/image/README.md b/modules/image/README.md similarity index 100% rename from hub_module/modules/image/README.md rename to modules/image/README.md diff --git a/hub_module/modules/image/classification/README.md b/modules/image/classification/README.md similarity index 100% rename from hub_module/modules/image/classification/README.md rename to modules/image/classification/README.md diff --git a/hub_module/modules/demo/senta_test/__init__.py b/modules/image/classification/darknet53_imagenet/__init__.py similarity index 100% rename from hub_module/modules/demo/senta_test/__init__.py rename to modules/image/classification/darknet53_imagenet/__init__.py diff --git a/modules/image/classification/darknet53_imagenet/darknet.py b/modules/image/classification/darknet53_imagenet/darknet.py new file mode 100644 index 0000000000000000000000000000000000000000..58c5b090172b042d7df701c6691b3c1e867c1b23 --- /dev/null +++ b/modules/image/classification/darknet53_imagenet/darknet.py @@ -0,0 +1,123 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import six +import math + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.regularizer import L2Decay + +__all__ = ['DarkNet'] + + +class DarkNet(object): + """DarkNet, see https://pjreddie.com/darknet/yolo/ + Args: + depth (int): network depth, currently only darknet 53 is supported + norm_type (str): normalization type, 'bn' and 'sync_bn' are supported + norm_decay (float): weight decay for normalization layer weights + get_prediction (bool): whether to get prediction + class_dim (int): number of class while classification + """ + + def __init__(self, + depth=53, + norm_type='sync_bn', + norm_decay=0., + weight_prefix_name='', + get_prediction=False, + class_dim=1000): + assert depth in [53], "unsupported depth value" + self.depth = depth + self.norm_type = norm_type + self.norm_decay = norm_decay + 
self.depth_cfg = {53: ([1, 2, 8, 8, 4], self.basicblock)} + self.prefix_name = weight_prefix_name + self.class_dim = class_dim + self.get_prediction = get_prediction + + def _conv_norm(self, input, ch_out, filter_size, stride, padding, act='leaky', name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=ch_out, + filter_size=filter_size, + stride=stride, + padding=padding, + act=None, + param_attr=ParamAttr(name=name + ".conv.weights"), + bias_attr=False) + + bn_name = name + ".bn" + bn_param_attr = ParamAttr(regularizer=L2Decay(float(self.norm_decay)), name=bn_name + '.scale') + bn_bias_attr = ParamAttr(regularizer=L2Decay(float(self.norm_decay)), name=bn_name + '.offset') + + out = fluid.layers.batch_norm( + input=conv, + act=None, + param_attr=bn_param_attr, + bias_attr=bn_bias_attr, + moving_mean_name=bn_name + '.mean', + moving_variance_name=bn_name + '.var') + + # leaky relu here has `alpha` as 0.1, can not be set by + # `act` param in fluid.layers.batch_norm above. + if act == 'leaky': + out = fluid.layers.leaky_relu(x=out, alpha=0.1) + + return out + + def _downsample(self, input, ch_out, filter_size=3, stride=2, padding=1, name=None): + return self._conv_norm(input, ch_out=ch_out, filter_size=filter_size, stride=stride, padding=padding, name=name) + + def basicblock(self, input, ch_out, name=None): + conv1 = self._conv_norm(input, ch_out=ch_out, filter_size=1, stride=1, padding=0, name=name + ".0") + conv2 = self._conv_norm(conv1, ch_out=ch_out * 2, filter_size=3, stride=1, padding=1, name=name + ".1") + out = fluid.layers.elementwise_add(x=input, y=conv2, act=None) + return out + + def layer_warp(self, block_func, input, ch_out, count, name=None): + out = block_func(input, ch_out=ch_out, name='{}.0'.format(name)) + for j in six.moves.xrange(1, count): + out = block_func(out, ch_out=ch_out, name='{}.{}'.format(name, j)) + return out + + def __call__(self, input): + """Get the backbone of DarkNet, that is output for the 5 stages. + + :param input: Variable of input image + :type input: Variable + :Returns: The last variables of each stage. 
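+
+        Example (a sketch, with the default get_prediction=False):
+            image = fluid.data(name='image', shape=[-1, 3, 224, 224], dtype='float32')
+            blocks = DarkNet(depth=53)(image)  # list with the output of each stage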
+ """ + stages, block_func = self.depth_cfg[self.depth] + stages = stages[0:5] + conv = self._conv_norm( + input=input, ch_out=32, filter_size=3, stride=1, padding=1, name=self.prefix_name + "yolo_input") + downsample_ = self._downsample( + input=conv, ch_out=conv.shape[1] * 2, name=self.prefix_name + "yolo_input.downsample") + blocks = [] + for i, stage in enumerate(stages): + block = self.layer_warp( + block_func=block_func, + input=downsample_, + ch_out=32 * 2**i, + count=stage, + name=self.prefix_name + "stage.{}".format(i)) + blocks.append(block) + if i < len(stages) - 1: # do not downsaple in the last stage + downsample_ = self._downsample( + input=block, ch_out=block.shape[1] * 2, name=self.prefix_name + "stage.{}.downsample".format(i)) + if self.get_prediction: + pool = fluid.layers.pool2d(input=block, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc( + input=pool, + size=self.class_dim, + param_attr=ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv), name='fc_weights'), + bias_attr=ParamAttr(name='fc_offset')) + out = fluid.layers.softmax(out) + return out + else: + return blocks diff --git a/modules/image/classification/darknet53_imagenet/data_feed.py b/modules/image/classification/darknet53_imagenet/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9d790e93333f3742be9ee58215da87eb2ef819 --- /dev/null +++ b/modules/image/classification/darknet53_imagenet/data_feed.py @@ -0,0 +1,73 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import os +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image, ImageEnhance +from paddle import fluid + +DATA_DIM = 224 +img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + +def resize_short(img, target_size): + percent = float(target_size) / min(img.size[0], img.size[1]) + resized_width = int(round(img.size[0] * percent)) + resized_height = int(round(img.size[1] * percent)) + img = img.resize((resized_width, resized_height), Image.LANCZOS) + return img + + +def crop_image(img, target_size, center): + width, height = img.size + size = target_size + if center == True: + w_start = (width - size) / 2 + h_start = (height - size) / 2 + else: + w_start = np.random.randint(0, width - size + 1) + h_start = np.random.randint(0, height - size + 1) + w_end = w_start + size + h_end = h_start + size + img = img.crop((w_start, h_start, w_end, h_end)) + return img + + +def process_image(img): + img = resize_short(img, target_size=256) + img = crop_image(img, target_size=DATA_DIM, center=True) + if img.mode != 'RGB': + img = img.convert('RGB') + #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def test_reader(paths=None, images=None): + """data generator + :param paths: path to images. 
+def test_reader(paths=None, images=None):
+    """data generator
+
+    :param paths: path to images.
+    :type paths: list, each element is a str
+    :param images: data of images, [N, H, W, C]
+    :type images: numpy.ndarray
+    """
+    img_list = []
+    if paths:
+        for img_path in paths:
+            assert os.path.isfile(img_path), "The {} isn't a valid file path.".format(img_path)
+            img = Image.open(img_path)
+            #img = cv2.imread(img_path)
+            img_list.append(img)
+    if images is not None:
+        for img in images:
+            img_list.append(Image.fromarray(np.uint8(img)))
+    for im in img_list:
+        im = process_image(im)
+        yield im
diff --git a/hub_module/modules/image/classification/darknet53_imagenet/label_file.txt b/modules/image/classification/darknet53_imagenet/label_file.txt
similarity index 100%
rename from hub_module/modules/image/classification/darknet53_imagenet/label_file.txt
rename to modules/image/classification/darknet53_imagenet/label_file.txt
diff --git a/modules/image/classification/darknet53_imagenet/module.py b/modules/image/classification/darknet53_imagenet/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f3ab1c50e0de3d73566a4e400cf741bb5be71e5
--- /dev/null
+++ b/modules/image/classification/darknet53_imagenet/module.py
@@ -0,0 +1,213 @@
+import os
+import ast
+import argparse
+
+import numpy as np
+import paddle.fluid as fluid
+import paddlehub as hub
+from paddlehub.module.module import moduleinfo, runnable
+from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+from paddlehub.common.paddle_helper import add_vars_prefix
+from paddlehub.io.parser import txt_parser
+
+from darknet53_imagenet.darknet import DarkNet
+from darknet53_imagenet.processor import load_label_info
+from darknet53_imagenet.data_feed import test_reader
+
+
+@moduleinfo(
+    name="darknet53_imagenet",
+    version="1.1.0",
+    type="cv/classification",
+    summary="DarkNet53 is an image classification model trained on the ImageNet-2012 dataset.",
+    author="paddlepaddle",
+    author_email="paddle-dev@baidu.com")
+class DarkNet53(hub.Module):
+    def _initialize(self):
+        self.default_pretrained_model_path = os.path.join(self.directory, "darknet53_model")
+        self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt"))
+        self.infer_prog = None
+        self.pred_out = None
+        self._set_config()
+
+    def get_expected_image_width(self):
+        return 224
+
+    def get_expected_image_height(self):
+        return 224
+
+    def get_pretrained_images_mean(self):
+        im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3)
+        return im_mean
+
+    def get_pretrained_images_std(self):
+        im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3)
+        return im_std
+
+    def _set_config(self):
+        """
+        predictor config setting
+        """
+        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
+        cpu_config.disable_glog_info()
+        cpu_config.disable_gpu()
+        self.cpu_predictor = create_paddle_predictor(cpu_config)
+
+        try:
+            _places = os.environ["CUDA_VISIBLE_DEVICES"]
+            int(_places[0])
+            use_gpu = True
+        except (KeyError, IndexError, ValueError):
+            use_gpu = False
+        if use_gpu:
+            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
+            gpu_config.disable_glog_info()
+            gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0)
+            self.gpu_predictor = create_paddle_predictor(gpu_config)
+
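+    # Note: the CPU predictor is always available, while the GPU predictor is
+    # only built when CUDA_VISIBLE_DEVICES names a usable device, so
+    # classification(..., use_gpu=True) relies on that variable being set.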
+    def context(self, input_image=None, trainable=True, pretrained=True, param_prefix='', get_prediction=False):
+        """Distill the Head Features, so as to perform transfer learning.
+
+        :param input_image: image tensor.
+        :type input_image: Variable
+        :param trainable: whether to set parameters trainable.
+        :type trainable: bool
+        :param pretrained: whether to load default pretrained model.
+        :type pretrained: bool
+        :param param_prefix: the prefix of parameters in yolo_head and backbone
+        :type param_prefix: str
+        :param get_prediction: whether to get prediction,
+            if True, outputs is {'bbox_out': bbox_out},
+            if False, outputs is {'head_features': head_features}.
+        :type get_prediction: bool
+        """
+        context_prog = input_image.block.program if input_image else fluid.Program()
+        startup_program = fluid.Program()
+        with fluid.program_guard(context_prog, startup_program):
+            image = input_image if input_image else fluid.data(
+                name='image', shape=[-1, 3, 224, 224], dtype='float32', lod_level=0)
+            backbone = DarkNet(get_prediction=get_prediction)
+            out = backbone(image)
+            inputs = {'image': image}
+            if get_prediction:
+                outputs = {'pred_out': out}
+            else:
+                outputs = {'body_feats': out}
+
+            place = fluid.CPUPlace()
+            exe = fluid.Executor(place)
+            if pretrained:
+
+                def _if_exist(var):
+                    return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name))
+
+                if not param_prefix:
+                    fluid.io.load_vars(
+                        exe, self.default_pretrained_model_path, main_program=context_prog, predicate=_if_exist)
+            else:
+                exe.run(startup_program)
+            return inputs, outputs, context_prog
+
+    def classification(self, paths=None, images=None, use_gpu=False, batch_size=1, top_k=2):
+        """API of Classification.
+
+        :param paths: the paths of images.
+        :type paths: list, each element corresponds to the path of an image.
+        :param images: data of images, [N, H, W, C]
+        :type images: numpy.ndarray
+        :param use_gpu: whether to use gpu or not.
+        :type use_gpu: bool
+        :param batch_size: batch size.
+        :type batch_size: int
+        :param top_k: number of top predictions to return for each image.
+        :type top_k: int
+        """
+        if self.infer_prog is None:
+            inputs, outputs, self.infer_prog = self.context(trainable=False, pretrained=True, get_prediction=True)
+            self.infer_prog = self.infer_prog.clone(for_test=True)
+            self.pred_out = outputs['pred_out']
+        place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        all_images = []
+        paths = paths if paths else []
+        for yield_data in test_reader(paths, images):
+            all_images.append(yield_data)
+
+        images_num = len(all_images)
+        loop_num = int(np.ceil(images_num / batch_size))
+
+        res_list = []
+        top_k = max(min(top_k, 1000), 1)
+        for iter_id in range(loop_num):
+            batch_data = []
+            handle_id = iter_id * batch_size
+            for image_id in range(batch_size):
+                try:
+                    batch_data.append(all_images[handle_id + image_id])
+                except IndexError:
+                    break
+            batch_data = np.array(batch_data).astype('float32')
+            data_tensor = PaddleTensor(batch_data.copy())
+            if use_gpu:
+                result = self.gpu_predictor.run([data_tensor])
+            else:
+                result = self.cpu_predictor.run([data_tensor])
+            for i, res in enumerate(result[0].as_ndarray()):
+                res_dict = {}
+                pred_label = np.argsort(res)[::-1][:top_k]
+                for k in pred_label:
+                    class_name = self.label_names[int(k)].split(',')[0]
+                    max_prob = res[k]
+                    res_dict[class_name] = max_prob
+                res_list.append(res_dict)
+        return res_list
+
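+    # Hedged usage sketch ("cat.jpg" and the scores are made-up examples):
+    #   module = hub.Module(name="darknet53_imagenet")
+    #   module.classification(paths=["cat.jpg"], top_k=3)
+    #   # -> [{'tabby': 0.53, 'tiger cat': 0.22, 'Egyptian cat': 0.11}]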
help="input data") + + self.arg_input_group.add_argument('--input_file', type=str, default=None, help="file contain input data") + + def check_input_data(self, args): + input_data = [] + if args.input_path: + input_data = [args.input_path] + elif args.input_file: + if not os.path.exists(args.input_file): + raise RuntimeError("File %s is not exist." % args.input_file) + else: + input_data = txt_parser.parse(args.input_file, use_strip=True) + return input_data + + @runnable + def run_cmd(self, argvs): + self.parser = argparse.ArgumentParser( + description="Run the {}".format(self.name), + prog="hub run {}".format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + input_data = self.check_input_data(args) + if len(input_data) == 0: + self.parser.print_help() + exit(1) + else: + for image_path in input_data: + if not os.path.exists(image_path): + raise RuntimeError("File %s or %s is not exist." % image_path) + return self.classification(paths=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size) diff --git a/hub_module/modules/image/classification/darknet53_imagenet/processor.py b/modules/image/classification/darknet53_imagenet/processor.py similarity index 100% rename from hub_module/modules/image/classification/darknet53_imagenet/processor.py rename to modules/image/classification/darknet53_imagenet/processor.py diff --git a/modules/image/classification/efficientnetb0_imagenet/module.py b/modules/image/classification/efficientnetb0_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..ec8c27142fa6923cad4d6dc4bac06312861637a6 --- /dev/null +++ b/modules/image/classification/efficientnetb0_imagenet/module.py @@ -0,0 +1,791 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import math +import collections +import re +import copy + +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + +GlobalParams = collections.namedtuple('GlobalParams', [ + 'batch_norm_momentum', + 'batch_norm_epsilon', + 'dropout_rate', + 'num_classes', + 'width_coefficient', + 'depth_coefficient', + 'depth_divisor', + 'min_depth', + 'drop_connect_rate', +]) + +BlockArgs = collections.namedtuple( + 'BlockArgs', + ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) + +GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) +BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) + + +def efficientnet_params(model_name: str): + """ Map EfficientNet model name to parameter coefficients. """ + params_dict = { + # Coefficients: width,depth,resolution,dropout + 'efficientnet-b0': (1.0, 1.0, 224, 0.2) + } + return params_dict[model_name] + + +def efficientnet(width_coefficient: float = None, + depth_coefficient: float = None, + dropout_rate: float = 0.2, + drop_connect_rate: float = 0.2): + """ Get block arguments according to parameter and coefficients. """ + blocks_args = [ + 'r1_k3_s11_e1_i32_o16_se0.25', + 'r2_k3_s22_e6_i16_o24_se0.25', + 'r2_k5_s22_e6_i24_o40_se0.25', + 'r3_k3_s22_e6_i40_o80_se0.25', + 'r3_k5_s11_e6_i80_o112_se0.25', + 'r4_k5_s22_e6_i112_o192_se0.25', + 'r1_k3_s11_e6_i192_o320_se0.25', + ] + blocks_args = BlockDecoder.decode(blocks_args) + + global_params = GlobalParams( + batch_norm_momentum=0.99, + batch_norm_epsilon=1e-3, + dropout_rate=dropout_rate, + drop_connect_rate=drop_connect_rate, + num_classes=1000, + width_coefficient=width_coefficient, + depth_coefficient=depth_coefficient, + depth_divisor=8, + min_depth=None) + + return blocks_args, global_params + + +def get_model_params(model_name: str, override_params: dict): + """ Get the block args and global params for a given model """ + if model_name.startswith('efficientnet'): + w, d, _, p = efficientnet_params(model_name) + blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) + else: + raise NotImplementedError('model name is not pre-defined: %s' % model_name) + if override_params: + global_params = global_params._replace(**override_params) + return blocks_args, global_params + + +def round_filters(filters: int, global_params: dict): + """ Calculate and round number of filters based on depth multiplier. """ + multiplier = global_params.width_coefficient + if not multiplier: + return filters + divisor = global_params.depth_divisor + min_depth = global_params.min_depth + filters *= multiplier + min_depth = min_depth or divisor + new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) + if new_filters < 0.9 * filters: # prevent rounding by more than 10% + new_filters += divisor + return int(new_filters) + + +def round_repeats(repeats: int, global_params: dict): + """ Round number of filters based on depth multiplier. """ + multiplier = global_params.depth_coefficient + if not multiplier: + return repeats + return int(math.ceil(multiplier * repeats)) + + +class BlockDecoder(object): + """ + Block Decoder, straight from the official TensorFlow repository. 
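+
+    For example, decode(['r2_k5_s22_e6_i24_o40_se0.25'])[0] yields
+    BlockArgs(kernel_size=5, num_repeat=2, input_filters=24, output_filters=40,
+              expand_ratio=6, id_skip=True, stride=[2], se_ratio=0.25).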
+ """ + + @staticmethod + def _decode_block_string(block_string: str): + """ Gets a block through a string notation of arguments. """ + assert isinstance(block_string, str) + + ops = block_string.split('_') + options = {} + for op in ops: + splits = re.split(r'(\d.*)', op) + if len(splits) >= 2: + key, value = splits[:2] + options[key] = value + + # Check stride + cond_1 = ('s' in options and len(options['s']) == 1) + cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) + assert (cond_1 or cond_2) + + return BlockArgs( + kernel_size=int(options['k']), + num_repeat=int(options['r']), + input_filters=int(options['i']), + output_filters=int(options['o']), + expand_ratio=int(options['e']), + id_skip=('noskip' not in block_string), + se_ratio=float(options['se']) if 'se' in options else None, + stride=[int(options['s'][0])]) + + @staticmethod + def _encode_block_string(block): + """Encodes a block to a string.""" + args = [ + 'r%d' % block.num_repeat, + 'k%d' % block.kernel_size, + 's%d%d' % (block.strides[0], block.strides[1]), + 'e%s' % block.expand_ratio, + 'i%d' % block.input_filters, + 'o%d' % block.output_filters + ] + if 0 < block.se_ratio <= 1: + args.append('se%s' % block.se_ratio) + if block.id_skip is False: + args.append('noskip') + return '_'.join(args) + + @staticmethod + def decode(string_list: list): + """ + Decode a list of string notations to specify blocks in the network. + + string_list: list of strings, each string is a notation of block + return + list of BlockArgs namedtuples of block args + """ + assert isinstance(string_list, list) + blocks_args = [] + for block_string in string_list: + blocks_args.append(BlockDecoder._decode_block_string(block_string)) + return blocks_args + + @staticmethod + def encode(blocks_args: list): + """ + Encodes a list of BlockArgs to a list of strings. 
+ + :param blocks_args: a list of BlockArgs namedtuples of block args + :return: a list of strings, each string is a notation of block + """ + block_strings = [] + for block in blocks_args: + block_strings.append(BlockDecoder._encode_block_string(block)) + return block_strings + + +def initial_type(name: str, use_bias: bool = False): + param_attr = ParamAttr(name=name + "_weights") + if use_bias: + bias_attr = ParamAttr(name=name + "_offset") + else: + bias_attr = False + return param_attr, bias_attr + + +def init_batch_norm_layer(name: str = "batch_norm"): + param_attr = ParamAttr(name=name + "_scale") + bias_attr = ParamAttr(name=name + "_offset") + return param_attr, bias_attr + + +def init_fc_layer(name: str = "fc"): + param_attr = ParamAttr(name=name + "_weights") + bias_attr = ParamAttr(name=name + "_offset") + return param_attr, bias_attr + + +def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): + """Calculate padding size.""" + if img_size % stride == 0: + out_size = max(filter_size - stride, 0) + else: + out_size = max(filter_size - (img_size % stride), 0) + return out_size // 2, out_size - out_size // 2 + + +inp_shape = {"b0": [224, 112, 112, 56, 28, 14, 14, 7]} + + +def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): + """Drop input connection""" + if is_test: + return inputs + keep_prob = 1.0 - prob + inputs_shape = paddle.shape(inputs) + random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) + binary_tensor = paddle.floor(random_tensor) + output = inputs / keep_prob * binary_tensor + return output + + +class Conv2ds(nn.Layer): + """Basic conv layer""" + + def __init__(self, + input_channels: int, + output_channels: int, + filter_size: int, + stride: int = 1, + padding: int = 0, + groups: int = None, + name: str = "conv2d", + act: str = None, + use_bias: bool = False, + padding_type: str = None, + model_name: str = None, + cur_stage: str = None): + super(Conv2ds, self).__init__() + assert act in [None, "swish", "sigmoid"] + self.act = act + + param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) + + def get_padding(filter_size, stride=1, dilation=1): + padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 + return padding + + inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] + self.need_crop = False + if padding_type == "SAME": + top_padding, bottom_padding = cal_padding(inps, stride, filter_size) + left_padding, right_padding = cal_padding(inps, stride, filter_size) + height_padding = bottom_padding + width_padding = right_padding + if top_padding != bottom_padding or left_padding != right_padding: + height_padding = top_padding + stride + width_padding = left_padding + stride + self.need_crop = True + padding = [height_padding, width_padding] + elif padding_type == "VALID": + height_padding = 0 + width_padding = 0 + padding = [height_padding, width_padding] + elif padding_type == "DYNAMIC": + padding = get_padding(filter_size, stride) + else: + padding = padding_type + + groups = 1 if groups is None else groups + self._conv = Conv2d( + input_channels, + output_channels, + filter_size, + groups=groups, + stride=stride, + padding=padding, + weight_attr=param_attr, + bias_attr=bias_attr) + + def forward(self, inputs: paddle.Tensor): + x = self._conv(inputs) + if self.act == "swish": + x = F.swish(x) + elif self.act == "sigmoid": + x = F.sigmoid(x) + + if self.need_crop: + x = x[:, :, 1:, 1:] + return x + + +class ConvBNLayer(nn.Layer): + """Basic conv bn 
layer.""" + + def __init__(self, + input_channels: int, + filter_size: int, + output_channels: int, + stride: int = 1, + num_groups: int = 1, + padding_type: str = "SAME", + conv_act: str = None, + bn_act: str = "swish", + use_bn: bool = True, + use_bias: bool = False, + name: str = None, + conv_name: str = None, + bn_name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2ds( + input_channels=input_channels, + output_channels=output_channels, + filter_size=filter_size, + stride=stride, + groups=num_groups, + act=conv_act, + padding_type=padding_type, + name=conv_name, + use_bias=use_bias, + model_name=model_name, + cur_stage=cur_stage) + self.use_bn = use_bn + if use_bn is True: + bn_name = name + bn_name + param_attr, bias_attr = init_batch_norm_layer(bn_name) + + self._bn = BatchNorm( + num_channels=output_channels, + act=bn_act, + momentum=0.99, + epsilon=0.001, + moving_mean_name=bn_name + "_mean", + moving_variance_name=bn_name + "_variance", + param_attr=param_attr, + bias_attr=bias_attr) + + def forward(self, inputs: paddle.Tensor): + if self.use_bn: + x = self._conv(inputs) + x = self._bn(x) + return x + else: + return self._conv(inputs) + + +class ExpandConvNorm(nn.Layer): + """Expand conv norm layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ExpandConvNorm, self).__init__() + + self.oup = block_args.input_filters * block_args.expand_ratio + self.expand_ratio = block_args.expand_ratio + + if self.expand_ratio != 1: + self._conv = ConvBNLayer( + input_channels, + 1, + self.oup, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_expand_conv", + bn_name="_bn0", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + if self.expand_ratio != 1: + return self._conv(inputs) + else: + return inputs + + +class DepthwiseConvNorm(nn.Layer): + """Depthwise conv norm layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(DepthwiseConvNorm, self).__init__() + + self.k = block_args.kernel_size + self.s = block_args.stride + if isinstance(self.s, list) or isinstance(self.s, tuple): + self.s = self.s[0] + oup = block_args.input_filters * block_args.expand_ratio + + self._conv = ConvBNLayer( + input_channels, + self.k, + oup, + self.s, + num_groups=input_channels, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_depthwise_conv", + bn_name="_bn1", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class ProjectConvNorm(nn.Layer): + """Projection conv bn layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ProjectConvNorm, self).__init__() + + final_oup = block_args.output_filters + + self._conv = ConvBNLayer( + input_channels, + 1, + final_oup, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_project_conv", + bn_name="_bn2", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class SEBlock(nn.Layer): + """Basic Squeeze-and-Excitation block for Efficientnet.""" + + def __init__(self, + 
input_channels: int, + num_squeezed_channels: int, + oup: int, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(SEBlock, self).__init__() + + self._pool = AdaptiveAvgPool2d(1) + self._conv1 = Conv2ds( + input_channels, + num_squeezed_channels, + 1, + use_bias=True, + padding_type=padding_type, + act="swish", + name=name + "_se_reduce") + + self._conv2 = Conv2ds( + num_squeezed_channels, + oup, + 1, + act="sigmoid", + use_bias=True, + padding_type=padding_type, + name=name + "_se_expand") + + def forward(self, inputs: paddle.Tensor): + x = self._pool(inputs) + x = self._conv1(x) + x = self._conv2(x) + return paddle.multiply(inputs, x) + + +class MbConvBlock(nn.Layer): + """Mobile inverted bottleneck convolution for Efficientnet.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + use_se: bool, + name: str = None, + drop_connect_rate: float = None, + is_test: bool = False, + model_name: str = None, + cur_stage: str = None): + super(MbConvBlock, self).__init__() + + oup = block_args.input_filters * block_args.expand_ratio + self.block_args = block_args + self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) + self.id_skip = block_args.id_skip + self.expand_ratio = block_args.expand_ratio + self.drop_connect_rate = drop_connect_rate + self.is_test = is_test + + if self.expand_ratio != 1: + self._ecn = ExpandConvNorm( + input_channels, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + self._dcn = DepthwiseConvNorm( + input_channels * block_args.expand_ratio, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + if self.has_se: + num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) + self._se = SEBlock( + input_channels * block_args.expand_ratio, + num_squeezed_channels, + oup, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + self._pcn = ProjectConvNorm( + input_channels * block_args.expand_ratio, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + x = inputs + if self.expand_ratio != 1: + x = self._ecn(x) + x = F.swish(x) + x = self._dcn(x) + x = F.swish(x) + if self.has_se: + x = self._se(x) + x = self._pcn(x) + if self.id_skip and \ + self.block_args.stride == 1 and \ + self.block_args.input_filters == self.block_args.output_filters: + if self.drop_connect_rate: + x = _drop_connect(x, self.drop_connect_rate, self.is_test) + x = paddle.elementwise_add(x, inputs) + return x + + +class ConvStemNorm(nn.Layer): + """Basic conv stem norm block for extracting features.""" + + def __init__(self, + input_channels: int, + padding_type: str, + _global_params: dict, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ConvStemNorm, self).__init__() + + output_channels = round_filters(32, _global_params) + self._conv = ConvBNLayer( + input_channels, + filter_size=3, + output_channels=output_channels, + stride=2, + bn_act=None, + padding_type=padding_type, + name="", + conv_name="_conv_stem", + bn_name="_bn0", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class ExtractFeatures(nn.Layer): + """Extract features.""" + + def __init__(self, + input_channels: int, + _block_args: dict, + 
_global_params: dict,
+                 padding_type: str,
+                 use_se: bool,
+                 is_test: bool,
+                 model_name: str = None):
+        super(ExtractFeatures, self).__init__()
+
+        self._global_params = _global_params
+
+        self._conv_stem = ConvStemNorm(
+            input_channels,
+            padding_type=padding_type,
+            _global_params=_global_params,
+            model_name=model_name,
+            cur_stage=0)
+
+        # Count the total number of blocks after depth scaling, so the
+        # per-block drop_connect_rate can be scaled linearly by block index.
+        self.block_args_copy = copy.deepcopy(_block_args)
+        idx = 0
+        block_size = 0
+        for block_arg in self.block_args_copy:
+            block_arg = block_arg._replace(
+                input_filters=round_filters(block_arg.input_filters, _global_params),
+                output_filters=round_filters(block_arg.output_filters, _global_params),
+                num_repeat=round_repeats(block_arg.num_repeat, _global_params))
+            block_size += 1
+            for _ in range(block_arg.num_repeat - 1):
+                block_size += 1
+
+        self.conv_seq = []
+        cur_stage = 1
+        for block_args in _block_args:
+            block_args = block_args._replace(
+                input_filters=round_filters(block_args.input_filters, _global_params),
+                output_filters=round_filters(block_args.output_filters, _global_params),
+                num_repeat=round_repeats(block_args.num_repeat, _global_params))
+
+            drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0
+            if drop_connect_rate:
+                drop_connect_rate *= float(idx) / block_size
+
+            _mc_block = self.add_sublayer(
+                "_blocks." + str(idx) + ".",
+                MbConvBlock(
+                    block_args.input_filters,
+                    block_args=block_args,
+                    padding_type=padding_type,
+                    use_se=use_se,
+                    name="_blocks." + str(idx) + ".",
+                    drop_connect_rate=drop_connect_rate,
+                    model_name=model_name,
+                    cur_stage=cur_stage))
+            self.conv_seq.append(_mc_block)
+            idx += 1
+            if block_args.num_repeat > 1:
+                block_args = block_args._replace(input_filters=block_args.output_filters, stride=1)
+            for _ in range(block_args.num_repeat - 1):
+                drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0
+                if drop_connect_rate:
+                    drop_connect_rate *= float(idx) / block_size
+                _mc_block = self.add_sublayer(
+                    # "_blocks." (not "block.") keeps the sublayer key
+                    # consistent with the first add_sublayer call above.
+                    "_blocks." + str(idx) + ".",
+                    MbConvBlock(
+                        block_args.input_filters,
+                        block_args,
+                        padding_type=padding_type,
+                        use_se=use_se,
+                        name="_blocks." 
+ str(idx) + ".", + drop_connect_rate=drop_connect_rate, + model_name=model_name, + cur_stage=cur_stage)) + self.conv_seq.append(_mc_block) + idx += 1 + cur_stage += 1 + + def forward(self, inputs: paddle.Tensor): + x = self._conv_stem(inputs) + x = F.swish(x) + for _mc_block in self.conv_seq: + x = _mc_block(x) + return x + + +@moduleinfo( + name="efficientnetb0_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="efficientnetb0_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class EfficientNet_B0(nn.Layer): + def __init__(self, + is_test: bool = False, + padding_type: str = "SAME", + override_params: dict = None, + use_se: bool = True, + class_dim: int = 1000, + load_checkpoint: str = None): + super(EfficientNet_B0, self).__init__() + + model_name = 'efficientnet-b0' + self.name = "b0" + self._block_args, self._global_params = get_model_params(model_name, override_params) + self.padding_type = padding_type + self.use_se = use_se + self.is_test = is_test + + self._ef = ExtractFeatures( + 3, + self._block_args, + self._global_params, + self.padding_type, + self.use_se, + self.is_test, + model_name=self.name) + + output_channels = round_filters(1280, self._global_params) + oup = 320 + + self._conv = ConvBNLayer( + oup, + 1, + output_channels, + bn_act="swish", + padding_type=self.padding_type, + name="", + conv_name="_conv_head", + bn_name="_bn1", + model_name=self.name, + cur_stage=7) + self._pool = AdaptiveAvgPool2d(1) + + if self._global_params.dropout_rate: + self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") + + param_attr, bias_attr = init_fc_layer("_fc") + self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'efficientnet_b0_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b0_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + x = self._ef(inputs) + x = self._conv(x) + x = self._pool(x) + if self._global_params.dropout_rate: + x = self._drop(x) + x = paddle.squeeze(x, axis=[2, 3]) + x = self._fc(x) + return x diff --git a/modules/image/classification/efficientnetb0_small_imagenet/module.py b/modules/image/classification/efficientnetb0_small_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..57716199d4ec6b809e4e0fc11f8bdaa431d949c9 --- /dev/null +++ b/modules/image/classification/efficientnetb0_small_imagenet/module.py @@ -0,0 +1,791 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import math +import collections +import re +import copy + +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + +GlobalParams = collections.namedtuple('GlobalParams', [ + 'batch_norm_momentum', + 'batch_norm_epsilon', + 'dropout_rate', + 'num_classes', + 'width_coefficient', + 'depth_coefficient', + 'depth_divisor', + 'min_depth', + 'drop_connect_rate', +]) + +BlockArgs = collections.namedtuple( + 'BlockArgs', + ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) + +GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) +BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) + + +def efficientnet_params(model_name: str): + """ Map EfficientNet model name to parameter coefficients. """ + params_dict = { + # Coefficients: width,depth,resolution,dropout + 'efficientnet-b0': (1.0, 1.0, 224, 0.2) + } + return params_dict[model_name] + + +def efficientnet(width_coefficient: float = None, + depth_coefficient: float = None, + dropout_rate: float = 0.2, + drop_connect_rate: float = 0.2): + """ Get block arguments according to parameter and coefficients. """ + blocks_args = [ + 'r1_k3_s11_e1_i32_o16_se0.25', + 'r2_k3_s22_e6_i16_o24_se0.25', + 'r2_k5_s22_e6_i24_o40_se0.25', + 'r3_k3_s22_e6_i40_o80_se0.25', + 'r3_k5_s11_e6_i80_o112_se0.25', + 'r4_k5_s22_e6_i112_o192_se0.25', + 'r1_k3_s11_e6_i192_o320_se0.25', + ] + blocks_args = BlockDecoder.decode(blocks_args) + + global_params = GlobalParams( + batch_norm_momentum=0.99, + batch_norm_epsilon=1e-3, + dropout_rate=dropout_rate, + drop_connect_rate=drop_connect_rate, + num_classes=1000, + width_coefficient=width_coefficient, + depth_coefficient=depth_coefficient, + depth_divisor=8, + min_depth=None) + + return blocks_args, global_params + + +def get_model_params(model_name: str, override_params: dict): + """ Get the block args and global params for a given model """ + if model_name.startswith('efficientnet'): + w, d, _, p = efficientnet_params(model_name) + blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) + else: + raise NotImplementedError('model name is not pre-defined: %s' % model_name) + if override_params: + global_params = global_params._replace(**override_params) + return blocks_args, global_params + + +def round_filters(filters: int, global_params: dict): + """ Calculate and round number of filters based on depth multiplier. """ + multiplier = global_params.width_coefficient + if not multiplier: + return filters + divisor = global_params.depth_divisor + min_depth = global_params.min_depth + filters *= multiplier + min_depth = min_depth or divisor + new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) + if new_filters < 0.9 * filters: # prevent rounding by more than 10% + new_filters += divisor + return int(new_filters) + + +def round_repeats(repeats: int, global_params: dict): + """ Round number of filters based on depth multiplier. 
""" + multiplier = global_params.depth_coefficient + if not multiplier: + return repeats + return int(math.ceil(multiplier * repeats)) + + +class BlockDecoder(object): + """ + Block Decoder, straight from the official TensorFlow repository. + """ + + @staticmethod + def _decode_block_string(block_string: str): + """ Gets a block through a string notation of arguments. """ + assert isinstance(block_string, str) + + ops = block_string.split('_') + options = {} + for op in ops: + splits = re.split(r'(\d.*)', op) + if len(splits) >= 2: + key, value = splits[:2] + options[key] = value + + # Check stride + cond_1 = ('s' in options and len(options['s']) == 1) + cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) + assert (cond_1 or cond_2) + + return BlockArgs( + kernel_size=int(options['k']), + num_repeat=int(options['r']), + input_filters=int(options['i']), + output_filters=int(options['o']), + expand_ratio=int(options['e']), + id_skip=('noskip' not in block_string), + se_ratio=float(options['se']) if 'se' in options else None, + stride=[int(options['s'][0])]) + + @staticmethod + def _encode_block_string(block): + """Encodes a block to a string.""" + args = [ + 'r%d' % block.num_repeat, + 'k%d' % block.kernel_size, + 's%d%d' % (block.strides[0], block.strides[1]), + 'e%s' % block.expand_ratio, + 'i%d' % block.input_filters, + 'o%d' % block.output_filters + ] + if 0 < block.se_ratio <= 1: + args.append('se%s' % block.se_ratio) + if block.id_skip is False: + args.append('noskip') + return '_'.join(args) + + @staticmethod + def decode(string_list: list): + """ + Decode a list of string notations to specify blocks in the network. + + string_list: list of strings, each string is a notation of block + return + list of BlockArgs namedtuples of block args + """ + assert isinstance(string_list, list) + blocks_args = [] + for block_string in string_list: + blocks_args.append(BlockDecoder._decode_block_string(block_string)) + return blocks_args + + @staticmethod + def encode(blocks_args: list): + """ + Encodes a list of BlockArgs to a list of strings. 
+ + :param blocks_args: a list of BlockArgs namedtuples of block args + :return: a list of strings, each string is a notation of block + """ + block_strings = [] + for block in blocks_args: + block_strings.append(BlockDecoder._encode_block_string(block)) + return block_strings + + +def initial_type(name: str, use_bias: bool = False): + param_attr = ParamAttr(name=name + "_weights") + if use_bias: + bias_attr = ParamAttr(name=name + "_offset") + else: + bias_attr = False + return param_attr, bias_attr + + +def init_batch_norm_layer(name: str = "batch_norm"): + param_attr = ParamAttr(name=name + "_scale") + bias_attr = ParamAttr(name=name + "_offset") + return param_attr, bias_attr + + +def init_fc_layer(name: str = "fc"): + param_attr = ParamAttr(name=name + "_weights") + bias_attr = ParamAttr(name=name + "_offset") + return param_attr, bias_attr + + +def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): + """Calculate padding size.""" + if img_size % stride == 0: + out_size = max(filter_size - stride, 0) + else: + out_size = max(filter_size - (img_size % stride), 0) + return out_size // 2, out_size - out_size // 2 + + +inp_shape = {"b0": [224, 112, 112, 56, 28, 14, 14, 7]} + + +def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): + """Drop input connection""" + if is_test: + return inputs + keep_prob = 1.0 - prob + inputs_shape = paddle.shape(inputs) + random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) + binary_tensor = paddle.floor(random_tensor) + output = inputs / keep_prob * binary_tensor + return output + + +class Conv2ds(nn.Layer): + """Basic conv layer""" + + def __init__(self, + input_channels: int, + output_channels: int, + filter_size: int, + stride: int = 1, + padding: int = 0, + groups: int = None, + name: str = "conv2d", + act: str = None, + use_bias: bool = False, + padding_type: str = None, + model_name: str = None, + cur_stage: str = None): + super(Conv2ds, self).__init__() + assert act in [None, "swish", "sigmoid"] + self.act = act + + param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) + + def get_padding(filter_size, stride=1, dilation=1): + padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 + return padding + + inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] + self.need_crop = False + if padding_type == "SAME": + top_padding, bottom_padding = cal_padding(inps, stride, filter_size) + left_padding, right_padding = cal_padding(inps, stride, filter_size) + height_padding = bottom_padding + width_padding = right_padding + if top_padding != bottom_padding or left_padding != right_padding: + height_padding = top_padding + stride + width_padding = left_padding + stride + self.need_crop = True + padding = [height_padding, width_padding] + elif padding_type == "VALID": + height_padding = 0 + width_padding = 0 + padding = [height_padding, width_padding] + elif padding_type == "DYNAMIC": + padding = get_padding(filter_size, stride) + else: + padding = padding_type + + groups = 1 if groups is None else groups + self._conv = Conv2d( + input_channels, + output_channels, + filter_size, + groups=groups, + stride=stride, + padding=padding, + weight_attr=param_attr, + bias_attr=bias_attr) + + def forward(self, inputs: paddle.Tensor): + x = self._conv(inputs) + if self.act == "swish": + x = F.swish(x) + elif self.act == "sigmoid": + x = F.sigmoid(x) + + if self.need_crop: + x = x[:, :, 1:, 1:] + return x + + +class ConvBNLayer(nn.Layer): + """Basic conv bn 
layer.""" + + def __init__(self, + input_channels: int, + filter_size: int, + output_channels: int, + stride: int = 1, + num_groups: int = 1, + padding_type: str = "SAME", + conv_act: str = None, + bn_act: str = "swish", + use_bn: bool = True, + use_bias: bool = False, + name: str = None, + conv_name: str = None, + bn_name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2ds( + input_channels=input_channels, + output_channels=output_channels, + filter_size=filter_size, + stride=stride, + groups=num_groups, + act=conv_act, + padding_type=padding_type, + name=conv_name, + use_bias=use_bias, + model_name=model_name, + cur_stage=cur_stage) + self.use_bn = use_bn + if use_bn is True: + bn_name = name + bn_name + param_attr, bias_attr = init_batch_norm_layer(bn_name) + + self._bn = BatchNorm( + num_channels=output_channels, + act=bn_act, + momentum=0.99, + epsilon=0.001, + moving_mean_name=bn_name + "_mean", + moving_variance_name=bn_name + "_variance", + param_attr=param_attr, + bias_attr=bias_attr) + + def forward(self, inputs: paddle.Tensor): + if self.use_bn: + x = self._conv(inputs) + x = self._bn(x) + return x + else: + return self._conv(inputs) + + +class ExpandConvNorm(nn.Layer): + """Expand conv norm layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ExpandConvNorm, self).__init__() + + self.oup = block_args.input_filters * block_args.expand_ratio + self.expand_ratio = block_args.expand_ratio + + if self.expand_ratio != 1: + self._conv = ConvBNLayer( + input_channels, + 1, + self.oup, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_expand_conv", + bn_name="_bn0", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + if self.expand_ratio != 1: + return self._conv(inputs) + else: + return inputs + + +class DepthwiseConvNorm(nn.Layer): + """Depthwise conv norm layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(DepthwiseConvNorm, self).__init__() + + self.k = block_args.kernel_size + self.s = block_args.stride + if isinstance(self.s, list) or isinstance(self.s, tuple): + self.s = self.s[0] + oup = block_args.input_filters * block_args.expand_ratio + + self._conv = ConvBNLayer( + input_channels, + self.k, + oup, + self.s, + num_groups=input_channels, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_depthwise_conv", + bn_name="_bn1", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class ProjectConvNorm(nn.Layer): + """Projection conv bn layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ProjectConvNorm, self).__init__() + + final_oup = block_args.output_filters + + self._conv = ConvBNLayer( + input_channels, + 1, + final_oup, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_project_conv", + bn_name="_bn2", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class SEBlock(nn.Layer): + """Basic Squeeze-and-Excitation block for Efficientnet.""" + + def __init__(self, + 
input_channels: int, + num_squeezed_channels: int, + oup: int, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(SEBlock, self).__init__() + + self._pool = AdaptiveAvgPool2d(1) + self._conv1 = Conv2ds( + input_channels, + num_squeezed_channels, + 1, + use_bias=True, + padding_type=padding_type, + act="swish", + name=name + "_se_reduce") + + self._conv2 = Conv2ds( + num_squeezed_channels, + oup, + 1, + act="sigmoid", + use_bias=True, + padding_type=padding_type, + name=name + "_se_expand") + + def forward(self, inputs: paddle.Tensor): + x = self._pool(inputs) + x = self._conv1(x) + x = self._conv2(x) + return paddle.multiply(inputs, x) + + +class MbConvBlock(nn.Layer): + """Mobile inverted bottleneck convolution for Efficientnet.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + use_se: bool, + name: str = None, + drop_connect_rate: float = None, + is_test: bool = False, + model_name: str = None, + cur_stage: str = None): + super(MbConvBlock, self).__init__() + + oup = block_args.input_filters * block_args.expand_ratio + self.block_args = block_args + self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) + self.id_skip = block_args.id_skip + self.expand_ratio = block_args.expand_ratio + self.drop_connect_rate = drop_connect_rate + self.is_test = is_test + + if self.expand_ratio != 1: + self._ecn = ExpandConvNorm( + input_channels, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + self._dcn = DepthwiseConvNorm( + input_channels * block_args.expand_ratio, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + if self.has_se: + num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) + self._se = SEBlock( + input_channels * block_args.expand_ratio, + num_squeezed_channels, + oup, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + self._pcn = ProjectConvNorm( + input_channels * block_args.expand_ratio, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + x = inputs + if self.expand_ratio != 1: + x = self._ecn(x) + x = F.swish(x) + x = self._dcn(x) + x = F.swish(x) + if self.has_se: + x = self._se(x) + x = self._pcn(x) + if self.id_skip and \ + self.block_args.stride == 1 and \ + self.block_args.input_filters == self.block_args.output_filters: + if self.drop_connect_rate: + x = _drop_connect(x, self.drop_connect_rate, self.is_test) + x = paddle.elementwise_add(x, inputs) + return x + + +class ConvStemNorm(nn.Layer): + """Basic conv stem norm block for extracting features.""" + + def __init__(self, + input_channels: int, + padding_type: str, + _global_params: dict, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ConvStemNorm, self).__init__() + + output_channels = round_filters(32, _global_params) + self._conv = ConvBNLayer( + input_channels, + filter_size=3, + output_channels=output_channels, + stride=2, + bn_act=None, + padding_type=padding_type, + name="", + conv_name="_conv_stem", + bn_name="_bn0", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class ExtractFeatures(nn.Layer): + """Extract features.""" + + def __init__(self, + input_channels: int, + _block_args: dict, + 
_global_params: dict,
+                 padding_type: str,
+                 use_se: bool,
+                 is_test: bool,
+                 model_name: str = None):
+        super(ExtractFeatures, self).__init__()
+
+        self._global_params = _global_params
+
+        self._conv_stem = ConvStemNorm(
+            input_channels,
+            padding_type=padding_type,
+            _global_params=_global_params,
+            model_name=model_name,
+            cur_stage=0)
+
+        # Count the total number of blocks after depth scaling, so the
+        # per-block drop_connect_rate can be scaled linearly by block index.
+        self.block_args_copy = copy.deepcopy(_block_args)
+        idx = 0
+        block_size = 0
+        for block_arg in self.block_args_copy:
+            block_arg = block_arg._replace(
+                input_filters=round_filters(block_arg.input_filters, _global_params),
+                output_filters=round_filters(block_arg.output_filters, _global_params),
+                num_repeat=round_repeats(block_arg.num_repeat, _global_params))
+            block_size += 1
+            for _ in range(block_arg.num_repeat - 1):
+                block_size += 1
+
+        self.conv_seq = []
+        cur_stage = 1
+        for block_args in _block_args:
+            block_args = block_args._replace(
+                input_filters=round_filters(block_args.input_filters, _global_params),
+                output_filters=round_filters(block_args.output_filters, _global_params),
+                num_repeat=round_repeats(block_args.num_repeat, _global_params))
+
+            drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0
+            if drop_connect_rate:
+                drop_connect_rate *= float(idx) / block_size
+
+            _mc_block = self.add_sublayer(
+                "_blocks." + str(idx) + ".",
+                MbConvBlock(
+                    block_args.input_filters,
+                    block_args=block_args,
+                    padding_type=padding_type,
+                    use_se=use_se,
+                    name="_blocks." + str(idx) + ".",
+                    drop_connect_rate=drop_connect_rate,
+                    model_name=model_name,
+                    cur_stage=cur_stage))
+            self.conv_seq.append(_mc_block)
+            idx += 1
+            if block_args.num_repeat > 1:
+                block_args = block_args._replace(input_filters=block_args.output_filters, stride=1)
+            for _ in range(block_args.num_repeat - 1):
+                drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0
+                if drop_connect_rate:
+                    drop_connect_rate *= float(idx) / block_size
+                _mc_block = self.add_sublayer(
+                    # "_blocks." (not "block.") keeps the sublayer key
+                    # consistent with the first add_sublayer call above.
+                    "_blocks." + str(idx) + ".",
+                    MbConvBlock(
+                        block_args.input_filters,
+                        block_args,
+                        padding_type=padding_type,
+                        use_se=use_se,
+                        name="_blocks." 
+ str(idx) + ".", + drop_connect_rate=drop_connect_rate, + model_name=model_name, + cur_stage=cur_stage)) + self.conv_seq.append(_mc_block) + idx += 1 + cur_stage += 1 + + def forward(self, inputs: paddle.Tensor): + x = self._conv_stem(inputs) + x = F.swish(x) + for _mc_block in self.conv_seq: + x = _mc_block(x) + return x + + +@moduleinfo( + name="efficientnetb0_small_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="efficientnet_b0_small_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class EfficientNet_B0(nn.Layer): + def __init__(self, + is_test: bool = True, + padding_type: str = 'DYNAMIC', + override_params: dict = None, + use_se: bool = False, + class_dim: int = 1000, + load_checkpoint: str = None): + super(EfficientNet_B0, self).__init__() + + model_name = 'efficientnet-b0' + self.name = "b0" + self._block_args, self._global_params = get_model_params(model_name, override_params) + self.padding_type = padding_type + self.use_se = use_se + self.is_test = is_test + + self._ef = ExtractFeatures( + 3, + self._block_args, + self._global_params, + self.padding_type, + self.use_se, + self.is_test, + model_name=self.name) + + output_channels = round_filters(1280, self._global_params) + oup = 320 + + self._conv = ConvBNLayer( + oup, + 1, + output_channels, + bn_act="swish", + padding_type=self.padding_type, + name="", + conv_name="_conv_head", + bn_name="_bn1", + model_name=self.name, + cur_stage=7) + self._pool = AdaptiveAvgPool2d(1) + + if self._global_params.dropout_rate: + self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") + + param_attr, bias_attr = init_fc_layer("_fc") + self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'efficientnet_b0_small_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b0_small_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + x = self._ef(inputs) + x = self._conv(x) + x = self._pool(x) + if self._global_params.dropout_rate: + x = self._drop(x) + x = paddle.squeeze(x, axis=[2, 3]) + x = self._fc(x) + return x diff --git a/modules/image/classification/efficientnetb1_imagenet/module.py b/modules/image/classification/efficientnetb1_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..2a5bd31f40a40c3b4865694d909fb5409dc7d9cc --- /dev/null +++ b/modules/image/classification/efficientnetb1_imagenet/module.py @@ -0,0 +1,791 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import math +import collections +import re +import copy + +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + +GlobalParams = collections.namedtuple('GlobalParams', [ + 'batch_norm_momentum', + 'batch_norm_epsilon', + 'dropout_rate', + 'num_classes', + 'width_coefficient', + 'depth_coefficient', + 'depth_divisor', + 'min_depth', + 'drop_connect_rate', +]) + +BlockArgs = collections.namedtuple( + 'BlockArgs', + ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) + +GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) +BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) + + +def efficientnet_params(model_name: str): + """ Map EfficientNet model name to parameter coefficients. """ + params_dict = { + # Coefficients: width,depth,resolution,dropout + 'efficientnet-b1': (1.0, 1.1, 240, 0.2) + } + return params_dict[model_name] + + +def efficientnet(width_coefficient: float = None, + depth_coefficient: float = None, + dropout_rate: float = 0.2, + drop_connect_rate: float = 0.2): + """ Get block arguments according to parameter and coefficients. """ + blocks_args = [ + 'r1_k3_s11_e1_i32_o16_se0.25', + 'r2_k3_s22_e6_i16_o24_se0.25', + 'r2_k5_s22_e6_i24_o40_se0.25', + 'r3_k3_s22_e6_i40_o80_se0.25', + 'r3_k5_s11_e6_i80_o112_se0.25', + 'r4_k5_s22_e6_i112_o192_se0.25', + 'r1_k3_s11_e6_i192_o320_se0.25', + ] + blocks_args = BlockDecoder.decode(blocks_args) + + global_params = GlobalParams( + batch_norm_momentum=0.99, + batch_norm_epsilon=1e-3, + dropout_rate=dropout_rate, + drop_connect_rate=drop_connect_rate, + num_classes=1000, + width_coefficient=width_coefficient, + depth_coefficient=depth_coefficient, + depth_divisor=8, + min_depth=None) + + return blocks_args, global_params + + +def get_model_params(model_name: str, override_params: dict): + """ Get the block args and global params for a given model """ + if model_name.startswith('efficientnet'): + w, d, _, p = efficientnet_params(model_name) + blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) + else: + raise NotImplementedError('model name is not pre-defined: %s' % model_name) + if override_params: + global_params = global_params._replace(**override_params) + return blocks_args, global_params + + +def round_filters(filters: int, global_params: dict): + """ Calculate and round number of filters based on depth multiplier. """ + multiplier = global_params.width_coefficient + if not multiplier: + return filters + divisor = global_params.depth_divisor + min_depth = global_params.min_depth + filters *= multiplier + min_depth = min_depth or divisor + new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) + if new_filters < 0.9 * filters: # prevent rounding by more than 10% + new_filters += divisor + return int(new_filters) + + +def round_repeats(repeats: int, global_params: dict): + """ Round number of filters based on depth multiplier. 
""" + multiplier = global_params.depth_coefficient + if not multiplier: + return repeats + return int(math.ceil(multiplier * repeats)) + + +class BlockDecoder(object): + """ + Block Decoder, straight from the official TensorFlow repository. + """ + + @staticmethod + def _decode_block_string(block_string: str): + """ Gets a block through a string notation of arguments. """ + assert isinstance(block_string, str) + + ops = block_string.split('_') + options = {} + for op in ops: + splits = re.split(r'(\d.*)', op) + if len(splits) >= 2: + key, value = splits[:2] + options[key] = value + + # Check stride + cond_1 = ('s' in options and len(options['s']) == 1) + cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) + assert (cond_1 or cond_2) + + return BlockArgs( + kernel_size=int(options['k']), + num_repeat=int(options['r']), + input_filters=int(options['i']), + output_filters=int(options['o']), + expand_ratio=int(options['e']), + id_skip=('noskip' not in block_string), + se_ratio=float(options['se']) if 'se' in options else None, + stride=[int(options['s'][0])]) + + @staticmethod + def _encode_block_string(block): + """Encodes a block to a string.""" + args = [ + 'r%d' % block.num_repeat, + 'k%d' % block.kernel_size, + 's%d%d' % (block.strides[0], block.strides[1]), + 'e%s' % block.expand_ratio, + 'i%d' % block.input_filters, + 'o%d' % block.output_filters + ] + if 0 < block.se_ratio <= 1: + args.append('se%s' % block.se_ratio) + if block.id_skip is False: + args.append('noskip') + return '_'.join(args) + + @staticmethod + def decode(string_list: list): + """ + Decode a list of string notations to specify blocks in the network. + + string_list: list of strings, each string is a notation of block + return + list of BlockArgs namedtuples of block args + """ + assert isinstance(string_list, list) + blocks_args = [] + for block_string in string_list: + blocks_args.append(BlockDecoder._decode_block_string(block_string)) + return blocks_args + + @staticmethod + def encode(blocks_args: list): + """ + Encodes a list of BlockArgs to a list of strings. 
+ + :param blocks_args: a list of BlockArgs namedtuples of block args + :return: a list of strings, each string is a notation of block + """ + block_strings = [] + for block in blocks_args: + block_strings.append(BlockDecoder._encode_block_string(block)) + return block_strings + + +def initial_type(name: str, use_bias: bool = False): + param_attr = ParamAttr(name=name + "_weights") + if use_bias: + bias_attr = ParamAttr(name=name + "_offset") + else: + bias_attr = False + return param_attr, bias_attr + + +def init_batch_norm_layer(name: str = "batch_norm"): + param_attr = ParamAttr(name=name + "_scale") + bias_attr = ParamAttr(name=name + "_offset") + return param_attr, bias_attr + + +def init_fc_layer(name: str = "fc"): + param_attr = ParamAttr(name=name + "_weights") + bias_attr = ParamAttr(name=name + "_offset") + return param_attr, bias_attr + + +def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): + """Calculate padding size.""" + if img_size % stride == 0: + out_size = max(filter_size - stride, 0) + else: + out_size = max(filter_size - (img_size % stride), 0) + return out_size // 2, out_size - out_size // 2 + + +inp_shape = {"b1": [240, 120, 120, 60, 30, 15, 15, 8]} + + +def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): + """Drop input connection""" + if is_test: + return inputs + keep_prob = 1.0 - prob + inputs_shape = paddle.shape(inputs) + random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) + binary_tensor = paddle.floor(random_tensor) + output = inputs / keep_prob * binary_tensor + return output + + +class Conv2ds(nn.Layer): + """Basic conv layer""" + + def __init__(self, + input_channels: int, + output_channels: int, + filter_size: int, + stride: int = 1, + padding: int = 0, + groups: int = None, + name: str = "conv2d", + act: str = None, + use_bias: bool = False, + padding_type: str = None, + model_name: str = None, + cur_stage: str = None): + super(Conv2ds, self).__init__() + assert act in [None, "swish", "sigmoid"] + self.act = act + + param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) + + def get_padding(filter_size, stride=1, dilation=1): + padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 + return padding + + inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] + self.need_crop = False + if padding_type == "SAME": + top_padding, bottom_padding = cal_padding(inps, stride, filter_size) + left_padding, right_padding = cal_padding(inps, stride, filter_size) + height_padding = bottom_padding + width_padding = right_padding + if top_padding != bottom_padding or left_padding != right_padding: + height_padding = top_padding + stride + width_padding = left_padding + stride + self.need_crop = True + padding = [height_padding, width_padding] + elif padding_type == "VALID": + height_padding = 0 + width_padding = 0 + padding = [height_padding, width_padding] + elif padding_type == "DYNAMIC": + padding = get_padding(filter_size, stride) + else: + padding = padding_type + + groups = 1 if groups is None else groups + self._conv = Conv2d( + input_channels, + output_channels, + filter_size, + groups=groups, + stride=stride, + padding=padding, + weight_attr=param_attr, + bias_attr=bias_attr) + + def forward(self, inputs: paddle.Tensor): + x = self._conv(inputs) + if self.act == "swish": + x = F.swish(x) + elif self.act == "sigmoid": + x = F.sigmoid(x) + + if self.need_crop: + x = x[:, :, 1:, 1:] + return x + + +class ConvBNLayer(nn.Layer): + """Basic conv bn 
layer.""" + + def __init__(self, + input_channels: int, + filter_size: int, + output_channels: int, + stride: int = 1, + num_groups: int = 1, + padding_type: str = "SAME", + conv_act: str = None, + bn_act: str = "swish", + use_bn: bool = True, + use_bias: bool = False, + name: str = None, + conv_name: str = None, + bn_name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2ds( + input_channels=input_channels, + output_channels=output_channels, + filter_size=filter_size, + stride=stride, + groups=num_groups, + act=conv_act, + padding_type=padding_type, + name=conv_name, + use_bias=use_bias, + model_name=model_name, + cur_stage=cur_stage) + self.use_bn = use_bn + if use_bn is True: + bn_name = name + bn_name + param_attr, bias_attr = init_batch_norm_layer(bn_name) + + self._bn = BatchNorm( + num_channels=output_channels, + act=bn_act, + momentum=0.99, + epsilon=0.001, + moving_mean_name=bn_name + "_mean", + moving_variance_name=bn_name + "_variance", + param_attr=param_attr, + bias_attr=bias_attr) + + def forward(self, inputs: paddle.Tensor): + if self.use_bn: + x = self._conv(inputs) + x = self._bn(x) + return x + else: + return self._conv(inputs) + + +class ExpandConvNorm(nn.Layer): + """Expand conv norm layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ExpandConvNorm, self).__init__() + + self.oup = block_args.input_filters * block_args.expand_ratio + self.expand_ratio = block_args.expand_ratio + + if self.expand_ratio != 1: + self._conv = ConvBNLayer( + input_channels, + 1, + self.oup, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_expand_conv", + bn_name="_bn0", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + if self.expand_ratio != 1: + return self._conv(inputs) + else: + return inputs + + +class DepthwiseConvNorm(nn.Layer): + """Depthwise conv norm layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(DepthwiseConvNorm, self).__init__() + + self.k = block_args.kernel_size + self.s = block_args.stride + if isinstance(self.s, list) or isinstance(self.s, tuple): + self.s = self.s[0] + oup = block_args.input_filters * block_args.expand_ratio + + self._conv = ConvBNLayer( + input_channels, + self.k, + oup, + self.s, + num_groups=input_channels, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_depthwise_conv", + bn_name="_bn1", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class ProjectConvNorm(nn.Layer): + """Projection conv bn layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ProjectConvNorm, self).__init__() + + final_oup = block_args.output_filters + + self._conv = ConvBNLayer( + input_channels, + 1, + final_oup, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_project_conv", + bn_name="_bn2", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class SEBlock(nn.Layer): + """Basic Squeeze-and-Excitation block for Efficientnet.""" + + def __init__(self, + 
input_channels: int, + num_squeezed_channels: int, + oup: int, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(SEBlock, self).__init__() + + self._pool = AdaptiveAvgPool2d(1) + self._conv1 = Conv2ds( + input_channels, + num_squeezed_channels, + 1, + use_bias=True, + padding_type=padding_type, + act="swish", + name=name + "_se_reduce") + + self._conv2 = Conv2ds( + num_squeezed_channels, + oup, + 1, + act="sigmoid", + use_bias=True, + padding_type=padding_type, + name=name + "_se_expand") + + def forward(self, inputs: paddle.Tensor): + x = self._pool(inputs) + x = self._conv1(x) + x = self._conv2(x) + return paddle.multiply(inputs, x) + + +class MbConvBlock(nn.Layer): + """Mobile inverted bottleneck convolution for Efficientnet.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + use_se: bool, + name: str = None, + drop_connect_rate: float = None, + is_test: bool = False, + model_name: str = None, + cur_stage: str = None): + super(MbConvBlock, self).__init__() + + oup = block_args.input_filters * block_args.expand_ratio + self.block_args = block_args + self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) + self.id_skip = block_args.id_skip + self.expand_ratio = block_args.expand_ratio + self.drop_connect_rate = drop_connect_rate + self.is_test = is_test + + if self.expand_ratio != 1: + self._ecn = ExpandConvNorm( + input_channels, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + self._dcn = DepthwiseConvNorm( + input_channels * block_args.expand_ratio, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + if self.has_se: + num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) + self._se = SEBlock( + input_channels * block_args.expand_ratio, + num_squeezed_channels, + oup, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + self._pcn = ProjectConvNorm( + input_channels * block_args.expand_ratio, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + x = inputs + if self.expand_ratio != 1: + x = self._ecn(x) + x = F.swish(x) + x = self._dcn(x) + x = F.swish(x) + if self.has_se: + x = self._se(x) + x = self._pcn(x) + if self.id_skip and \ + self.block_args.stride == 1 and \ + self.block_args.input_filters == self.block_args.output_filters: + if self.drop_connect_rate: + x = _drop_connect(x, self.drop_connect_rate, self.is_test) + x = paddle.elementwise_add(x, inputs) + return x + + +class ConvStemNorm(nn.Layer): + """Basic conv stem norm block for extracting features.""" + + def __init__(self, + input_channels: int, + padding_type: str, + _global_params: dict, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ConvStemNorm, self).__init__() + + output_channels = round_filters(32, _global_params) + self._conv = ConvBNLayer( + input_channels, + filter_size=3, + output_channels=output_channels, + stride=2, + bn_act=None, + padding_type=padding_type, + name="", + conv_name="_conv_stem", + bn_name="_bn0", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class ExtractFeatures(nn.Layer): + """Extract features.""" + + def __init__(self, + input_channels: int, + _block_args: dict, + 
_global_params: dict,
+                 padding_type: str,
+                 use_se: bool,
+                 is_test: bool,
+                 model_name: str = None):
+        super(ExtractFeatures, self).__init__()
+
+        self._global_params = _global_params
+
+        self._conv_stem = ConvStemNorm(
+            input_channels,
+            padding_type=padding_type,
+            _global_params=_global_params,
+            model_name=model_name,
+            cur_stage=0)
+
+        # Count the total number of blocks after depth scaling, so the
+        # per-block drop_connect_rate can be scaled linearly by block index.
+        self.block_args_copy = copy.deepcopy(_block_args)
+        idx = 0
+        block_size = 0
+        for block_arg in self.block_args_copy:
+            block_arg = block_arg._replace(
+                input_filters=round_filters(block_arg.input_filters, _global_params),
+                output_filters=round_filters(block_arg.output_filters, _global_params),
+                num_repeat=round_repeats(block_arg.num_repeat, _global_params))
+            block_size += 1
+            for _ in range(block_arg.num_repeat - 1):
+                block_size += 1
+
+        self.conv_seq = []
+        cur_stage = 1
+        for block_args in _block_args:
+            block_args = block_args._replace(
+                input_filters=round_filters(block_args.input_filters, _global_params),
+                output_filters=round_filters(block_args.output_filters, _global_params),
+                num_repeat=round_repeats(block_args.num_repeat, _global_params))
+
+            drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0
+            if drop_connect_rate:
+                drop_connect_rate *= float(idx) / block_size
+
+            _mc_block = self.add_sublayer(
+                "_blocks." + str(idx) + ".",
+                MbConvBlock(
+                    block_args.input_filters,
+                    block_args=block_args,
+                    padding_type=padding_type,
+                    use_se=use_se,
+                    name="_blocks." + str(idx) + ".",
+                    drop_connect_rate=drop_connect_rate,
+                    model_name=model_name,
+                    cur_stage=cur_stage))
+            self.conv_seq.append(_mc_block)
+            idx += 1
+            if block_args.num_repeat > 1:
+                block_args = block_args._replace(input_filters=block_args.output_filters, stride=1)
+            for _ in range(block_args.num_repeat - 1):
+                drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0
+                if drop_connect_rate:
+                    drop_connect_rate *= float(idx) / block_size
+                _mc_block = self.add_sublayer(
+                    # "_blocks." (not "block.") keeps the sublayer key
+                    # consistent with the first add_sublayer call above.
+                    "_blocks." + str(idx) + ".",
+                    MbConvBlock(
+                        block_args.input_filters,
+                        block_args,
+                        padding_type=padding_type,
+                        use_se=use_se,
+                        name="_blocks." 
+ str(idx) + ".", + drop_connect_rate=drop_connect_rate, + model_name=model_name, + cur_stage=cur_stage)) + self.conv_seq.append(_mc_block) + idx += 1 + cur_stage += 1 + + def forward(self, inputs: paddle.Tensor): + x = self._conv_stem(inputs) + x = F.swish(x) + for _mc_block in self.conv_seq: + x = _mc_block(x) + return x + + +@moduleinfo( + name="efficientnetb1_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="efficientnetb1_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class EfficientNet_B1(nn.Layer): + def __init__(self, + is_test: bool = False, + padding_type: str = "SAME", + override_params: dict = None, + use_se: bool = True, + class_dim: int = 1000, + load_checkpoint: str = None): + super(EfficientNet_B1, self).__init__() + + model_name = 'efficientnet-b1' + self.name = "b1" + self._block_args, self._global_params = get_model_params(model_name, override_params) + self.padding_type = padding_type + self.use_se = use_se + self.is_test = is_test + + self._ef = ExtractFeatures( + 3, + self._block_args, + self._global_params, + self.padding_type, + self.use_se, + self.is_test, + model_name=self.name) + + output_channels = round_filters(1280, self._global_params) + oup = 320 + + self._conv = ConvBNLayer( + oup, + 1, + output_channels, + bn_act="swish", + padding_type=self.padding_type, + name="", + conv_name="_conv_head", + bn_name="_bn1", + model_name=self.name, + cur_stage=7) + self._pool = AdaptiveAvgPool2d(1) + + if self._global_params.dropout_rate: + self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") + + param_attr, bias_attr = init_fc_layer("_fc") + self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'efficientnet_b1_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b1_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + x = self._ef(inputs) + x = self._conv(x) + x = self._pool(x) + if self._global_params.dropout_rate: + x = self._drop(x) + x = paddle.squeeze(x, axis=[2, 3]) + x = self._fc(x) + return x diff --git a/modules/image/classification/efficientnetb2_imagenet/module.py b/modules/image/classification/efficientnetb2_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..fed9968bab2fdc783a47f65b5de51af907ce6b47 --- /dev/null +++ b/modules/image/classification/efficientnetb2_imagenet/module.py @@ -0,0 +1,791 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import math +import collections +import re +import copy + +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + +GlobalParams = collections.namedtuple('GlobalParams', [ + 'batch_norm_momentum', + 'batch_norm_epsilon', + 'dropout_rate', + 'num_classes', + 'width_coefficient', + 'depth_coefficient', + 'depth_divisor', + 'min_depth', + 'drop_connect_rate', +]) + +BlockArgs = collections.namedtuple( + 'BlockArgs', + ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) + +GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) +BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) + + +def efficientnet_params(model_name: str): + """ Map EfficientNet model name to parameter coefficients. """ + params_dict = { + # Coefficients: width,depth,resolution,dropout + 'efficientnet-b2': (1.1, 1.2, 260, 0.3) + } + return params_dict[model_name] + + +def efficientnet(width_coefficient: float = None, + depth_coefficient: float = None, + dropout_rate: float = 0.2, + drop_connect_rate: float = 0.2): + """ Get block arguments according to parameter and coefficients. """ + blocks_args = [ + 'r1_k3_s11_e1_i32_o16_se0.25', + 'r2_k3_s22_e6_i16_o24_se0.25', + 'r2_k5_s22_e6_i24_o40_se0.25', + 'r3_k3_s22_e6_i40_o80_se0.25', + 'r3_k5_s11_e6_i80_o112_se0.25', + 'r4_k5_s22_e6_i112_o192_se0.25', + 'r1_k3_s11_e6_i192_o320_se0.25', + ] + blocks_args = BlockDecoder.decode(blocks_args) + + global_params = GlobalParams( + batch_norm_momentum=0.99, + batch_norm_epsilon=1e-3, + dropout_rate=dropout_rate, + drop_connect_rate=drop_connect_rate, + num_classes=1000, + width_coefficient=width_coefficient, + depth_coefficient=depth_coefficient, + depth_divisor=8, + min_depth=None) + + return blocks_args, global_params + + +def get_model_params(model_name: str, override_params: dict): + """ Get the block args and global params for a given model """ + if model_name.startswith('efficientnet'): + w, d, _, p = efficientnet_params(model_name) + blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) + else: + raise NotImplementedError('model name is not pre-defined: %s' % model_name) + if override_params: + global_params = global_params._replace(**override_params) + return blocks_args, global_params + + +def round_filters(filters: int, global_params: dict): + """ Calculate and round number of filters based on depth multiplier. """ + multiplier = global_params.width_coefficient + if not multiplier: + return filters + divisor = global_params.depth_divisor + min_depth = global_params.min_depth + filters *= multiplier + min_depth = min_depth or divisor + new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) + if new_filters < 0.9 * filters: # prevent rounding by more than 10% + new_filters += divisor + return int(new_filters) + + +def round_repeats(repeats: int, global_params: dict): + """ Round number of filters based on depth multiplier. 
""" + multiplier = global_params.depth_coefficient + if not multiplier: + return repeats + return int(math.ceil(multiplier * repeats)) + + +class BlockDecoder(object): + """ + Block Decoder, straight from the official TensorFlow repository. + """ + + @staticmethod + def _decode_block_string(block_string: str): + """ Gets a block through a string notation of arguments. """ + assert isinstance(block_string, str) + + ops = block_string.split('_') + options = {} + for op in ops: + splits = re.split(r'(\d.*)', op) + if len(splits) >= 2: + key, value = splits[:2] + options[key] = value + + # Check stride + cond_1 = ('s' in options and len(options['s']) == 1) + cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) + assert (cond_1 or cond_2) + + return BlockArgs( + kernel_size=int(options['k']), + num_repeat=int(options['r']), + input_filters=int(options['i']), + output_filters=int(options['o']), + expand_ratio=int(options['e']), + id_skip=('noskip' not in block_string), + se_ratio=float(options['se']) if 'se' in options else None, + stride=[int(options['s'][0])]) + + @staticmethod + def _encode_block_string(block): + """Encodes a block to a string.""" + args = [ + 'r%d' % block.num_repeat, + 'k%d' % block.kernel_size, + 's%d%d' % (block.strides[0], block.strides[1]), + 'e%s' % block.expand_ratio, + 'i%d' % block.input_filters, + 'o%d' % block.output_filters + ] + if 0 < block.se_ratio <= 1: + args.append('se%s' % block.se_ratio) + if block.id_skip is False: + args.append('noskip') + return '_'.join(args) + + @staticmethod + def decode(string_list: list): + """ + Decode a list of string notations to specify blocks in the network. + + string_list: list of strings, each string is a notation of block + return + list of BlockArgs namedtuples of block args + """ + assert isinstance(string_list, list) + blocks_args = [] + for block_string in string_list: + blocks_args.append(BlockDecoder._decode_block_string(block_string)) + return blocks_args + + @staticmethod + def encode(blocks_args: list): + """ + Encodes a list of BlockArgs to a list of strings. 
+ + :param blocks_args: a list of BlockArgs namedtuples of block args + :return: a list of strings, each string is a notation of block + """ + block_strings = [] + for block in blocks_args: + block_strings.append(BlockDecoder._encode_block_string(block)) + return block_strings + + +def initial_type(name: str, use_bias: bool = False): + param_attr = ParamAttr(name=name + "_weights") + if use_bias: + bias_attr = ParamAttr(name=name + "_offset") + else: + bias_attr = False + return param_attr, bias_attr + + +def init_batch_norm_layer(name: str = "batch_norm"): + param_attr = ParamAttr(name=name + "_scale") + bias_attr = ParamAttr(name=name + "_offset") + return param_attr, bias_attr + + +def init_fc_layer(name: str = "fc"): + param_attr = ParamAttr(name=name + "_weights") + bias_attr = ParamAttr(name=name + "_offset") + return param_attr, bias_attr + + +def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): + """Calculate padding size.""" + if img_size % stride == 0: + out_size = max(filter_size - stride, 0) + else: + out_size = max(filter_size - (img_size % stride), 0) + return out_size // 2, out_size - out_size // 2 + + +inp_shape = {"b2": [260, 130, 130, 65, 33, 17, 17, 9]} + + +def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): + """Drop input connection""" + if is_test: + return inputs + keep_prob = 1.0 - prob + inputs_shape = paddle.shape(inputs) + random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) + binary_tensor = paddle.floor(random_tensor) + output = inputs / keep_prob * binary_tensor + return output + + +class Conv2ds(nn.Layer): + """Basic conv layer""" + + def __init__(self, + input_channels: int, + output_channels: int, + filter_size: int, + stride: int = 1, + padding: int = 0, + groups: int = None, + name: str = "conv2d", + act: str = None, + use_bias: bool = False, + padding_type: str = None, + model_name: str = None, + cur_stage: str = None): + super(Conv2ds, self).__init__() + assert act in [None, "swish", "sigmoid"] + self.act = act + + param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) + + def get_padding(filter_size, stride=1, dilation=1): + padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 + return padding + + inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] + self.need_crop = False + if padding_type == "SAME": + top_padding, bottom_padding = cal_padding(inps, stride, filter_size) + left_padding, right_padding = cal_padding(inps, stride, filter_size) + height_padding = bottom_padding + width_padding = right_padding + if top_padding != bottom_padding or left_padding != right_padding: + height_padding = top_padding + stride + width_padding = left_padding + stride + self.need_crop = True + padding = [height_padding, width_padding] + elif padding_type == "VALID": + height_padding = 0 + width_padding = 0 + padding = [height_padding, width_padding] + elif padding_type == "DYNAMIC": + padding = get_padding(filter_size, stride) + else: + padding = padding_type + + groups = 1 if groups is None else groups + self._conv = Conv2d( + input_channels, + output_channels, + filter_size, + groups=groups, + stride=stride, + padding=padding, + weight_attr=param_attr, + bias_attr=bias_attr) + + def forward(self, inputs: paddle.Tensor): + x = self._conv(inputs) + if self.act == "swish": + x = F.swish(x) + elif self.act == "sigmoid": + x = F.sigmoid(x) + + if self.need_crop: + x = x[:, :, 1:, 1:] + return x + + +class ConvBNLayer(nn.Layer): + """Basic conv bn 
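The `cal_padding`/`need_crop` pair above emulates TensorFlow-style SAME padding, which can be asymmetric; since `Conv2d` only accepts symmetric padding, `Conv2ds` over-pads and then crops the first row/column. A worked example for the B2 stem (260x260 input, 3x3 filter, stride 2), assuming nothing beyond the function above:

```python
# SAME padding for the B2 stem: 260x260 input, 3x3 filter, stride 2.
def cal_padding(img_size, stride, filter_size):
    if img_size % stride == 0:
        out_size = max(filter_size - stride, 0)
    else:
        out_size = max(filter_size - (img_size % stride), 0)
    return out_size // 2, out_size - out_size // 2

top, bottom = cal_padding(260, 2, 3)
print(top, bottom)  # (0, 1): asymmetric, so Conv2ds pads top + stride = 2
                    # on both sides and later crops x[:, :, 1:, 1:]
```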
layer.""" + + def __init__(self, + input_channels: int, + filter_size: int, + output_channels: int, + stride: int = 1, + num_groups: int = 1, + padding_type: str = "SAME", + conv_act: str = None, + bn_act: str = "swish", + use_bn: bool = True, + use_bias: bool = False, + name: str = None, + conv_name: str = None, + bn_name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2ds( + input_channels=input_channels, + output_channels=output_channels, + filter_size=filter_size, + stride=stride, + groups=num_groups, + act=conv_act, + padding_type=padding_type, + name=conv_name, + use_bias=use_bias, + model_name=model_name, + cur_stage=cur_stage) + self.use_bn = use_bn + if use_bn is True: + bn_name = name + bn_name + param_attr, bias_attr = init_batch_norm_layer(bn_name) + + self._bn = BatchNorm( + num_channels=output_channels, + act=bn_act, + momentum=0.99, + epsilon=0.001, + moving_mean_name=bn_name + "_mean", + moving_variance_name=bn_name + "_variance", + param_attr=param_attr, + bias_attr=bias_attr) + + def forward(self, inputs: paddle.Tensor): + if self.use_bn: + x = self._conv(inputs) + x = self._bn(x) + return x + else: + return self._conv(inputs) + + +class ExpandConvNorm(nn.Layer): + """Expand conv norm layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ExpandConvNorm, self).__init__() + + self.oup = block_args.input_filters * block_args.expand_ratio + self.expand_ratio = block_args.expand_ratio + + if self.expand_ratio != 1: + self._conv = ConvBNLayer( + input_channels, + 1, + self.oup, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_expand_conv", + bn_name="_bn0", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + if self.expand_ratio != 1: + return self._conv(inputs) + else: + return inputs + + +class DepthwiseConvNorm(nn.Layer): + """Depthwise conv norm layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(DepthwiseConvNorm, self).__init__() + + self.k = block_args.kernel_size + self.s = block_args.stride + if isinstance(self.s, list) or isinstance(self.s, tuple): + self.s = self.s[0] + oup = block_args.input_filters * block_args.expand_ratio + + self._conv = ConvBNLayer( + input_channels, + self.k, + oup, + self.s, + num_groups=input_channels, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_depthwise_conv", + bn_name="_bn1", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class ProjectConvNorm(nn.Layer): + """Projection conv bn layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ProjectConvNorm, self).__init__() + + final_oup = block_args.output_filters + + self._conv = ConvBNLayer( + input_channels, + 1, + final_oup, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_project_conv", + bn_name="_bn2", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class SEBlock(nn.Layer): + """Basic Squeeze-and-Excitation block for Efficientnet.""" + + def __init__(self, + 
input_channels: int, + num_squeezed_channels: int, + oup: int, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(SEBlock, self).__init__() + + self._pool = AdaptiveAvgPool2d(1) + self._conv1 = Conv2ds( + input_channels, + num_squeezed_channels, + 1, + use_bias=True, + padding_type=padding_type, + act="swish", + name=name + "_se_reduce") + + self._conv2 = Conv2ds( + num_squeezed_channels, + oup, + 1, + act="sigmoid", + use_bias=True, + padding_type=padding_type, + name=name + "_se_expand") + + def forward(self, inputs: paddle.Tensor): + x = self._pool(inputs) + x = self._conv1(x) + x = self._conv2(x) + return paddle.multiply(inputs, x) + + +class MbConvBlock(nn.Layer): + """Mobile inverted bottleneck convolution for Efficientnet.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + use_se: bool, + name: str = None, + drop_connect_rate: float = None, + is_test: bool = False, + model_name: str = None, + cur_stage: str = None): + super(MbConvBlock, self).__init__() + + oup = block_args.input_filters * block_args.expand_ratio + self.block_args = block_args + self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) + self.id_skip = block_args.id_skip + self.expand_ratio = block_args.expand_ratio + self.drop_connect_rate = drop_connect_rate + self.is_test = is_test + + if self.expand_ratio != 1: + self._ecn = ExpandConvNorm( + input_channels, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + self._dcn = DepthwiseConvNorm( + input_channels * block_args.expand_ratio, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + if self.has_se: + num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) + self._se = SEBlock( + input_channels * block_args.expand_ratio, + num_squeezed_channels, + oup, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + self._pcn = ProjectConvNorm( + input_channels * block_args.expand_ratio, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + x = inputs + if self.expand_ratio != 1: + x = self._ecn(x) + x = F.swish(x) + x = self._dcn(x) + x = F.swish(x) + if self.has_se: + x = self._se(x) + x = self._pcn(x) + if self.id_skip and \ + self.block_args.stride == 1 and \ + self.block_args.input_filters == self.block_args.output_filters: + if self.drop_connect_rate: + x = _drop_connect(x, self.drop_connect_rate, self.is_test) + x = paddle.elementwise_add(x, inputs) + return x + + +class ConvStemNorm(nn.Layer): + """Basic conv stem norm block for extracting features.""" + + def __init__(self, + input_channels: int, + padding_type: str, + _global_params: dict, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ConvStemNorm, self).__init__() + + output_channels = round_filters(32, _global_params) + self._conv = ConvBNLayer( + input_channels, + filter_size=3, + output_channels=output_channels, + stride=2, + bn_act=None, + padding_type=padding_type, + name="", + conv_name="_conv_stem", + bn_name="_bn0", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class ExtractFeatures(nn.Layer): + """Extract features.""" + + def __init__(self, + input_channels: int, + _block_args: dict, + 
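Functionally, the `SEBlock.forward` above computes a per-channel gate in (0, 1) from globally pooled features and rescales the input with it. A minimal functional sketch, with biases omitted (the module version uses named `Conv2ds` layers with biases):

```python
# Squeeze-and-Excitation gating, functional sketch (illustrative only).
import paddle
import paddle.nn.functional as F

def se_gate(x, w_reduce, w_expand):
    s = F.adaptive_avg_pool2d(x, 1)       # squeeze: N x C x 1 x 1
    s = F.swish(F.conv2d(s, w_reduce))    # reduce to the squeezed width
    s = F.sigmoid(F.conv2d(s, w_expand))  # expand back to C; gate in (0, 1)
    return x * s                          # excite: channel-wise rescale
```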
_global_params: dict, + padding_type: str, + use_se: bool, + is_test: bool, + model_name: str = None): + super(ExtractFeatures, self).__init__() + + self._global_params = _global_params + + self._conv_stem = ConvStemNorm( + input_channels, + padding_type=padding_type, + _global_params=_global_params, + model_name=model_name, + cur_stage=0) + + self.block_args_copy = copy.deepcopy(_block_args) + idx = 0 + block_size = 0 + for block_arg in self.block_args_copy: + block_arg = block_arg._replace( + input_filters=round_filters(block_arg.input_filters, _global_params), + output_filters=round_filters(block_arg.output_filters, _global_params), + num_repeat=round_repeats(block_arg.num_repeat, _global_params)) + block_size += 1 + for _ in range(block_arg.num_repeat - 1): + block_size += 1 + + self.conv_seq = [] + cur_stage = 1 + for block_args in _block_args: + block_args = block_args._replace( + input_filters=round_filters(block_args.input_filters, _global_params), + output_filters=round_filters(block_args.output_filters, _global_params), + num_repeat=round_repeats(block_args.num_repeat, _global_params)) + + drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 + if drop_connect_rate: + drop_connect_rate *= float(idx) / block_size + + _mc_block = self.add_sublayer( + "_blocks." + str(idx) + ".", + MbConvBlock( + block_args.input_filters, + block_args=block_args, + padding_type=padding_type, + use_se=use_se, + name="_blocks." + str(idx) + ".", + drop_connect_rate=drop_connect_rate, + model_name=model_name, + cur_stage=cur_stage)) + self.conv_seq.append(_mc_block) + idx += 1 + if block_args.num_repeat > 1: + block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) + for _ in range(block_args.num_repeat - 1): + drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 + if drop_connect_rate: + drop_connect_rate *= float(idx) / block_size + _mc_block = self.add_sublayer( + "block." + str(idx) + ".", + MbConvBlock( + block_args.input_filters, + block_args, + padding_type=padding_type, + use_se=use_se, + name="_blocks." 
+ str(idx) + ".", + drop_connect_rate=drop_connect_rate, + model_name=model_name, + cur_stage=cur_stage)) + self.conv_seq.append(_mc_block) + idx += 1 + cur_stage += 1 + + def forward(self, inputs: paddle.Tensor): + x = self._conv_stem(inputs) + x = F.swish(x) + for _mc_block in self.conv_seq: + x = _mc_block(x) + return x + + +@moduleinfo( + name="efficientnetb2_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="efficientnetb2_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class EfficientNet_B2(nn.Layer): + def __init__(self, + is_test: bool = False, + padding_type: str = "SAME", + override_params: dict = None, + use_se: bool = True, + class_dim: int = 1000, + load_checkpoint: str = None): + super(EfficientNet_B2, self).__init__() + + model_name = 'efficientnet-b2' + self.name = "b2" + self._block_args, self._global_params = get_model_params(model_name, override_params) + self.padding_type = padding_type + self.use_se = use_se + self.is_test = is_test + + self._ef = ExtractFeatures( + 3, + self._block_args, + self._global_params, + self.padding_type, + self.use_se, + self.is_test, + model_name=self.name) + + output_channels = round_filters(1280, self._global_params) + oup = 352 + + self._conv = ConvBNLayer( + oup, + 1, + output_channels, + bn_act="swish", + padding_type=self.padding_type, + name="", + conv_name="_conv_head", + bn_name="_bn1", + model_name=self.name, + cur_stage=7) + self._pool = AdaptiveAvgPool2d(1) + + if self._global_params.dropout_rate: + self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") + + param_attr, bias_attr = init_fc_layer("_fc") + self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'efficientnet_b2_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b2_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + x = self._ef(inputs) + x = self._conv(x) + x = self._pool(x) + if self._global_params.dropout_rate: + x = self._drop(x) + x = paddle.squeeze(x, axis=[2, 3]) + x = self._fc(x) + return x diff --git a/modules/image/classification/efficientnetb3_imagenet/module.py b/modules/image/classification/efficientnetb3_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..4b3903db63470ce9adbd71e2cccd82fdef2d7d63 --- /dev/null +++ b/modules/image/classification/efficientnetb3_imagenet/module.py @@ -0,0 +1,791 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import math +import collections +import re +import copy + +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + +GlobalParams = collections.namedtuple('GlobalParams', [ + 'batch_norm_momentum', + 'batch_norm_epsilon', + 'dropout_rate', + 'num_classes', + 'width_coefficient', + 'depth_coefficient', + 'depth_divisor', + 'min_depth', + 'drop_connect_rate', +]) + +BlockArgs = collections.namedtuple( + 'BlockArgs', + ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) + +GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) +BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) + + +def efficientnet_params(model_name: str): + """ Map EfficientNet model name to parameter coefficients. """ + params_dict = { + # Coefficients: width,depth,resolution,dropout + 'efficientnet-b3': (1.2, 1.4, 300, 0.3) + } + return params_dict[model_name] + + +def efficientnet(width_coefficient: float = None, + depth_coefficient: float = None, + dropout_rate: float = 0.2, + drop_connect_rate: float = 0.2): + """ Get block arguments according to parameter and coefficients. """ + blocks_args = [ + 'r1_k3_s11_e1_i32_o16_se0.25', + 'r2_k3_s22_e6_i16_o24_se0.25', + 'r2_k5_s22_e6_i24_o40_se0.25', + 'r3_k3_s22_e6_i40_o80_se0.25', + 'r3_k5_s11_e6_i80_o112_se0.25', + 'r4_k5_s22_e6_i112_o192_se0.25', + 'r1_k3_s11_e6_i192_o320_se0.25', + ] + blocks_args = BlockDecoder.decode(blocks_args) + + global_params = GlobalParams( + batch_norm_momentum=0.99, + batch_norm_epsilon=1e-3, + dropout_rate=dropout_rate, + drop_connect_rate=drop_connect_rate, + num_classes=1000, + width_coefficient=width_coefficient, + depth_coefficient=depth_coefficient, + depth_divisor=8, + min_depth=None) + + return blocks_args, global_params + + +def get_model_params(model_name: str, override_params: dict): + """ Get the block args and global params for a given model """ + if model_name.startswith('efficientnet'): + w, d, _, p = efficientnet_params(model_name) + blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) + else: + raise NotImplementedError('model name is not pre-defined: %s' % model_name) + if override_params: + global_params = global_params._replace(**override_params) + return blocks_args, global_params + + +def round_filters(filters: int, global_params: dict): + """ Calculate and round number of filters based on depth multiplier. """ + multiplier = global_params.width_coefficient + if not multiplier: + return filters + divisor = global_params.depth_divisor + min_depth = global_params.min_depth + filters *= multiplier + min_depth = min_depth or divisor + new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) + if new_filters < 0.9 * filters: # prevent rounding by more than 10% + new_filters += divisor + return int(new_filters) + + +def round_repeats(repeats: int, global_params: dict): + """ Round number of filters based on depth multiplier. 
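The four modules added in this change are identical apart from the variant name, the coefficients below, and the derived `inp_shape`/`oup` constants; collected in one place for reference:

```python
# (width, depth, resolution, dropout) per variant, as declared in each file:
EFFICIENTNET_PARAMS = {
    'efficientnet-b2': (1.1, 1.2, 260, 0.3),
    'efficientnet-b3': (1.2, 1.4, 300, 0.3),
    'efficientnet-b4': (1.4, 1.8, 380, 0.4),
    'efficientnet-b5': (1.6, 2.2, 456, 0.4),
}
```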
""" + multiplier = global_params.depth_coefficient + if not multiplier: + return repeats + return int(math.ceil(multiplier * repeats)) + + +class BlockDecoder(object): + """ + Block Decoder, straight from the official TensorFlow repository. + """ + + @staticmethod + def _decode_block_string(block_string: str): + """ Gets a block through a string notation of arguments. """ + assert isinstance(block_string, str) + + ops = block_string.split('_') + options = {} + for op in ops: + splits = re.split(r'(\d.*)', op) + if len(splits) >= 2: + key, value = splits[:2] + options[key] = value + + # Check stride + cond_1 = ('s' in options and len(options['s']) == 1) + cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) + assert (cond_1 or cond_2) + + return BlockArgs( + kernel_size=int(options['k']), + num_repeat=int(options['r']), + input_filters=int(options['i']), + output_filters=int(options['o']), + expand_ratio=int(options['e']), + id_skip=('noskip' not in block_string), + se_ratio=float(options['se']) if 'se' in options else None, + stride=[int(options['s'][0])]) + + @staticmethod + def _encode_block_string(block): + """Encodes a block to a string.""" + args = [ + 'r%d' % block.num_repeat, + 'k%d' % block.kernel_size, + 's%d%d' % (block.strides[0], block.strides[1]), + 'e%s' % block.expand_ratio, + 'i%d' % block.input_filters, + 'o%d' % block.output_filters + ] + if 0 < block.se_ratio <= 1: + args.append('se%s' % block.se_ratio) + if block.id_skip is False: + args.append('noskip') + return '_'.join(args) + + @staticmethod + def decode(string_list: list): + """ + Decode a list of string notations to specify blocks in the network. + + string_list: list of strings, each string is a notation of block + return + list of BlockArgs namedtuples of block args + """ + assert isinstance(string_list, list) + blocks_args = [] + for block_string in string_list: + blocks_args.append(BlockDecoder._decode_block_string(block_string)) + return blocks_args + + @staticmethod + def encode(blocks_args: list): + """ + Encodes a list of BlockArgs to a list of strings. 
+ + :param blocks_args: a list of BlockArgs namedtuples of block args + :return: a list of strings, each string is a notation of block + """ + block_strings = [] + for block in blocks_args: + block_strings.append(BlockDecoder._encode_block_string(block)) + return block_strings + + +def initial_type(name: str, use_bias: bool = False): + param_attr = ParamAttr(name=name + "_weights") + if use_bias: + bias_attr = ParamAttr(name=name + "_offset") + else: + bias_attr = False + return param_attr, bias_attr + + +def init_batch_norm_layer(name: str = "batch_norm"): + param_attr = ParamAttr(name=name + "_scale") + bias_attr = ParamAttr(name=name + "_offset") + return param_attr, bias_attr + + +def init_fc_layer(name: str = "fc"): + param_attr = ParamAttr(name=name + "_weights") + bias_attr = ParamAttr(name=name + "_offset") + return param_attr, bias_attr + + +def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): + """Calculate padding size.""" + if img_size % stride == 0: + out_size = max(filter_size - stride, 0) + else: + out_size = max(filter_size - (img_size % stride), 0) + return out_size // 2, out_size - out_size // 2 + + +inp_shape = {"b3": [300, 150, 150, 75, 38, 19, 19, 10]} + + +def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): + """Drop input connection""" + if is_test: + return inputs + keep_prob = 1.0 - prob + inputs_shape = paddle.shape(inputs) + random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) + binary_tensor = paddle.floor(random_tensor) + output = inputs / keep_prob * binary_tensor + return output + + +class Conv2ds(nn.Layer): + """Basic conv layer""" + + def __init__(self, + input_channels: int, + output_channels: int, + filter_size: int, + stride: int = 1, + padding: int = 0, + groups: int = None, + name: str = "conv2d", + act: str = None, + use_bias: bool = False, + padding_type: str = None, + model_name: str = None, + cur_stage: str = None): + super(Conv2ds, self).__init__() + assert act in [None, "swish", "sigmoid"] + self.act = act + + param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) + + def get_padding(filter_size, stride=1, dilation=1): + padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 + return padding + + inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] + self.need_crop = False + if padding_type == "SAME": + top_padding, bottom_padding = cal_padding(inps, stride, filter_size) + left_padding, right_padding = cal_padding(inps, stride, filter_size) + height_padding = bottom_padding + width_padding = right_padding + if top_padding != bottom_padding or left_padding != right_padding: + height_padding = top_padding + stride + width_padding = left_padding + stride + self.need_crop = True + padding = [height_padding, width_padding] + elif padding_type == "VALID": + height_padding = 0 + width_padding = 0 + padding = [height_padding, width_padding] + elif padding_type == "DYNAMIC": + padding = get_padding(filter_size, stride) + else: + padding = padding_type + + groups = 1 if groups is None else groups + self._conv = Conv2d( + input_channels, + output_channels, + filter_size, + groups=groups, + stride=stride, + padding=padding, + weight_attr=param_attr, + bias_attr=bias_attr) + + def forward(self, inputs: paddle.Tensor): + x = self._conv(inputs) + if self.act == "swish": + x = F.swish(x) + elif self.act == "sigmoid": + x = F.sigmoid(x) + + if self.need_crop: + x = x[:, :, 1:, 1:] + return x + + +class ConvBNLayer(nn.Layer): + """Basic conv 
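`_drop_connect` above is stochastic depth at the example level: each sample in the batch survives with probability `1 - prob`, and survivors are rescaled by `1 / keep_prob` so the expected activation is unchanged. An equivalent functional sketch, mirroring the code above:

```python
import paddle

def drop_connect(x, prob, training=True):
    if not training:
        return x
    keep_prob = 1.0 - prob
    # one Bernoulli(keep_prob) draw per example, broadcast over C, H, W
    mask = paddle.floor(keep_prob + paddle.rand([paddle.shape(x)[0], 1, 1, 1]))
    return x / keep_prob * mask
```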
bn layer.""" + + def __init__(self, + input_channels: int, + filter_size: int, + output_channels: int, + stride: int = 1, + num_groups: int = 1, + padding_type: str = "SAME", + conv_act: str = None, + bn_act: str = "swish", + use_bn: bool = True, + use_bias: bool = False, + name: str = None, + conv_name: str = None, + bn_name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2ds( + input_channels=input_channels, + output_channels=output_channels, + filter_size=filter_size, + stride=stride, + groups=num_groups, + act=conv_act, + padding_type=padding_type, + name=conv_name, + use_bias=use_bias, + model_name=model_name, + cur_stage=cur_stage) + self.use_bn = use_bn + if use_bn is True: + bn_name = name + bn_name + param_attr, bias_attr = init_batch_norm_layer(bn_name) + + self._bn = BatchNorm( + num_channels=output_channels, + act=bn_act, + momentum=0.99, + epsilon=0.001, + moving_mean_name=bn_name + "_mean", + moving_variance_name=bn_name + "_variance", + param_attr=param_attr, + bias_attr=bias_attr) + + def forward(self, inputs: paddle.Tensor): + if self.use_bn: + x = self._conv(inputs) + x = self._bn(x) + return x + else: + return self._conv(inputs) + + +class ExpandConvNorm(nn.Layer): + """Expand conv norm layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ExpandConvNorm, self).__init__() + + self.oup = block_args.input_filters * block_args.expand_ratio + self.expand_ratio = block_args.expand_ratio + + if self.expand_ratio != 1: + self._conv = ConvBNLayer( + input_channels, + 1, + self.oup, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_expand_conv", + bn_name="_bn0", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + if self.expand_ratio != 1: + return self._conv(inputs) + else: + return inputs + + +class DepthwiseConvNorm(nn.Layer): + """Depthwise conv norm layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(DepthwiseConvNorm, self).__init__() + + self.k = block_args.kernel_size + self.s = block_args.stride + if isinstance(self.s, list) or isinstance(self.s, tuple): + self.s = self.s[0] + oup = block_args.input_filters * block_args.expand_ratio + + self._conv = ConvBNLayer( + input_channels, + self.k, + oup, + self.s, + num_groups=input_channels, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_depthwise_conv", + bn_name="_bn1", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class ProjectConvNorm(nn.Layer): + """Projection conv bn layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ProjectConvNorm, self).__init__() + + final_oup = block_args.output_filters + + self._conv = ConvBNLayer( + input_channels, + 1, + final_oup, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_project_conv", + bn_name="_bn2", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class SEBlock(nn.Layer): + """Basic Squeeze-and-Excitation block for Efficientnet.""" + + def __init__(self, + 
input_channels: int, + num_squeezed_channels: int, + oup: int, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(SEBlock, self).__init__() + + self._pool = AdaptiveAvgPool2d(1) + self._conv1 = Conv2ds( + input_channels, + num_squeezed_channels, + 1, + use_bias=True, + padding_type=padding_type, + act="swish", + name=name + "_se_reduce") + + self._conv2 = Conv2ds( + num_squeezed_channels, + oup, + 1, + act="sigmoid", + use_bias=True, + padding_type=padding_type, + name=name + "_se_expand") + + def forward(self, inputs: paddle.Tensor): + x = self._pool(inputs) + x = self._conv1(x) + x = self._conv2(x) + return paddle.multiply(inputs, x) + + +class MbConvBlock(nn.Layer): + """Mobile inverted bottleneck convolution for Efficientnet.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + use_se: bool, + name: str = None, + drop_connect_rate: float = None, + is_test: bool = False, + model_name: str = None, + cur_stage: str = None): + super(MbConvBlock, self).__init__() + + oup = block_args.input_filters * block_args.expand_ratio + self.block_args = block_args + self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) + self.id_skip = block_args.id_skip + self.expand_ratio = block_args.expand_ratio + self.drop_connect_rate = drop_connect_rate + self.is_test = is_test + + if self.expand_ratio != 1: + self._ecn = ExpandConvNorm( + input_channels, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + self._dcn = DepthwiseConvNorm( + input_channels * block_args.expand_ratio, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + if self.has_se: + num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) + self._se = SEBlock( + input_channels * block_args.expand_ratio, + num_squeezed_channels, + oup, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + self._pcn = ProjectConvNorm( + input_channels * block_args.expand_ratio, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + x = inputs + if self.expand_ratio != 1: + x = self._ecn(x) + x = F.swish(x) + x = self._dcn(x) + x = F.swish(x) + if self.has_se: + x = self._se(x) + x = self._pcn(x) + if self.id_skip and \ + self.block_args.stride == 1 and \ + self.block_args.input_filters == self.block_args.output_filters: + if self.drop_connect_rate: + x = _drop_connect(x, self.drop_connect_rate, self.is_test) + x = paddle.elementwise_add(x, inputs) + return x + + +class ConvStemNorm(nn.Layer): + """Basic conv stem norm block for extracting features.""" + + def __init__(self, + input_channels: int, + padding_type: str, + _global_params: dict, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ConvStemNorm, self).__init__() + + output_channels = round_filters(32, _global_params) + self._conv = ConvBNLayer( + input_channels, + filter_size=3, + output_channels=output_channels, + stride=2, + bn_act=None, + padding_type=padding_type, + name="", + conv_name="_conv_stem", + bn_name="_bn0", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class ExtractFeatures(nn.Layer): + """Extract features.""" + + def __init__(self, + input_channels: int, + _block_args: dict, + 
_global_params: dict, + padding_type: str, + use_se: bool, + is_test: bool, + model_name: str = None): + super(ExtractFeatures, self).__init__() + + self._global_params = _global_params + + self._conv_stem = ConvStemNorm( + input_channels, + padding_type=padding_type, + _global_params=_global_params, + model_name=model_name, + cur_stage=0) + + self.block_args_copy = copy.deepcopy(_block_args) + idx = 0 + block_size = 0 + for block_arg in self.block_args_copy: + block_arg = block_arg._replace( + input_filters=round_filters(block_arg.input_filters, _global_params), + output_filters=round_filters(block_arg.output_filters, _global_params), + num_repeat=round_repeats(block_arg.num_repeat, _global_params)) + block_size += 1 + for _ in range(block_arg.num_repeat - 1): + block_size += 1 + + self.conv_seq = [] + cur_stage = 1 + for block_args in _block_args: + block_args = block_args._replace( + input_filters=round_filters(block_args.input_filters, _global_params), + output_filters=round_filters(block_args.output_filters, _global_params), + num_repeat=round_repeats(block_args.num_repeat, _global_params)) + + drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 + if drop_connect_rate: + drop_connect_rate *= float(idx) / block_size + + _mc_block = self.add_sublayer( + "_blocks." + str(idx) + ".", + MbConvBlock( + block_args.input_filters, + block_args=block_args, + padding_type=padding_type, + use_se=use_se, + name="_blocks." + str(idx) + ".", + drop_connect_rate=drop_connect_rate, + model_name=model_name, + cur_stage=cur_stage)) + self.conv_seq.append(_mc_block) + idx += 1 + if block_args.num_repeat > 1: + block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) + for _ in range(block_args.num_repeat - 1): + drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 + if drop_connect_rate: + drop_connect_rate *= float(idx) / block_size + _mc_block = self.add_sublayer( + "block." + str(idx) + ".", + MbConvBlock( + block_args.input_filters, + block_args, + padding_type=padding_type, + use_se=use_se, + name="_blocks." 
+ str(idx) + ".", + drop_connect_rate=drop_connect_rate, + model_name=model_name, + cur_stage=cur_stage)) + self.conv_seq.append(_mc_block) + idx += 1 + cur_stage += 1 + + def forward(self, inputs: paddle.Tensor): + x = self._conv_stem(inputs) + x = F.swish(x) + for _mc_block in self.conv_seq: + x = _mc_block(x) + return x + + +@moduleinfo( + name="efficientnetb3_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="efficientnetb3_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class EfficientNet_B3(nn.Layer): + def __init__(self, + is_test: bool = False, + padding_type: str = "SAME", + override_params: dict = None, + use_se: bool = True, + class_dim: int = 1000, + load_checkpoint: str = None): + super(EfficientNet_B3, self).__init__() + + model_name = 'efficientnet-b3' + self.name = "b3" + self._block_args, self._global_params = get_model_params(model_name, override_params) + self.padding_type = padding_type + self.use_se = use_se + self.is_test = is_test + + self._ef = ExtractFeatures( + 3, + self._block_args, + self._global_params, + self.padding_type, + self.use_se, + self.is_test, + model_name=self.name) + + output_channels = round_filters(1280, self._global_params) + oup = 384 + + self._conv = ConvBNLayer( + oup, + 1, + output_channels, + bn_act="swish", + padding_type=self.padding_type, + name="", + conv_name="_conv_head", + bn_name="_bn1", + model_name=self.name, + cur_stage=7) + self._pool = AdaptiveAvgPool2d(1) + + if self._global_params.dropout_rate: + self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") + + param_attr, bias_attr = init_fc_layer("_fc") + self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'efficientnet_b3_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b3_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + x = self._ef(inputs) + x = self._conv(x) + x = self._pool(x) + if self._global_params.dropout_rate: + x = self._drop(x) + x = paddle.squeeze(x, axis=[2, 3]) + x = self._fc(x) + return x diff --git a/modules/image/classification/efficientnetb4_imagenet/module.py b/modules/image/classification/efficientnetb4_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..58d1f91eb956b149bad0938ca9d228ac3798e1a9 --- /dev/null +++ b/modules/image/classification/efficientnetb4_imagenet/module.py @@ -0,0 +1,791 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import math +import collections +import re +import copy + +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + +GlobalParams = collections.namedtuple('GlobalParams', [ + 'batch_norm_momentum', + 'batch_norm_epsilon', + 'dropout_rate', + 'num_classes', + 'width_coefficient', + 'depth_coefficient', + 'depth_divisor', + 'min_depth', + 'drop_connect_rate', +]) + +BlockArgs = collections.namedtuple( + 'BlockArgs', + ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) + +GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) +BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) + + +def efficientnet_params(model_name: str): + """ Map EfficientNet model name to parameter coefficients. """ + params_dict = { + # Coefficients: width,depth,resolution,dropout + 'efficientnet-b4': (1.4, 1.8, 380, 0.4) + } + return params_dict[model_name] + + +def efficientnet(width_coefficient: float = None, + depth_coefficient: float = None, + dropout_rate: float = 0.2, + drop_connect_rate: float = 0.2): + """ Get block arguments according to parameter and coefficients. """ + blocks_args = [ + 'r1_k3_s11_e1_i32_o16_se0.25', + 'r2_k3_s22_e6_i16_o24_se0.25', + 'r2_k5_s22_e6_i24_o40_se0.25', + 'r3_k3_s22_e6_i40_o80_se0.25', + 'r3_k5_s11_e6_i80_o112_se0.25', + 'r4_k5_s22_e6_i112_o192_se0.25', + 'r1_k3_s11_e6_i192_o320_se0.25', + ] + blocks_args = BlockDecoder.decode(blocks_args) + + global_params = GlobalParams( + batch_norm_momentum=0.99, + batch_norm_epsilon=1e-3, + dropout_rate=dropout_rate, + drop_connect_rate=drop_connect_rate, + num_classes=1000, + width_coefficient=width_coefficient, + depth_coefficient=depth_coefficient, + depth_divisor=8, + min_depth=None) + + return blocks_args, global_params + + +def get_model_params(model_name: str, override_params: dict): + """ Get the block args and global params for a given model """ + if model_name.startswith('efficientnet'): + w, d, _, p = efficientnet_params(model_name) + blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) + else: + raise NotImplementedError('model name is not pre-defined: %s' % model_name) + if override_params: + global_params = global_params._replace(**override_params) + return blocks_args, global_params + + +def round_filters(filters: int, global_params: dict): + """ Calculate and round number of filters based on depth multiplier. """ + multiplier = global_params.width_coefficient + if not multiplier: + return filters + divisor = global_params.depth_divisor + min_depth = global_params.min_depth + filters *= multiplier + min_depth = min_depth or divisor + new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) + if new_filters < 0.9 * filters: # prevent rounding by more than 10% + new_filters += divisor + return int(new_filters) + + +def round_repeats(repeats: int, global_params: dict): + """ Round number of filters based on depth multiplier. 
""" + multiplier = global_params.depth_coefficient + if not multiplier: + return repeats + return int(math.ceil(multiplier * repeats)) + + +class BlockDecoder(object): + """ + Block Decoder, straight from the official TensorFlow repository. + """ + + @staticmethod + def _decode_block_string(block_string: str): + """ Gets a block through a string notation of arguments. """ + assert isinstance(block_string, str) + + ops = block_string.split('_') + options = {} + for op in ops: + splits = re.split(r'(\d.*)', op) + if len(splits) >= 2: + key, value = splits[:2] + options[key] = value + + # Check stride + cond_1 = ('s' in options and len(options['s']) == 1) + cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) + assert (cond_1 or cond_2) + + return BlockArgs( + kernel_size=int(options['k']), + num_repeat=int(options['r']), + input_filters=int(options['i']), + output_filters=int(options['o']), + expand_ratio=int(options['e']), + id_skip=('noskip' not in block_string), + se_ratio=float(options['se']) if 'se' in options else None, + stride=[int(options['s'][0])]) + + @staticmethod + def _encode_block_string(block): + """Encodes a block to a string.""" + args = [ + 'r%d' % block.num_repeat, + 'k%d' % block.kernel_size, + 's%d%d' % (block.strides[0], block.strides[1]), + 'e%s' % block.expand_ratio, + 'i%d' % block.input_filters, + 'o%d' % block.output_filters + ] + if 0 < block.se_ratio <= 1: + args.append('se%s' % block.se_ratio) + if block.id_skip is False: + args.append('noskip') + return '_'.join(args) + + @staticmethod + def decode(string_list: list): + """ + Decode a list of string notations to specify blocks in the network. + + string_list: list of strings, each string is a notation of block + return + list of BlockArgs namedtuples of block args + """ + assert isinstance(string_list, list) + blocks_args = [] + for block_string in string_list: + blocks_args.append(BlockDecoder._decode_block_string(block_string)) + return blocks_args + + @staticmethod + def encode(blocks_args: list): + """ + Encodes a list of BlockArgs to a list of strings. 
+ + :param blocks_args: a list of BlockArgs namedtuples of block args + :return: a list of strings, each string is a notation of block + """ + block_strings = [] + for block in blocks_args: + block_strings.append(BlockDecoder._encode_block_string(block)) + return block_strings + + +def initial_type(name: str, use_bias: bool = False): + param_attr = ParamAttr(name=name + "_weights") + if use_bias: + bias_attr = ParamAttr(name=name + "_offset") + else: + bias_attr = False + return param_attr, bias_attr + + +def init_batch_norm_layer(name: str = "batch_norm"): + param_attr = ParamAttr(name=name + "_scale") + bias_attr = ParamAttr(name=name + "_offset") + return param_attr, bias_attr + + +def init_fc_layer(name: str = "fc"): + param_attr = ParamAttr(name=name + "_weights") + bias_attr = ParamAttr(name=name + "_offset") + return param_attr, bias_attr + + +def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): + """Calculate padding size.""" + if img_size % stride == 0: + out_size = max(filter_size - stride, 0) + else: + out_size = max(filter_size - (img_size % stride), 0) + return out_size // 2, out_size - out_size // 2 + + +inp_shape = {"b4": [380, 190, 190, 95, 48, 24, 24, 12]} + + +def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): + """Drop input connection""" + if is_test: + return inputs + keep_prob = 1.0 - prob + inputs_shape = paddle.shape(inputs) + random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) + binary_tensor = paddle.floor(random_tensor) + output = inputs / keep_prob * binary_tensor + return output + + +class Conv2ds(nn.Layer): + """Basic conv layer""" + + def __init__(self, + input_channels: int, + output_channels: int, + filter_size: int, + stride: int = 1, + padding: int = 0, + groups: int = None, + name: str = "conv2d", + act: str = None, + use_bias: bool = False, + padding_type: str = None, + model_name: str = None, + cur_stage: str = None): + super(Conv2ds, self).__init__() + assert act in [None, "swish", "sigmoid"] + self.act = act + + param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) + + def get_padding(filter_size, stride=1, dilation=1): + padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 + return padding + + inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] + self.need_crop = False + if padding_type == "SAME": + top_padding, bottom_padding = cal_padding(inps, stride, filter_size) + left_padding, right_padding = cal_padding(inps, stride, filter_size) + height_padding = bottom_padding + width_padding = right_padding + if top_padding != bottom_padding or left_padding != right_padding: + height_padding = top_padding + stride + width_padding = left_padding + stride + self.need_crop = True + padding = [height_padding, width_padding] + elif padding_type == "VALID": + height_padding = 0 + width_padding = 0 + padding = [height_padding, width_padding] + elif padding_type == "DYNAMIC": + padding = get_padding(filter_size, stride) + else: + padding = padding_type + + groups = 1 if groups is None else groups + self._conv = Conv2d( + input_channels, + output_channels, + filter_size, + groups=groups, + stride=stride, + padding=padding, + weight_attr=param_attr, + bias_attr=bias_attr) + + def forward(self, inputs: paddle.Tensor): + x = self._conv(inputs) + if self.act == "swish": + x = F.swish(x) + elif self.act == "sigmoid": + x = F.sigmoid(x) + + if self.need_crop: + x = x[:, :, 1:, 1:] + return x + + +class ConvBNLayer(nn.Layer): + """Basic conv 
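The hard-coded `inp_shape` table above is just the 380x380 B4 input traced through the stem and the stage strides (s11, s22, s22, s22, s11, s22, s11 from the block strings), halving with ceiling division at every stride-2 layer. An illustrative re-derivation:

```python
import math

size, shapes = 380, []
for stride in [2, 1, 2, 2, 2, 1, 2, 1]:  # stem, then the seven stages
    shapes.append(size)                  # spatial size entering this stage
    size = math.ceil(size / stride)
print(shapes)  # [380, 190, 190, 95, 48, 24, 24, 12] == inp_shape["b4"]
```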
bn layer.""" + + def __init__(self, + input_channels: int, + filter_size: int, + output_channels: int, + stride: int = 1, + num_groups: int = 1, + padding_type: str = "SAME", + conv_act: str = None, + bn_act: str = "swish", + use_bn: bool = True, + use_bias: bool = False, + name: str = None, + conv_name: str = None, + bn_name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2ds( + input_channels=input_channels, + output_channels=output_channels, + filter_size=filter_size, + stride=stride, + groups=num_groups, + act=conv_act, + padding_type=padding_type, + name=conv_name, + use_bias=use_bias, + model_name=model_name, + cur_stage=cur_stage) + self.use_bn = use_bn + if use_bn is True: + bn_name = name + bn_name + param_attr, bias_attr = init_batch_norm_layer(bn_name) + + self._bn = BatchNorm( + num_channels=output_channels, + act=bn_act, + momentum=0.99, + epsilon=0.001, + moving_mean_name=bn_name + "_mean", + moving_variance_name=bn_name + "_variance", + param_attr=param_attr, + bias_attr=bias_attr) + + def forward(self, inputs: paddle.Tensor): + if self.use_bn: + x = self._conv(inputs) + x = self._bn(x) + return x + else: + return self._conv(inputs) + + +class ExpandConvNorm(nn.Layer): + """Expand conv norm layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ExpandConvNorm, self).__init__() + + self.oup = block_args.input_filters * block_args.expand_ratio + self.expand_ratio = block_args.expand_ratio + + if self.expand_ratio != 1: + self._conv = ConvBNLayer( + input_channels, + 1, + self.oup, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_expand_conv", + bn_name="_bn0", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + if self.expand_ratio != 1: + return self._conv(inputs) + else: + return inputs + + +class DepthwiseConvNorm(nn.Layer): + """Depthwise conv norm layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(DepthwiseConvNorm, self).__init__() + + self.k = block_args.kernel_size + self.s = block_args.stride + if isinstance(self.s, list) or isinstance(self.s, tuple): + self.s = self.s[0] + oup = block_args.input_filters * block_args.expand_ratio + + self._conv = ConvBNLayer( + input_channels, + self.k, + oup, + self.s, + num_groups=input_channels, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_depthwise_conv", + bn_name="_bn1", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class ProjectConvNorm(nn.Layer): + """Projection conv bn layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ProjectConvNorm, self).__init__() + + final_oup = block_args.output_filters + + self._conv = ConvBNLayer( + input_channels, + 1, + final_oup, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_project_conv", + bn_name="_bn2", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class SEBlock(nn.Layer): + """Basic Squeeze-and-Excitation block for Efficientnet.""" + + def __init__(self, + 
input_channels: int, + num_squeezed_channels: int, + oup: int, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(SEBlock, self).__init__() + + self._pool = AdaptiveAvgPool2d(1) + self._conv1 = Conv2ds( + input_channels, + num_squeezed_channels, + 1, + use_bias=True, + padding_type=padding_type, + act="swish", + name=name + "_se_reduce") + + self._conv2 = Conv2ds( + num_squeezed_channels, + oup, + 1, + act="sigmoid", + use_bias=True, + padding_type=padding_type, + name=name + "_se_expand") + + def forward(self, inputs: paddle.Tensor): + x = self._pool(inputs) + x = self._conv1(x) + x = self._conv2(x) + return paddle.multiply(inputs, x) + + +class MbConvBlock(nn.Layer): + """Mobile inverted bottleneck convolution for Efficientnet.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + use_se: bool, + name: str = None, + drop_connect_rate: float = None, + is_test: bool = False, + model_name: str = None, + cur_stage: str = None): + super(MbConvBlock, self).__init__() + + oup = block_args.input_filters * block_args.expand_ratio + self.block_args = block_args + self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) + self.id_skip = block_args.id_skip + self.expand_ratio = block_args.expand_ratio + self.drop_connect_rate = drop_connect_rate + self.is_test = is_test + + if self.expand_ratio != 1: + self._ecn = ExpandConvNorm( + input_channels, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + self._dcn = DepthwiseConvNorm( + input_channels * block_args.expand_ratio, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + if self.has_se: + num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) + self._se = SEBlock( + input_channels * block_args.expand_ratio, + num_squeezed_channels, + oup, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + self._pcn = ProjectConvNorm( + input_channels * block_args.expand_ratio, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + x = inputs + if self.expand_ratio != 1: + x = self._ecn(x) + x = F.swish(x) + x = self._dcn(x) + x = F.swish(x) + if self.has_se: + x = self._se(x) + x = self._pcn(x) + if self.id_skip and \ + self.block_args.stride == 1 and \ + self.block_args.input_filters == self.block_args.output_filters: + if self.drop_connect_rate: + x = _drop_connect(x, self.drop_connect_rate, self.is_test) + x = paddle.elementwise_add(x, inputs) + return x + + +class ConvStemNorm(nn.Layer): + """Basic conv stem norm block for extracting features.""" + + def __init__(self, + input_channels: int, + padding_type: str, + _global_params: dict, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ConvStemNorm, self).__init__() + + output_channels = round_filters(32, _global_params) + self._conv = ConvBNLayer( + input_channels, + filter_size=3, + output_channels=output_channels, + stride=2, + bn_act=None, + padding_type=padding_type, + name="", + conv_name="_conv_stem", + bn_name="_bn0", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class ExtractFeatures(nn.Layer): + """Extract features.""" + + def __init__(self, + input_channels: int, + _block_args: dict, + 
_global_params: dict, + padding_type: str, + use_se: bool, + is_test: bool, + model_name: str = None): + super(ExtractFeatures, self).__init__() + + self._global_params = _global_params + + self._conv_stem = ConvStemNorm( + input_channels, + padding_type=padding_type, + _global_params=_global_params, + model_name=model_name, + cur_stage=0) + + self.block_args_copy = copy.deepcopy(_block_args) + idx = 0 + block_size = 0 + for block_arg in self.block_args_copy: + block_arg = block_arg._replace( + input_filters=round_filters(block_arg.input_filters, _global_params), + output_filters=round_filters(block_arg.output_filters, _global_params), + num_repeat=round_repeats(block_arg.num_repeat, _global_params)) + block_size += 1 + for _ in range(block_arg.num_repeat - 1): + block_size += 1 + + self.conv_seq = [] + cur_stage = 1 + for block_args in _block_args: + block_args = block_args._replace( + input_filters=round_filters(block_args.input_filters, _global_params), + output_filters=round_filters(block_args.output_filters, _global_params), + num_repeat=round_repeats(block_args.num_repeat, _global_params)) + + drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 + if drop_connect_rate: + drop_connect_rate *= float(idx) / block_size + + _mc_block = self.add_sublayer( + "_blocks." + str(idx) + ".", + MbConvBlock( + block_args.input_filters, + block_args=block_args, + padding_type=padding_type, + use_se=use_se, + name="_blocks." + str(idx) + ".", + drop_connect_rate=drop_connect_rate, + model_name=model_name, + cur_stage=cur_stage)) + self.conv_seq.append(_mc_block) + idx += 1 + if block_args.num_repeat > 1: + block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) + for _ in range(block_args.num_repeat - 1): + drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 + if drop_connect_rate: + drop_connect_rate *= float(idx) / block_size + _mc_block = self.add_sublayer( + "block." + str(idx) + ".", + MbConvBlock( + block_args.input_filters, + block_args, + padding_type=padding_type, + use_se=use_se, + name="_blocks." 
+ str(idx) + ".", + drop_connect_rate=drop_connect_rate, + model_name=model_name, + cur_stage=cur_stage)) + self.conv_seq.append(_mc_block) + idx += 1 + cur_stage += 1 + + def forward(self, inputs: paddle.Tensor): + x = self._conv_stem(inputs) + x = F.swish(x) + for _mc_block in self.conv_seq: + x = _mc_block(x) + return x + + +@moduleinfo( + name="efficientnetb4_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="efficientnetb4_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class EfficientNet_B4(nn.Layer): + def __init__(self, + is_test: bool = False, + padding_type: str = "SAME", + override_params: dict = None, + use_se: bool = True, + class_dim: int = 1000, + load_checkpoint: str = None): + super(EfficientNet_B4, self).__init__() + + model_name = 'efficientnet-b4' + self.name = "b4" + self._block_args, self._global_params = get_model_params(model_name, override_params) + self.padding_type = padding_type + self.use_se = use_se + self.is_test = is_test + + self._ef = ExtractFeatures( + 3, + self._block_args, + self._global_params, + self.padding_type, + self.use_se, + self.is_test, + model_name=self.name) + + output_channels = round_filters(1280, self._global_params) + oup = 448 + + self._conv = ConvBNLayer( + oup, + 1, + output_channels, + bn_act="swish", + padding_type=self.padding_type, + name="", + conv_name="_conv_head", + bn_name="_bn1", + model_name=self.name, + cur_stage=7) + self._pool = AdaptiveAvgPool2d(1) + + if self._global_params.dropout_rate: + self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") + + param_attr, bias_attr = init_fc_layer("_fc") + self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'efficientnet_b4_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b4_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + x = self._ef(inputs) + x = self._conv(x) + x = self._pool(x) + if self._global_params.dropout_rate: + x = self._drop(x) + x = paddle.squeeze(x, axis=[2, 3]) + x = self._fc(x) + return x diff --git a/modules/image/classification/efficientnetb5_imagenet/module.py b/modules/image/classification/efficientnetb5_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..7232005233995a08e02678c681604df32041675c --- /dev/null +++ b/modules/image/classification/efficientnetb5_imagenet/module.py @@ -0,0 +1,791 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import math +import collections +import re +import copy + +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + +GlobalParams = collections.namedtuple('GlobalParams', [ + 'batch_norm_momentum', + 'batch_norm_epsilon', + 'dropout_rate', + 'num_classes', + 'width_coefficient', + 'depth_coefficient', + 'depth_divisor', + 'min_depth', + 'drop_connect_rate', +]) + +BlockArgs = collections.namedtuple( + 'BlockArgs', + ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) + +GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) +BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) + + +def efficientnet_params(model_name: str): + """ Map EfficientNet model name to parameter coefficients. """ + params_dict = { + # Coefficients: width,depth,resolution,dropout + 'efficientnet-b5': (1.6, 2.2, 456, 0.4) + } + return params_dict[model_name] + + +def efficientnet(width_coefficient: float = None, + depth_coefficient: float = None, + dropout_rate: float = 0.2, + drop_connect_rate: float = 0.2): + """ Get block arguments according to parameter and coefficients. """ + blocks_args = [ + 'r1_k3_s11_e1_i32_o16_se0.25', + 'r2_k3_s22_e6_i16_o24_se0.25', + 'r2_k5_s22_e6_i24_o40_se0.25', + 'r3_k3_s22_e6_i40_o80_se0.25', + 'r3_k5_s11_e6_i80_o112_se0.25', + 'r4_k5_s22_e6_i112_o192_se0.25', + 'r1_k3_s11_e6_i192_o320_se0.25', + ] + blocks_args = BlockDecoder.decode(blocks_args) + + global_params = GlobalParams( + batch_norm_momentum=0.99, + batch_norm_epsilon=1e-3, + dropout_rate=dropout_rate, + drop_connect_rate=drop_connect_rate, + num_classes=1000, + width_coefficient=width_coefficient, + depth_coefficient=depth_coefficient, + depth_divisor=8, + min_depth=None) + + return blocks_args, global_params + + +def get_model_params(model_name: str, override_params: dict): + """ Get the block args and global params for a given model """ + if model_name.startswith('efficientnet'): + w, d, _, p = efficientnet_params(model_name) + blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) + else: + raise NotImplementedError('model name is not pre-defined: %s' % model_name) + if override_params: + global_params = global_params._replace(**override_params) + return blocks_args, global_params + + +def round_filters(filters: int, global_params: dict): + """ Calculate and round number of filters based on depth multiplier. """ + multiplier = global_params.width_coefficient + if not multiplier: + return filters + divisor = global_params.depth_divisor + min_depth = global_params.min_depth + filters *= multiplier + min_depth = min_depth or divisor + new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) + if new_filters < 0.9 * filters: # prevent rounding by more than 10% + new_filters += divisor + return int(new_filters) + + +def round_repeats(repeats: int, global_params: dict): + """ Round number of filters based on depth multiplier. 
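Worked example of the `round_filters` arithmetic above for this file's efficientnet-b5 coefficients (width_coefficient=1.6, depth_divisor=8, min_depth=None, so the floor is 8); a standalone sketch:

```python
# round_filters(32, ...) for B5: this is why the stem ends up with 48 channels.
filters, multiplier, divisor = 32, 1.6, 8
scaled = filters * multiplier                                               # 51.2
new_filters = max(divisor, int(scaled + divisor / 2) // divisor * divisor)  # 48
if new_filters < 0.9 * scaled:   # 48 >= 46.08, so no correction needed
    new_filters += divisor
assert new_filters == 48
```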
""" + multiplier = global_params.depth_coefficient + if not multiplier: + return repeats + return int(math.ceil(multiplier * repeats)) + + +class BlockDecoder(object): + """ + Block Decoder, straight from the official TensorFlow repository. + """ + + @staticmethod + def _decode_block_string(block_string: str): + """ Gets a block through a string notation of arguments. """ + assert isinstance(block_string, str) + + ops = block_string.split('_') + options = {} + for op in ops: + splits = re.split(r'(\d.*)', op) + if len(splits) >= 2: + key, value = splits[:2] + options[key] = value + + # Check stride + cond_1 = ('s' in options and len(options['s']) == 1) + cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) + assert (cond_1 or cond_2) + + return BlockArgs( + kernel_size=int(options['k']), + num_repeat=int(options['r']), + input_filters=int(options['i']), + output_filters=int(options['o']), + expand_ratio=int(options['e']), + id_skip=('noskip' not in block_string), + se_ratio=float(options['se']) if 'se' in options else None, + stride=[int(options['s'][0])]) + + @staticmethod + def _encode_block_string(block): + """Encodes a block to a string.""" + args = [ + 'r%d' % block.num_repeat, + 'k%d' % block.kernel_size, + 's%d%d' % (block.strides[0], block.strides[1]), + 'e%s' % block.expand_ratio, + 'i%d' % block.input_filters, + 'o%d' % block.output_filters + ] + if 0 < block.se_ratio <= 1: + args.append('se%s' % block.se_ratio) + if block.id_skip is False: + args.append('noskip') + return '_'.join(args) + + @staticmethod + def decode(string_list: list): + """ + Decode a list of string notations to specify blocks in the network. + + string_list: list of strings, each string is a notation of block + return + list of BlockArgs namedtuples of block args + """ + assert isinstance(string_list, list) + blocks_args = [] + for block_string in string_list: + blocks_args.append(BlockDecoder._decode_block_string(block_string)) + return blocks_args + + @staticmethod + def encode(blocks_args: list): + """ + Encodes a list of BlockArgs to a list of strings. 
+ + :param blocks_args: a list of BlockArgs namedtuples of block args + :return: a list of strings, each string is a notation of block + """ + block_strings = [] + for block in blocks_args: + block_strings.append(BlockDecoder._encode_block_string(block)) + return block_strings + + +def initial_type(name: str, use_bias: bool = False): + param_attr = ParamAttr(name=name + "_weights") + if use_bias: + bias_attr = ParamAttr(name=name + "_offset") + else: + bias_attr = False + return param_attr, bias_attr + + +def init_batch_norm_layer(name: str = "batch_norm"): + param_attr = ParamAttr(name=name + "_scale") + bias_attr = ParamAttr(name=name + "_offset") + return param_attr, bias_attr + + +def init_fc_layer(name: str = "fc"): + param_attr = ParamAttr(name=name + "_weights") + bias_attr = ParamAttr(name=name + "_offset") + return param_attr, bias_attr + + +def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): + """Calculate padding size.""" + if img_size % stride == 0: + out_size = max(filter_size - stride, 0) + else: + out_size = max(filter_size - (img_size % stride), 0) + return out_size // 2, out_size - out_size // 2 + + +inp_shape = {"b5": [456, 228, 228, 114, 57, 29, 29, 15]} + + +def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): + """Drop input connection""" + if is_test: + return inputs + keep_prob = 1.0 - prob + inputs_shape = paddle.shape(inputs) + random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) + binary_tensor = paddle.floor(random_tensor) + output = inputs / keep_prob * binary_tensor + return output + + +class Conv2ds(nn.Layer): + """Basic conv layer""" + + def __init__(self, + input_channels: int, + output_channels: int, + filter_size: int, + stride: int = 1, + padding: int = 0, + groups: int = None, + name: str = "conv2d", + act: str = None, + use_bias: bool = False, + padding_type: str = None, + model_name: str = None, + cur_stage: str = None): + super(Conv2ds, self).__init__() + assert act in [None, "swish", "sigmoid"] + self.act = act + + param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) + + def get_padding(filter_size, stride=1, dilation=1): + padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 + return padding + + inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] + self.need_crop = False + if padding_type == "SAME": + top_padding, bottom_padding = cal_padding(inps, stride, filter_size) + left_padding, right_padding = cal_padding(inps, stride, filter_size) + height_padding = bottom_padding + width_padding = right_padding + if top_padding != bottom_padding or left_padding != right_padding: + height_padding = top_padding + stride + width_padding = left_padding + stride + self.need_crop = True + padding = [height_padding, width_padding] + elif padding_type == "VALID": + height_padding = 0 + width_padding = 0 + padding = [height_padding, width_padding] + elif padding_type == "DYNAMIC": + padding = get_padding(filter_size, stride) + else: + padding = padding_type + + groups = 1 if groups is None else groups + self._conv = Conv2d( + input_channels, + output_channels, + filter_size, + groups=groups, + stride=stride, + padding=padding, + weight_attr=param_attr, + bias_attr=bias_attr) + + def forward(self, inputs: paddle.Tensor): + x = self._conv(inputs) + if self.act == "swish": + x = F.swish(x) + elif self.act == "sigmoid": + x = F.sigmoid(x) + + if self.need_crop: + x = x[:, :, 1:, 1:] + return x + + +class ConvBNLayer(nn.Layer): + """Basic conv 
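`cal_padding` above reproduces TensorFlow-style SAME padding, which can be asymmetric; `Conv2ds` then over-pads symmetrically and crops. A worked case for the B5 stem (input 456, stride 2, kernel 3):

```python
def cal_padding(img_size, stride, filter_size, dilation=1):
    """Copy of the helper above, inlined for a standalone check."""
    if img_size % stride == 0:
        out_size = max(filter_size - stride, 0)
    else:
        out_size = max(filter_size - (img_size % stride), 0)
    return out_size // 2, out_size - out_size // 2

assert cal_padding(456, 2, 3) == (0, 1)   # SAME wants 0 on top, 1 on bottom
# Conv2d only supports symmetric padding, so Conv2ds pads top_padding + stride
# on both sides, sets need_crop=True, and drops the first row/column
# (x[:, :, 1:, 1:]) after the convolution.
```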
bn layer.""" + + def __init__(self, + input_channels: int, + filter_size: int, + output_channels: int, + stride: int = 1, + num_groups: int = 1, + padding_type: str = "SAME", + conv_act: str = None, + bn_act: str = "swish", + use_bn: bool = True, + use_bias: bool = False, + name: str = None, + conv_name: str = None, + bn_name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2ds( + input_channels=input_channels, + output_channels=output_channels, + filter_size=filter_size, + stride=stride, + groups=num_groups, + act=conv_act, + padding_type=padding_type, + name=conv_name, + use_bias=use_bias, + model_name=model_name, + cur_stage=cur_stage) + self.use_bn = use_bn + if use_bn is True: + bn_name = name + bn_name + param_attr, bias_attr = init_batch_norm_layer(bn_name) + + self._bn = BatchNorm( + num_channels=output_channels, + act=bn_act, + momentum=0.99, + epsilon=0.001, + moving_mean_name=bn_name + "_mean", + moving_variance_name=bn_name + "_variance", + param_attr=param_attr, + bias_attr=bias_attr) + + def forward(self, inputs: paddle.Tensor): + if self.use_bn: + x = self._conv(inputs) + x = self._bn(x) + return x + else: + return self._conv(inputs) + + +class ExpandConvNorm(nn.Layer): + """Expand conv norm layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ExpandConvNorm, self).__init__() + + self.oup = block_args.input_filters * block_args.expand_ratio + self.expand_ratio = block_args.expand_ratio + + if self.expand_ratio != 1: + self._conv = ConvBNLayer( + input_channels, + 1, + self.oup, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_expand_conv", + bn_name="_bn0", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + if self.expand_ratio != 1: + return self._conv(inputs) + else: + return inputs + + +class DepthwiseConvNorm(nn.Layer): + """Depthwise conv norm layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(DepthwiseConvNorm, self).__init__() + + self.k = block_args.kernel_size + self.s = block_args.stride + if isinstance(self.s, list) or isinstance(self.s, tuple): + self.s = self.s[0] + oup = block_args.input_filters * block_args.expand_ratio + + self._conv = ConvBNLayer( + input_channels, + self.k, + oup, + self.s, + num_groups=input_channels, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_depthwise_conv", + bn_name="_bn1", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class ProjectConvNorm(nn.Layer): + """Projection conv bn layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ProjectConvNorm, self).__init__() + + final_oup = block_args.output_filters + + self._conv = ConvBNLayer( + input_channels, + 1, + final_oup, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_project_conv", + bn_name="_bn2", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class SEBlock(nn.Layer): + """Basic Squeeze-and-Excitation block for Efficientnet.""" + + def __init__(self, + 
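How the channel counts flow through one MBConv block built from the Expand/Depthwise/Project layers above, using B5's third stage string 'r2_k5_s22_e6_i24_o40_se0.25' after width scaling by 1.6 (illustrative arithmetic only):

```python
input_filters, output_filters = 40, 64   # round_filters(24) and round_filters(40)
expand_ratio, se_ratio = 6, 0.25

expanded = input_filters * expand_ratio            # 240: ExpandConvNorm 1x1 conv
groups = expanded                                  # DepthwiseConvNorm: groups == channels
squeezed = max(1, int(input_filters * se_ratio))   # 10: SEBlock reduce width
assert (expanded, groups, squeezed, output_filters) == (240, 240, 10, 64)
```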
input_channels: int, + num_squeezed_channels: int, + oup: int, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(SEBlock, self).__init__() + + self._pool = AdaptiveAvgPool2d(1) + self._conv1 = Conv2ds( + input_channels, + num_squeezed_channels, + 1, + use_bias=True, + padding_type=padding_type, + act="swish", + name=name + "_se_reduce") + + self._conv2 = Conv2ds( + num_squeezed_channels, + oup, + 1, + act="sigmoid", + use_bias=True, + padding_type=padding_type, + name=name + "_se_expand") + + def forward(self, inputs: paddle.Tensor): + x = self._pool(inputs) + x = self._conv1(x) + x = self._conv2(x) + return paddle.multiply(inputs, x) + + +class MbConvBlock(nn.Layer): + """Mobile inverted bottleneck convolution for Efficientnet.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + use_se: bool, + name: str = None, + drop_connect_rate: float = None, + is_test: bool = False, + model_name: str = None, + cur_stage: str = None): + super(MbConvBlock, self).__init__() + + oup = block_args.input_filters * block_args.expand_ratio + self.block_args = block_args + self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) + self.id_skip = block_args.id_skip + self.expand_ratio = block_args.expand_ratio + self.drop_connect_rate = drop_connect_rate + self.is_test = is_test + + if self.expand_ratio != 1: + self._ecn = ExpandConvNorm( + input_channels, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + self._dcn = DepthwiseConvNorm( + input_channels * block_args.expand_ratio, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + if self.has_se: + num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) + self._se = SEBlock( + input_channels * block_args.expand_ratio, + num_squeezed_channels, + oup, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + self._pcn = ProjectConvNorm( + input_channels * block_args.expand_ratio, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + x = inputs + if self.expand_ratio != 1: + x = self._ecn(x) + x = F.swish(x) + x = self._dcn(x) + x = F.swish(x) + if self.has_se: + x = self._se(x) + x = self._pcn(x) + if self.id_skip and \ + self.block_args.stride == 1 and \ + self.block_args.input_filters == self.block_args.output_filters: + if self.drop_connect_rate: + x = _drop_connect(x, self.drop_connect_rate, self.is_test) + x = paddle.elementwise_add(x, inputs) + return x + + +class ConvStemNorm(nn.Layer): + """Basic conv stem norm block for extracting features.""" + + def __init__(self, + input_channels: int, + padding_type: str, + _global_params: dict, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ConvStemNorm, self).__init__() + + output_channels = round_filters(32, _global_params) + self._conv = ConvBNLayer( + input_channels, + filter_size=3, + output_channels=output_channels, + stride=2, + bn_act=None, + padding_type=padding_type, + name="", + conv_name="_conv_stem", + bn_name="_bn0", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class ExtractFeatures(nn.Layer): + """Extract features.""" + + def __init__(self, + input_channels: int, + _block_args: dict, + 
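As I read the naming scheme, checkpoint compatibility hinges on the ParamAttr names composed from `name`/`conv_name`/`bn_name`, not on the `add_sublayer` keys (the repeat loop below registers sublayers under "block.N." while keeping "_blocks.N." in the parameter names, which is presumably benign but worth a double check). A hypothetical trace for block idx=3:

```python
# Hypothetical trace of the composed parameter names for block idx=3.
name = "_blocks." + str(3) + "."
conv_name = name + "_depthwise_conv"
bn_name = name + "_bn1"
print(conv_name + "_weights")   # _blocks.3._depthwise_conv_weights
print(bn_name + "_scale")       # _blocks.3._bn1_scale
print(bn_name + "_variance")    # _blocks.3._bn1_variance
```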
_global_params: dict, + padding_type: str, + use_se: bool, + is_test: bool, + model_name: str = None): + super(ExtractFeatures, self).__init__() + + self._global_params = _global_params + + self._conv_stem = ConvStemNorm( + input_channels, + padding_type=padding_type, + _global_params=_global_params, + model_name=model_name, + cur_stage=0) + + self.block_args_copy = copy.deepcopy(_block_args) + idx = 0 + block_size = 0 + for block_arg in self.block_args_copy: + block_arg = block_arg._replace( + input_filters=round_filters(block_arg.input_filters, _global_params), + output_filters=round_filters(block_arg.output_filters, _global_params), + num_repeat=round_repeats(block_arg.num_repeat, _global_params)) + block_size += 1 + for _ in range(block_arg.num_repeat - 1): + block_size += 1 + + self.conv_seq = [] + cur_stage = 1 + for block_args in _block_args: + block_args = block_args._replace( + input_filters=round_filters(block_args.input_filters, _global_params), + output_filters=round_filters(block_args.output_filters, _global_params), + num_repeat=round_repeats(block_args.num_repeat, _global_params)) + + drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 + if drop_connect_rate: + drop_connect_rate *= float(idx) / block_size + + _mc_block = self.add_sublayer( + "_blocks." + str(idx) + ".", + MbConvBlock( + block_args.input_filters, + block_args=block_args, + padding_type=padding_type, + use_se=use_se, + name="_blocks." + str(idx) + ".", + drop_connect_rate=drop_connect_rate, + model_name=model_name, + cur_stage=cur_stage)) + self.conv_seq.append(_mc_block) + idx += 1 + if block_args.num_repeat > 1: + block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) + for _ in range(block_args.num_repeat - 1): + drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 + if drop_connect_rate: + drop_connect_rate *= float(idx) / block_size + _mc_block = self.add_sublayer( + "block." + str(idx) + ".", + MbConvBlock( + block_args.input_filters, + block_args, + padding_type=padding_type, + use_se=use_se, + name="_blocks." 
+ str(idx) + ".", + drop_connect_rate=drop_connect_rate, + model_name=model_name, + cur_stage=cur_stage)) + self.conv_seq.append(_mc_block) + idx += 1 + cur_stage += 1 + + def forward(self, inputs: paddle.Tensor): + x = self._conv_stem(inputs) + x = F.swish(x) + for _mc_block in self.conv_seq: + x = _mc_block(x) + return x + + +@moduleinfo( + name="efficientnetb5_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="efficientnetb5_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class EfficientNet_B5(nn.Layer): + def __init__(self, + is_test: bool = False, + padding_type: str = "SAME", + override_params: dict = None, + use_se: bool = True, + class_dim: int = 1000, + load_checkpoint: str = None): + super(EfficientNet_B5, self).__init__() + + model_name = 'efficientnet-b5' + self.name = "b5" + self._block_args, self._global_params = get_model_params(model_name, override_params) + self.padding_type = padding_type + self.use_se = use_se + self.is_test = is_test + + self._ef = ExtractFeatures( + 3, + self._block_args, + self._global_params, + self.padding_type, + self.use_se, + self.is_test, + model_name=self.name) + + output_channels = round_filters(1280, self._global_params) + oup = 512 + + self._conv = ConvBNLayer( + oup, + 1, + output_channels, + bn_act="swish", + padding_type=self.padding_type, + name="", + conv_name="_conv_head", + bn_name="_bn1", + model_name=self.name, + cur_stage=7) + self._pool = AdaptiveAvgPool2d(1) + + if self._global_params.dropout_rate: + self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") + + param_attr, bias_attr = init_fc_layer("_fc") + self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'efficientnet_b5_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b5_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + x = self._ef(inputs) + x = self._conv(x) + x = self._pool(x) + if self._global_params.dropout_rate: + x = self._drop(x) + x = paddle.squeeze(x, axis=[2, 3]) + x = self._fc(x) + return x diff --git a/modules/image/classification/efficientnetb6_imagenet/module.py b/modules/image/classification/efficientnetb6_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..d2323354f408c8dcd19dfc9fbfb6255212c4fb2d --- /dev/null +++ b/modules/image/classification/efficientnetb6_imagenet/module.py @@ -0,0 +1,791 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
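In `ExtractFeatures` above, each block's drop-connect rate is the global rate scaled by `idx / block_size`, so stochastic depth is off for the first block and strongest for the deepest one. The schedule, as standalone arithmetic assuming B5's 39 blocks and the default global rate of 0.2:

```python
global_rate, block_size = 0.2, 39
schedule = [global_rate * idx / block_size for idx in range(block_size)]
assert schedule[0] == 0.0                        # first block: never dropped
assert abs(schedule[-1] - 0.2 * 38 / 39) < 1e-9  # deepest block: ~0.195
```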
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import math +import collections +import re +import copy + +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + +GlobalParams = collections.namedtuple('GlobalParams', [ + 'batch_norm_momentum', + 'batch_norm_epsilon', + 'dropout_rate', + 'num_classes', + 'width_coefficient', + 'depth_coefficient', + 'depth_divisor', + 'min_depth', + 'drop_connect_rate', +]) + +BlockArgs = collections.namedtuple( + 'BlockArgs', + ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) + +GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) +BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) + + +def efficientnet_params(model_name: str): + """ Map EfficientNet model name to parameter coefficients. """ + params_dict = { + # Coefficients: width,depth,resolution,dropout + 'efficientnet-b6': (1.8, 2.6, 528, 0.5) + } + return params_dict[model_name] + + +def efficientnet(width_coefficient: float = None, + depth_coefficient: float = None, + dropout_rate: float = 0.2, + drop_connect_rate: float = 0.2): + """ Get block arguments according to parameter and coefficients. """ + blocks_args = [ + 'r1_k3_s11_e1_i32_o16_se0.25', + 'r2_k3_s22_e6_i16_o24_se0.25', + 'r2_k5_s22_e6_i24_o40_se0.25', + 'r3_k3_s22_e6_i40_o80_se0.25', + 'r3_k5_s11_e6_i80_o112_se0.25', + 'r4_k5_s22_e6_i112_o192_se0.25', + 'r1_k3_s11_e6_i192_o320_se0.25', + ] + blocks_args = BlockDecoder.decode(blocks_args) + + global_params = GlobalParams( + batch_norm_momentum=0.99, + batch_norm_epsilon=1e-3, + dropout_rate=dropout_rate, + drop_connect_rate=drop_connect_rate, + num_classes=1000, + width_coefficient=width_coefficient, + depth_coefficient=depth_coefficient, + depth_divisor=8, + min_depth=None) + + return blocks_args, global_params + + +def get_model_params(model_name: str, override_params: dict): + """ Get the block args and global params for a given model """ + if model_name.startswith('efficientnet'): + w, d, _, p = efficientnet_params(model_name) + blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) + else: + raise NotImplementedError('model name is not pre-defined: %s' % model_name) + if override_params: + global_params = global_params._replace(**override_params) + return blocks_args, global_params + + +def round_filters(filters: int, global_params: dict): + """ Calculate and round number of filters based on depth multiplier. """ + multiplier = global_params.width_coefficient + if not multiplier: + return filters + divisor = global_params.depth_divisor + min_depth = global_params.min_depth + filters *= multiplier + min_depth = min_depth or divisor + new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) + if new_filters < 0.9 * filters: # prevent rounding by more than 10% + new_filters += divisor + return int(new_filters) + + +def round_repeats(repeats: int, global_params: dict): + """ Round number of filters based on depth multiplier. 
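The hard-coded `oup = 576` fed to the head conv in `EfficientNet_B6` below is not arbitrary: it equals `round_filters(320, ...)` for width_coefficient 1.8, i.e. the output channels of the last MBConv stage ('...o320...'), and the head then widens it to `round_filters(1280, ...)`. A standalone check:

```python
def round_filters(filters, multiplier, divisor=8):
    """Same arithmetic as the helper above, inlined for a quick check."""
    scaled = filters * multiplier
    new_f = max(divisor, int(scaled + divisor / 2) // divisor * divisor)
    return new_f + divisor if new_f < 0.9 * scaled else new_f

assert round_filters(320, 1.8) == 576     # head input channels for B6
assert round_filters(1280, 1.8) == 2304   # head output channels for B6
```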
""" + multiplier = global_params.depth_coefficient + if not multiplier: + return repeats + return int(math.ceil(multiplier * repeats)) + + +class BlockDecoder(object): + """ + Block Decoder, straight from the official TensorFlow repository. + """ + + @staticmethod + def _decode_block_string(block_string: str): + """ Gets a block through a string notation of arguments. """ + assert isinstance(block_string, str) + + ops = block_string.split('_') + options = {} + for op in ops: + splits = re.split(r'(\d.*)', op) + if len(splits) >= 2: + key, value = splits[:2] + options[key] = value + + # Check stride + cond_1 = ('s' in options and len(options['s']) == 1) + cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) + assert (cond_1 or cond_2) + + return BlockArgs( + kernel_size=int(options['k']), + num_repeat=int(options['r']), + input_filters=int(options['i']), + output_filters=int(options['o']), + expand_ratio=int(options['e']), + id_skip=('noskip' not in block_string), + se_ratio=float(options['se']) if 'se' in options else None, + stride=[int(options['s'][0])]) + + @staticmethod + def _encode_block_string(block): + """Encodes a block to a string.""" + args = [ + 'r%d' % block.num_repeat, + 'k%d' % block.kernel_size, + 's%d%d' % (block.strides[0], block.strides[1]), + 'e%s' % block.expand_ratio, + 'i%d' % block.input_filters, + 'o%d' % block.output_filters + ] + if 0 < block.se_ratio <= 1: + args.append('se%s' % block.se_ratio) + if block.id_skip is False: + args.append('noskip') + return '_'.join(args) + + @staticmethod + def decode(string_list: list): + """ + Decode a list of string notations to specify blocks in the network. + + string_list: list of strings, each string is a notation of block + return + list of BlockArgs namedtuples of block args + """ + assert isinstance(string_list, list) + blocks_args = [] + for block_string in string_list: + blocks_args.append(BlockDecoder._decode_block_string(block_string)) + return blocks_args + + @staticmethod + def encode(blocks_args: list): + """ + Encodes a list of BlockArgs to a list of strings. 
+ + :param blocks_args: a list of BlockArgs namedtuples of block args + :return: a list of strings, each string is a notation of block + """ + block_strings = [] + for block in blocks_args: + block_strings.append(BlockDecoder._encode_block_string(block)) + return block_strings + + +def initial_type(name: str, use_bias: bool = False): + param_attr = ParamAttr(name=name + "_weights") + if use_bias: + bias_attr = ParamAttr(name=name + "_offset") + else: + bias_attr = False + return param_attr, bias_attr + + +def init_batch_norm_layer(name: str = "batch_norm"): + param_attr = ParamAttr(name=name + "_scale") + bias_attr = ParamAttr(name=name + "_offset") + return param_attr, bias_attr + + +def init_fc_layer(name: str = "fc"): + param_attr = ParamAttr(name=name + "_weights") + bias_attr = ParamAttr(name=name + "_offset") + return param_attr, bias_attr + + +def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): + """Calculate padding size.""" + if img_size % stride == 0: + out_size = max(filter_size - stride, 0) + else: + out_size = max(filter_size - (img_size % stride), 0) + return out_size // 2, out_size - out_size // 2 + + +inp_shape = {"b6": [528, 264, 264, 132, 66, 33, 33, 17]} + + +def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): + """Drop input connection""" + if is_test: + return inputs + keep_prob = 1.0 - prob + inputs_shape = paddle.shape(inputs) + random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) + binary_tensor = paddle.floor(random_tensor) + output = inputs / keep_prob * binary_tensor + return output + + +class Conv2ds(nn.Layer): + """Basic conv layer""" + + def __init__(self, + input_channels: int, + output_channels: int, + filter_size: int, + stride: int = 1, + padding: int = 0, + groups: int = None, + name: str = "conv2d", + act: str = None, + use_bias: bool = False, + padding_type: str = None, + model_name: str = None, + cur_stage: str = None): + super(Conv2ds, self).__init__() + assert act in [None, "swish", "sigmoid"] + self.act = act + + param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) + + def get_padding(filter_size, stride=1, dilation=1): + padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 + return padding + + inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] + self.need_crop = False + if padding_type == "SAME": + top_padding, bottom_padding = cal_padding(inps, stride, filter_size) + left_padding, right_padding = cal_padding(inps, stride, filter_size) + height_padding = bottom_padding + width_padding = right_padding + if top_padding != bottom_padding or left_padding != right_padding: + height_padding = top_padding + stride + width_padding = left_padding + stride + self.need_crop = True + padding = [height_padding, width_padding] + elif padding_type == "VALID": + height_padding = 0 + width_padding = 0 + padding = [height_padding, width_padding] + elif padding_type == "DYNAMIC": + padding = get_padding(filter_size, stride) + else: + padding = padding_type + + groups = 1 if groups is None else groups + self._conv = Conv2d( + input_channels, + output_channels, + filter_size, + groups=groups, + stride=stride, + padding=padding, + weight_attr=param_attr, + bias_attr=bias_attr) + + def forward(self, inputs: paddle.Tensor): + x = self._conv(inputs) + if self.act == "swish": + x = F.swish(x) + elif self.act == "sigmoid": + x = F.sigmoid(x) + + if self.need_crop: + x = x[:, :, 1:, 1:] + return x + + +class ConvBNLayer(nn.Layer): + """Basic conv 
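`inp_shape` above pins the spatial size entering each conv stage for a fixed 528x528 B6 input; it has to be known at layer-construction time because the TF-style SAME padding computed in `Conv2ds` depends on the input size. The ladder follows from the stem plus the stride-2 stages (ceil division):

```python
import math

strides = [2, 1, 2, 2, 2, 1, 2]   # stem, then the lead block of stages 1..6
sizes, cur = [528], 528
for s in strides:
    cur = int(math.ceil(cur / s))
    sizes.append(cur)
assert sizes == [528, 264, 264, 132, 66, 33, 33, 17]   # == inp_shape["b6"]
```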
bn layer.""" + + def __init__(self, + input_channels: int, + filter_size: int, + output_channels: int, + stride: int = 1, + num_groups: int = 1, + padding_type: str = "SAME", + conv_act: str = None, + bn_act: str = "swish", + use_bn: bool = True, + use_bias: bool = False, + name: str = None, + conv_name: str = None, + bn_name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2ds( + input_channels=input_channels, + output_channels=output_channels, + filter_size=filter_size, + stride=stride, + groups=num_groups, + act=conv_act, + padding_type=padding_type, + name=conv_name, + use_bias=use_bias, + model_name=model_name, + cur_stage=cur_stage) + self.use_bn = use_bn + if use_bn is True: + bn_name = name + bn_name + param_attr, bias_attr = init_batch_norm_layer(bn_name) + + self._bn = BatchNorm( + num_channels=output_channels, + act=bn_act, + momentum=0.99, + epsilon=0.001, + moving_mean_name=bn_name + "_mean", + moving_variance_name=bn_name + "_variance", + param_attr=param_attr, + bias_attr=bias_attr) + + def forward(self, inputs: paddle.Tensor): + if self.use_bn: + x = self._conv(inputs) + x = self._bn(x) + return x + else: + return self._conv(inputs) + + +class ExpandConvNorm(nn.Layer): + """Expand conv norm layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ExpandConvNorm, self).__init__() + + self.oup = block_args.input_filters * block_args.expand_ratio + self.expand_ratio = block_args.expand_ratio + + if self.expand_ratio != 1: + self._conv = ConvBNLayer( + input_channels, + 1, + self.oup, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_expand_conv", + bn_name="_bn0", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + if self.expand_ratio != 1: + return self._conv(inputs) + else: + return inputs + + +class DepthwiseConvNorm(nn.Layer): + """Depthwise conv norm layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(DepthwiseConvNorm, self).__init__() + + self.k = block_args.kernel_size + self.s = block_args.stride + if isinstance(self.s, list) or isinstance(self.s, tuple): + self.s = self.s[0] + oup = block_args.input_filters * block_args.expand_ratio + + self._conv = ConvBNLayer( + input_channels, + self.k, + oup, + self.s, + num_groups=input_channels, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_depthwise_conv", + bn_name="_bn1", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class ProjectConvNorm(nn.Layer): + """Projection conv bn layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ProjectConvNorm, self).__init__() + + final_oup = block_args.output_filters + + self._conv = ConvBNLayer( + input_channels, + 1, + final_oup, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_project_conv", + bn_name="_bn2", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class SEBlock(nn.Layer): + """Basic Squeeze-and-Excitation block for Efficientnet.""" + + def __init__(self, + 
input_channels: int, + num_squeezed_channels: int, + oup: int, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(SEBlock, self).__init__() + + self._pool = AdaptiveAvgPool2d(1) + self._conv1 = Conv2ds( + input_channels, + num_squeezed_channels, + 1, + use_bias=True, + padding_type=padding_type, + act="swish", + name=name + "_se_reduce") + + self._conv2 = Conv2ds( + num_squeezed_channels, + oup, + 1, + act="sigmoid", + use_bias=True, + padding_type=padding_type, + name=name + "_se_expand") + + def forward(self, inputs: paddle.Tensor): + x = self._pool(inputs) + x = self._conv1(x) + x = self._conv2(x) + return paddle.multiply(inputs, x) + + +class MbConvBlock(nn.Layer): + """Mobile inverted bottleneck convolution for Efficientnet.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + use_se: bool, + name: str = None, + drop_connect_rate: float = None, + is_test: bool = False, + model_name: str = None, + cur_stage: str = None): + super(MbConvBlock, self).__init__() + + oup = block_args.input_filters * block_args.expand_ratio + self.block_args = block_args + self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) + self.id_skip = block_args.id_skip + self.expand_ratio = block_args.expand_ratio + self.drop_connect_rate = drop_connect_rate + self.is_test = is_test + + if self.expand_ratio != 1: + self._ecn = ExpandConvNorm( + input_channels, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + self._dcn = DepthwiseConvNorm( + input_channels * block_args.expand_ratio, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + if self.has_se: + num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) + self._se = SEBlock( + input_channels * block_args.expand_ratio, + num_squeezed_channels, + oup, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + self._pcn = ProjectConvNorm( + input_channels * block_args.expand_ratio, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + x = inputs + if self.expand_ratio != 1: + x = self._ecn(x) + x = F.swish(x) + x = self._dcn(x) + x = F.swish(x) + if self.has_se: + x = self._se(x) + x = self._pcn(x) + if self.id_skip and \ + self.block_args.stride == 1 and \ + self.block_args.input_filters == self.block_args.output_filters: + if self.drop_connect_rate: + x = _drop_connect(x, self.drop_connect_rate, self.is_test) + x = paddle.elementwise_add(x, inputs) + return x + + +class ConvStemNorm(nn.Layer): + """Basic conv stem norm block for extracting features.""" + + def __init__(self, + input_channels: int, + padding_type: str, + _global_params: dict, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ConvStemNorm, self).__init__() + + output_channels = round_filters(32, _global_params) + self._conv = ConvBNLayer( + input_channels, + filter_size=3, + output_channels=output_channels, + stride=2, + bn_act=None, + padding_type=padding_type, + name="", + conv_name="_conv_stem", + bn_name="_bn0", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class ExtractFeatures(nn.Layer): + """Extract features.""" + + def __init__(self, + input_channels: int, + _block_args: dict, + 
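`MbConvBlock.forward` above only adds the identity shortcut when `id_skip` is set, the stride is 1, and input/output filter counts agree. Note that the first block of each stage stores its stride as a one-element list, so `stride == 1` is False there; those blocks also change channel count, so the outcome still matches the intended rule. My reading, as an illustrative sketch:

```python
def takes_identity_shortcut(id_skip, stride, input_filters, output_filters):
    """Illustrative restatement of the residual condition in forward()."""
    return bool(id_skip and stride == 1 and input_filters == output_filters)

assert takes_identity_shortcut(True, 1, 64, 64)        # repeated block
assert not takes_identity_shortcut(True, [2], 40, 64)  # stage-leading block
assert not takes_identity_shortcut(True, 1, 40, 64)    # channels change
```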
_global_params: dict, + padding_type: str, + use_se: bool, + is_test: bool, + model_name: str = None): + super(ExtractFeatures, self).__init__() + + self._global_params = _global_params + + self._conv_stem = ConvStemNorm( + input_channels, + padding_type=padding_type, + _global_params=_global_params, + model_name=model_name, + cur_stage=0) + + self.block_args_copy = copy.deepcopy(_block_args) + idx = 0 + block_size = 0 + for block_arg in self.block_args_copy: + block_arg = block_arg._replace( + input_filters=round_filters(block_arg.input_filters, _global_params), + output_filters=round_filters(block_arg.output_filters, _global_params), + num_repeat=round_repeats(block_arg.num_repeat, _global_params)) + block_size += 1 + for _ in range(block_arg.num_repeat - 1): + block_size += 1 + + self.conv_seq = [] + cur_stage = 1 + for block_args in _block_args: + block_args = block_args._replace( + input_filters=round_filters(block_args.input_filters, _global_params), + output_filters=round_filters(block_args.output_filters, _global_params), + num_repeat=round_repeats(block_args.num_repeat, _global_params)) + + drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 + if drop_connect_rate: + drop_connect_rate *= float(idx) / block_size + + _mc_block = self.add_sublayer( + "_blocks." + str(idx) + ".", + MbConvBlock( + block_args.input_filters, + block_args=block_args, + padding_type=padding_type, + use_se=use_se, + name="_blocks." + str(idx) + ".", + drop_connect_rate=drop_connect_rate, + model_name=model_name, + cur_stage=cur_stage)) + self.conv_seq.append(_mc_block) + idx += 1 + if block_args.num_repeat > 1: + block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) + for _ in range(block_args.num_repeat - 1): + drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 + if drop_connect_rate: + drop_connect_rate *= float(idx) / block_size + _mc_block = self.add_sublayer( + "block." + str(idx) + ".", + MbConvBlock( + block_args.input_filters, + block_args, + padding_type=padding_type, + use_se=use_se, + name="_blocks." 
+ str(idx) + ".", + drop_connect_rate=drop_connect_rate, + model_name=model_name, + cur_stage=cur_stage)) + self.conv_seq.append(_mc_block) + idx += 1 + cur_stage += 1 + + def forward(self, inputs: paddle.Tensor): + x = self._conv_stem(inputs) + x = F.swish(x) + for _mc_block in self.conv_seq: + x = _mc_block(x) + return x + + +@moduleinfo( + name="efficientnetb6_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="efficientnetb6_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class EfficientNet_B6(nn.Layer): + def __init__(self, + is_test: bool = False, + padding_type: str = "SAME", + override_params: dict = None, + use_se: bool = True, + class_dim: int = 1000, + load_checkpoint: str = None): + super(EfficientNet_B6, self).__init__() + + model_name = 'efficientnet-b6' + self.name = "b6" + self._block_args, self._global_params = get_model_params(model_name, override_params) + self.padding_type = padding_type + self.use_se = use_se + self.is_test = is_test + + self._ef = ExtractFeatures( + 3, + self._block_args, + self._global_params, + self.padding_type, + self.use_se, + self.is_test, + model_name=self.name) + + output_channels = round_filters(1280, self._global_params) + oup = 576 + + self._conv = ConvBNLayer( + oup, + 1, + output_channels, + bn_act="swish", + padding_type=self.padding_type, + name="", + conv_name="_conv_head", + bn_name="_bn1", + model_name=self.name, + cur_stage=7) + self._pool = AdaptiveAvgPool2d(1) + + if self._global_params.dropout_rate: + self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") + + param_attr, bias_attr = init_fc_layer("_fc") + self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'efficientnet_b6_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b6_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + x = self._ef(inputs) + x = self._conv(x) + x = self._pool(x) + if self._global_params.dropout_rate: + x = self._drop(x) + x = paddle.squeeze(x, axis=[2, 3]) + x = self._fc(x) + return x diff --git a/modules/image/classification/efficientnetb7_imagenet/module.py b/modules/image/classification/efficientnetb7_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..9365b5359076c79b53879dfddc5556209fb45c6e --- /dev/null +++ b/modules/image/classification/efficientnetb7_imagenet/module.py @@ -0,0 +1,791 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
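The efficientnetb5/b6/b7 modules in this diff differ from one another only in their scaling coefficients: b5 (1.6, 2.2, 456, 0.4), b6 (1.8, 2.6, 528, 0.5) and b7 (2.0, 3.1, 600, 0.5) below. Depth scaling alone already separates them clearly; a quick tally of MBConv blocks per variant from the shared base repeats:

```python
import math

base_repeats = [1, 2, 2, 3, 3, 4, 1]   # 'r' fields of the seven stage strings
for name, depth in [('b5', 2.2), ('b6', 2.6), ('b7', 3.1)]:
    total = sum(int(math.ceil(depth * r)) for r in base_repeats)
    print(name, total)   # b5 39, b6 45, b7 55
```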
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import math +import collections +import re +import copy + +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + +GlobalParams = collections.namedtuple('GlobalParams', [ + 'batch_norm_momentum', + 'batch_norm_epsilon', + 'dropout_rate', + 'num_classes', + 'width_coefficient', + 'depth_coefficient', + 'depth_divisor', + 'min_depth', + 'drop_connect_rate', +]) + +BlockArgs = collections.namedtuple( + 'BlockArgs', + ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) + +GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) +BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) + + +def efficientnet_params(model_name: str): + """ Map EfficientNet model name to parameter coefficients. """ + params_dict = { + # Coefficients: width,depth,resolution,dropout + 'efficientnet-b7': (2.0, 3.1, 600, 0.5) + } + return params_dict[model_name] + + +def efficientnet(width_coefficient: float = None, + depth_coefficient: float = None, + dropout_rate: float = 0.2, + drop_connect_rate: float = 0.2): + """ Get block arguments according to parameter and coefficients. """ + blocks_args = [ + 'r1_k3_s11_e1_i32_o16_se0.25', + 'r2_k3_s22_e6_i16_o24_se0.25', + 'r2_k5_s22_e6_i24_o40_se0.25', + 'r3_k3_s22_e6_i40_o80_se0.25', + 'r3_k5_s11_e6_i80_o112_se0.25', + 'r4_k5_s22_e6_i112_o192_se0.25', + 'r1_k3_s11_e6_i192_o320_se0.25', + ] + blocks_args = BlockDecoder.decode(blocks_args) + + global_params = GlobalParams( + batch_norm_momentum=0.99, + batch_norm_epsilon=1e-3, + dropout_rate=dropout_rate, + drop_connect_rate=drop_connect_rate, + num_classes=1000, + width_coefficient=width_coefficient, + depth_coefficient=depth_coefficient, + depth_divisor=8, + min_depth=None) + + return blocks_args, global_params + + +def get_model_params(model_name: str, override_params: dict): + """ Get the block args and global params for a given model """ + if model_name.startswith('efficientnet'): + w, d, _, p = efficientnet_params(model_name) + blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) + else: + raise NotImplementedError('model name is not pre-defined: %s' % model_name) + if override_params: + global_params = global_params._replace(**override_params) + return blocks_args, global_params + + +def round_filters(filters: int, global_params: dict): + """ Calculate and round number of filters based on depth multiplier. """ + multiplier = global_params.width_coefficient + if not multiplier: + return filters + divisor = global_params.depth_divisor + min_depth = global_params.min_depth + filters *= multiplier + min_depth = min_depth or divisor + new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) + if new_filters < 0.9 * filters: # prevent rounding by more than 10% + new_filters += divisor + return int(new_filters) + + +def round_repeats(repeats: int, global_params: dict): + """ Round number of filters based on depth multiplier. 
""" + multiplier = global_params.depth_coefficient + if not multiplier: + return repeats + return int(math.ceil(multiplier * repeats)) + + +class BlockDecoder(object): + """ + Block Decoder, straight from the official TensorFlow repository. + """ + + @staticmethod + def _decode_block_string(block_string: str): + """ Gets a block through a string notation of arguments. """ + assert isinstance(block_string, str) + + ops = block_string.split('_') + options = {} + for op in ops: + splits = re.split(r'(\d.*)', op) + if len(splits) >= 2: + key, value = splits[:2] + options[key] = value + + # Check stride + cond_1 = ('s' in options and len(options['s']) == 1) + cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) + assert (cond_1 or cond_2) + + return BlockArgs( + kernel_size=int(options['k']), + num_repeat=int(options['r']), + input_filters=int(options['i']), + output_filters=int(options['o']), + expand_ratio=int(options['e']), + id_skip=('noskip' not in block_string), + se_ratio=float(options['se']) if 'se' in options else None, + stride=[int(options['s'][0])]) + + @staticmethod + def _encode_block_string(block): + """Encodes a block to a string.""" + args = [ + 'r%d' % block.num_repeat, + 'k%d' % block.kernel_size, + 's%d%d' % (block.strides[0], block.strides[1]), + 'e%s' % block.expand_ratio, + 'i%d' % block.input_filters, + 'o%d' % block.output_filters + ] + if 0 < block.se_ratio <= 1: + args.append('se%s' % block.se_ratio) + if block.id_skip is False: + args.append('noskip') + return '_'.join(args) + + @staticmethod + def decode(string_list: list): + """ + Decode a list of string notations to specify blocks in the network. + + string_list: list of strings, each string is a notation of block + return + list of BlockArgs namedtuples of block args + """ + assert isinstance(string_list, list) + blocks_args = [] + for block_string in string_list: + blocks_args.append(BlockDecoder._decode_block_string(block_string)) + return blocks_args + + @staticmethod + def encode(blocks_args: list): + """ + Encodes a list of BlockArgs to a list of strings. 
+ + :param blocks_args: a list of BlockArgs namedtuples of block args + :return: a list of strings, each string is a notation of block + """ + block_strings = [] + for block in blocks_args: + block_strings.append(BlockDecoder._encode_block_string(block)) + return block_strings + + +def initial_type(name: str, use_bias: bool = False): + param_attr = ParamAttr(name=name + "_weights") + if use_bias: + bias_attr = ParamAttr(name=name + "_offset") + else: + bias_attr = False + return param_attr, bias_attr + + +def init_batch_norm_layer(name: str = "batch_norm"): + param_attr = ParamAttr(name=name + "_scale") + bias_attr = ParamAttr(name=name + "_offset") + return param_attr, bias_attr + + +def init_fc_layer(name: str = "fc"): + param_attr = ParamAttr(name=name + "_weights") + bias_attr = ParamAttr(name=name + "_offset") + return param_attr, bias_attr + + +def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): + """Calculate padding size.""" + if img_size % stride == 0: + out_size = max(filter_size - stride, 0) + else: + out_size = max(filter_size - (img_size % stride), 0) + return out_size // 2, out_size - out_size // 2 + + +inp_shape = {"b7": [600, 300, 300, 150, 75, 38, 38, 19]} + + +def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): + """Drop input connection""" + if is_test: + return inputs + keep_prob = 1.0 - prob + inputs_shape = paddle.shape(inputs) + random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) + binary_tensor = paddle.floor(random_tensor) + output = inputs / keep_prob * binary_tensor + return output + + +class Conv2ds(nn.Layer): + """Basic conv layer""" + + def __init__(self, + input_channels: int, + output_channels: int, + filter_size: int, + stride: int = 1, + padding: int = 0, + groups: int = None, + name: str = "conv2d", + act: str = None, + use_bias: bool = False, + padding_type: str = None, + model_name: str = None, + cur_stage: str = None): + super(Conv2ds, self).__init__() + assert act in [None, "swish", "sigmoid"] + self.act = act + + param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) + + def get_padding(filter_size, stride=1, dilation=1): + padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 + return padding + + inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] + self.need_crop = False + if padding_type == "SAME": + top_padding, bottom_padding = cal_padding(inps, stride, filter_size) + left_padding, right_padding = cal_padding(inps, stride, filter_size) + height_padding = bottom_padding + width_padding = right_padding + if top_padding != bottom_padding or left_padding != right_padding: + height_padding = top_padding + stride + width_padding = left_padding + stride + self.need_crop = True + padding = [height_padding, width_padding] + elif padding_type == "VALID": + height_padding = 0 + width_padding = 0 + padding = [height_padding, width_padding] + elif padding_type == "DYNAMIC": + padding = get_padding(filter_size, stride) + else: + padding = padding_type + + groups = 1 if groups is None else groups + self._conv = Conv2d( + input_channels, + output_channels, + filter_size, + groups=groups, + stride=stride, + padding=padding, + weight_attr=param_attr, + bias_attr=bias_attr) + + def forward(self, inputs: paddle.Tensor): + x = self._conv(inputs) + if self.act == "swish": + x = F.swish(x) + elif self.act == "sigmoid": + x = F.sigmoid(x) + + if self.need_crop: + x = x[:, :, 1:, 1:] + return x + + +class ConvBNLayer(nn.Layer): + """Basic conv 
bn layer.""" + + def __init__(self, + input_channels: int, + filter_size: int, + output_channels: int, + stride: int = 1, + num_groups: int = 1, + padding_type: str = "SAME", + conv_act: str = None, + bn_act: str = "swish", + use_bn: bool = True, + use_bias: bool = False, + name: str = None, + conv_name: str = None, + bn_name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2ds( + input_channels=input_channels, + output_channels=output_channels, + filter_size=filter_size, + stride=stride, + groups=num_groups, + act=conv_act, + padding_type=padding_type, + name=conv_name, + use_bias=use_bias, + model_name=model_name, + cur_stage=cur_stage) + self.use_bn = use_bn + if use_bn is True: + bn_name = name + bn_name + param_attr, bias_attr = init_batch_norm_layer(bn_name) + + self._bn = BatchNorm( + num_channels=output_channels, + act=bn_act, + momentum=0.99, + epsilon=0.001, + moving_mean_name=bn_name + "_mean", + moving_variance_name=bn_name + "_variance", + param_attr=param_attr, + bias_attr=bias_attr) + + def forward(self, inputs: paddle.Tensor): + if self.use_bn: + x = self._conv(inputs) + x = self._bn(x) + return x + else: + return self._conv(inputs) + + +class ExpandConvNorm(nn.Layer): + """Expand conv norm layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ExpandConvNorm, self).__init__() + + self.oup = block_args.input_filters * block_args.expand_ratio + self.expand_ratio = block_args.expand_ratio + + if self.expand_ratio != 1: + self._conv = ConvBNLayer( + input_channels, + 1, + self.oup, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_expand_conv", + bn_name="_bn0", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + if self.expand_ratio != 1: + return self._conv(inputs) + else: + return inputs + + +class DepthwiseConvNorm(nn.Layer): + """Depthwise conv norm layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(DepthwiseConvNorm, self).__init__() + + self.k = block_args.kernel_size + self.s = block_args.stride + if isinstance(self.s, list) or isinstance(self.s, tuple): + self.s = self.s[0] + oup = block_args.input_filters * block_args.expand_ratio + + self._conv = ConvBNLayer( + input_channels, + self.k, + oup, + self.s, + num_groups=input_channels, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_depthwise_conv", + bn_name="_bn1", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class ProjectConvNorm(nn.Layer): + """Projection conv bn layer.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ProjectConvNorm, self).__init__() + + final_oup = block_args.output_filters + + self._conv = ConvBNLayer( + input_channels, + 1, + final_oup, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_project_conv", + bn_name="_bn2", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class SEBlock(nn.Layer): + """Basic Squeeze-and-Excitation block for Efficientnet.""" + + def __init__(self, + 
input_channels: int, + num_squeezed_channels: int, + oup: int, + padding_type: str, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(SEBlock, self).__init__() + + self._pool = AdaptiveAvgPool2d(1) + self._conv1 = Conv2ds( + input_channels, + num_squeezed_channels, + 1, + use_bias=True, + padding_type=padding_type, + act="swish", + name=name + "_se_reduce") + + self._conv2 = Conv2ds( + num_squeezed_channels, + oup, + 1, + act="sigmoid", + use_bias=True, + padding_type=padding_type, + name=name + "_se_expand") + + def forward(self, inputs: paddle.Tensor): + x = self._pool(inputs) + x = self._conv1(x) + x = self._conv2(x) + return paddle.multiply(inputs, x) + + +class MbConvBlock(nn.Layer): + """Mobile inverted bottleneck convolution for Efficientnet.""" + + def __init__(self, + input_channels: int, + block_args: dict, + padding_type: str, + use_se: bool, + name: str = None, + drop_connect_rate: float = None, + is_test: bool = False, + model_name: str = None, + cur_stage: str = None): + super(MbConvBlock, self).__init__() + + oup = block_args.input_filters * block_args.expand_ratio + self.block_args = block_args + self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) + self.id_skip = block_args.id_skip + self.expand_ratio = block_args.expand_ratio + self.drop_connect_rate = drop_connect_rate + self.is_test = is_test + + if self.expand_ratio != 1: + self._ecn = ExpandConvNorm( + input_channels, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + self._dcn = DepthwiseConvNorm( + input_channels * block_args.expand_ratio, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + if self.has_se: + num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) + self._se = SEBlock( + input_channels * block_args.expand_ratio, + num_squeezed_channels, + oup, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + self._pcn = ProjectConvNorm( + input_channels * block_args.expand_ratio, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + x = inputs + if self.expand_ratio != 1: + x = self._ecn(x) + x = F.swish(x) + x = self._dcn(x) + x = F.swish(x) + if self.has_se: + x = self._se(x) + x = self._pcn(x) + if self.id_skip and \ + self.block_args.stride == 1 and \ + self.block_args.input_filters == self.block_args.output_filters: + if self.drop_connect_rate: + x = _drop_connect(x, self.drop_connect_rate, self.is_test) + x = paddle.elementwise_add(x, inputs) + return x + + +class ConvStemNorm(nn.Layer): + """Basic conv stem norm block for extracting features.""" + + def __init__(self, + input_channels: int, + padding_type: str, + _global_params: dict, + name: str = None, + model_name: str = None, + cur_stage: str = None): + super(ConvStemNorm, self).__init__() + + output_channels = round_filters(32, _global_params) + self._conv = ConvBNLayer( + input_channels, + filter_size=3, + output_channels=output_channels, + stride=2, + bn_act=None, + padding_type=padding_type, + name="", + conv_name="_conv_stem", + bn_name="_bn0", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs: paddle.Tensor): + return self._conv(inputs) + + +class ExtractFeatures(nn.Layer): + """Extract features.""" + + def __init__(self, + input_channels: int, + _block_args: dict, + 
_global_params: dict, + padding_type: str, + use_se: bool, + is_test: bool, + model_name: str = None): + super(ExtractFeatures, self).__init__() + + self._global_params = _global_params + + self._conv_stem = ConvStemNorm( + input_channels, + padding_type=padding_type, + _global_params=_global_params, + model_name=model_name, + cur_stage=0) + + idx = 0 + # total number of MbConv blocks once num_repeat is expanded; used to scale drop_connect_rate + block_size = 0 + for block_arg in copy.deepcopy(_block_args): + block_size += round_repeats(block_arg.num_repeat, _global_params) + + self.conv_seq = [] + cur_stage = 1 + for block_args in _block_args: + block_args = block_args._replace( + input_filters=round_filters(block_args.input_filters, _global_params), + output_filters=round_filters(block_args.output_filters, _global_params), + num_repeat=round_repeats(block_args.num_repeat, _global_params)) + + drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 + if drop_connect_rate: + drop_connect_rate *= float(idx) / block_size + + _mc_block = self.add_sublayer( + "_blocks." + str(idx) + ".", + MbConvBlock( + block_args.input_filters, + block_args=block_args, + padding_type=padding_type, + use_se=use_se, + name="_blocks." + str(idx) + ".", + drop_connect_rate=drop_connect_rate, + model_name=model_name, + cur_stage=cur_stage)) + self.conv_seq.append(_mc_block) + idx += 1 + if block_args.num_repeat > 1: + block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) + for _ in range(block_args.num_repeat - 1): + drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 + if drop_connect_rate: + drop_connect_rate *= float(idx) / block_size + _mc_block = self.add_sublayer( + # sublayer key must use the same "_blocks." prefix as above so state-dict names stay consistent + "_blocks." + str(idx) + ".", + MbConvBlock( + block_args.input_filters, + block_args, + padding_type=padding_type, + use_se=use_se, + name="_blocks."
+ str(idx) + ".", + drop_connect_rate=drop_connect_rate, + model_name=model_name, + cur_stage=cur_stage)) + self.conv_seq.append(_mc_block) + idx += 1 + cur_stage += 1 + + def forward(self, inputs: paddle.Tensor): + x = self._conv_stem(inputs) + x = F.swish(x) + for _mc_block in self.conv_seq: + x = _mc_block(x) + return x + + +@moduleinfo( + name="efficientnetb7_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="efficientnetb7_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class EfficientNet_B7(nn.Layer): + def __init__(self, + is_test: bool = False, + padding_type: str = "SAME", + override_params: dict = None, + use_se: bool = True, + class_dim: int = 1000, + load_checkpoint: str = None): + super(EfficientNet_B7, self).__init__() + + model_name = 'efficientnet-b7' + self.name = "b7" + self._block_args, self._global_params = get_model_params(model_name, override_params) + self.padding_type = padding_type + self.use_se = use_se + self.is_test = is_test + + self._ef = ExtractFeatures( + 3, + self._block_args, + self._global_params, + self.padding_type, + self.use_se, + self.is_test, + model_name=self.name) + + output_channels = round_filters(1280, self._global_params) + oup = 640 + + self._conv = ConvBNLayer( + oup, + 1, + output_channels, + bn_act="swish", + padding_type=self.padding_type, + name="", + conv_name="_conv_head", + bn_name="_bn1", + model_name=self.name, + cur_stage=7) + self._pool = AdaptiveAvgPool2d(1) + + if self._global_params.dropout_rate: + self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") + + param_attr, bias_attr = init_fc_layer("_fc") + self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'efficientnet_b7_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b7_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + x = self._ef(inputs) + x = self._conv(x) + x = self._pool(x) + if self._global_params.dropout_rate: + x = self._drop(x) + x = paddle.squeeze(x, axis=[2, 3]) + x = self._fc(x) + return x diff --git a/hub_module/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/README.md b/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/README.md similarity index 100% rename from hub_module/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/README.md rename to modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/README.md diff --git a/hub_module/modules/image/classification/darknet53_imagenet/__init__.py b/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/__init__.py similarity index 100% rename from hub_module/modules/image/classification/darknet53_imagenet/__init__.py rename to modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/__init__.py diff --git a/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/data_feed.py b/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/data_feed.py new file mode 100644 index 
0000000000000000000000000000000000000000..99a0855fd6a93dbecd081cef312a04a350cfcc50 --- /dev/null +++ b/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/data_feed.py @@ -0,0 +1,82 @@ +# coding=utf-8 +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader'] + +DATA_DIM = 224 +img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + +def resize_short(img, target_size): + percent = float(target_size) / min(img.size[0], img.size[1]) + resized_width = int(round(img.size[0] * percent)) + resized_height = int(round(img.size[1] * percent)) + img = img.resize((resized_width, resized_height), Image.LANCZOS) + return img + + +def crop_image(img, target_size, center): + width, height = img.size + size = target_size + if center: + w_start = (width - size) // 2 + h_start = (height - size) // 2 + else: + w_start = np.random.randint(0, width - size + 1) + h_start = np.random.randint(0, height - size + 1) + w_end = w_start + size + h_end = h_start + size + img = img.crop((w_start, h_start, w_end, h_end)) + return img + + +def process_image(img): + img = resize_short(img, target_size=256) + img = crop_image(img, target_size=DATA_DIM, center=True) + if img.mode != 'RGB': + img = img.convert('RGB') + img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list[numpy.ndarray]): images data, shape of each is [H, W, C]. + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + each['org_im_path'] = im_path + each['org_im'] = Image.open(im_path) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + if images is not None: + assert isinstance(images, list), "images should be a list."
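+ # ndarray inputs follow OpenCV's BGR channel order (see the classification() docstring); + # im[:, :, ::-1] below reverses the channels to RGB before wrapping each array in a PIL Image.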
+ for im in images: + each = OrderedDict() + each['org_im'] = Image.fromarray(im[:, :, ::-1]) + each['org_im_path'] = 'ndarray_time={}'.format(int(time.time() * 1e6)) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + + for element in component: + element['image'] = process_image(element['org_im']) + yield element diff --git a/hub_module/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/label_list.txt b/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/label_list.txt similarity index 100% rename from hub_module/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/label_list.txt rename to modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/label_list.txt diff --git a/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/module.py b/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..686431e06bb88d77e2d06cec3a3a5ab4721e01f2 --- /dev/null +++ b/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/module.py @@ -0,0 +1,231 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division + +import ast +import argparse +import os + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving +from paddlehub.common.paddle_helper import add_vars_prefix + +from fix_resnext101_32x48d_wsl_imagenet.processor import postprocess, base64_to_cv2 +from fix_resnext101_32x48d_wsl_imagenet.data_feed import reader +from fix_resnext101_32x48d_wsl_imagenet.resnext101_wsl import Fix_ResNeXt101_32x48d_wsl + + +@moduleinfo( + name="fix_resnext101_32x48d_wsl_imagenet", + type="CV/image_classification", + author="paddlepaddle", + author_email="paddle-dev@baidu.com", + summary="fix_resnext101_32x48d_wsl is an image classification model, this module is trained with the ImageNet dataset.", + version="1.0.0") +class FixResnext10132x48dwslImagenet(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, "model") + label_file = os.path.join(self.directory, "label_list.txt") + with open(label_file, 'r', encoding='utf-8') as file: + self.label_list = file.read().split("\n")[:-1] + self.predictor_set = False + + def get_expected_image_width(self): + return 224 + + def get_expected_image_height(self): + return 224 + + def get_pretrained_images_mean(self): + im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3) + return im_mean + + def get_pretrained_images_std(self): + im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) + return im_std + + def _set_config(self): + """ + predictor config setting + """ + cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def context(self, trainable=True, pretrained=True): + """context for transfer learning.
+ + Args: + trainable (bool): Set parameters in program to be trainable. + pretrained (bool): Whether to load pretrained model. + + Returns: + inputs (dict): key is 'image', corresponding value is image tensor. + outputs (dict): keys are: + 'classification', corresponding value is the result of classification. + 'feature_map', corresponding value is the result of the layer before the fully connected layer. + context_prog (fluid.Program): program for transfer learning. + """ + context_prog = fluid.Program() + startup_prog = fluid.Program() + with fluid.program_guard(context_prog, startup_prog): + with fluid.unique_name.guard(): + image = fluid.layers.data(name="image", shape=[3, 224, 224], dtype="float32") + resnext_wsl = Fix_ResNeXt101_32x48d_wsl() + output, feature_map = resnext_wsl.net(input=image, class_dim=len(self.label_list)) + + name_prefix = '@HUB_{}@'.format(self.name) + inputs = {'image': name_prefix + image.name} + outputs = {'classification': name_prefix + output.name, 'feature_map': name_prefix + feature_map.name} + add_vars_prefix(context_prog, name_prefix) + add_vars_prefix(startup_prog, name_prefix) + global_vars = context_prog.global_block().vars + inputs = {key: global_vars[value] for key, value in inputs.items()} + outputs = {key: global_vars[value] for key, value in outputs.items()} + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + # pretrained + if pretrained: + + def _if_exist(var): + b = os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + return b + + fluid.io.load_vars(exe, self.default_pretrained_model_path, context_prog, predicate=_if_exist) + else: + exe.run(startup_prog) + # trainable + for param in context_prog.global_block().iter_parameters(): + param.trainable = trainable + return inputs, outputs, context_prog + + def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1): + """ + API for image classification. + + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. + + Returns: + res (list[dict]): The classification results. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as the cuda device id."
+ ) + + if not self.predictor_set: + self._set_config() + self.predictor_set = True + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + batch_image = PaddleTensor(batch_image.copy()) + predictor_output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run( + [batch_image]) + out = postprocess(data_out=predictor_output[0].as_ndarray(), label_list=self.label_list, top_k=top_k) + res += out + return res + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.classification(images=images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.classification(paths=[args.input_path], batch_size=args.batch_size, use_gpu=args.use_gpu) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not.") + self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + self.arg_config_group.add_argument('--top_k', type=ast.literal_eval, default=1, help="Return top k results.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") diff --git a/hub_module/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/processor.py b/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/processor.py similarity index 100% rename from hub_module/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/processor.py rename to modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/processor.py diff --git a/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/resnext101_wsl.py b/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/resnext101_wsl.py new file mode 100644 index 0000000000000000000000000000000000000000..3c01d8bddec17cd631c77bf552edc6d808a39002 --- /dev/null +++ b/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/resnext101_wsl.py @@ -0,0 +1,147 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import paddle +import paddle.fluid as fluid +import math +from paddle.fluid.param_attr import ParamAttr + +__all__ = [ + "ResNeXt101_32x8d_wsl", "ResNeXt101_32x16d_wsl", "ResNeXt101_32x32d_wsl", "ResNeXt101_32x48d_wsl", + "Fix_ResNeXt101_32x48d_wsl" +] + + +class ResNeXt101_wsl(): + def __init__(self, layers=101, cardinality=32, width=48): + self.layers = layers + self.cardinality = cardinality + self.width = width + + def net(self, input, class_dim=1000): + layers = self.layers + cardinality = self.cardinality + width = self.width + + depth = [3, 4, 23, 3] + base_width = cardinality * width + num_filters = [base_width * i for i in [1, 2, 4, 8]] + + conv = self.conv_bn_layer( + input=input, num_filters=64, filter_size=7, stride=2, act='relu', name="conv1") #debug + conv = fluid.layers.pool2d(input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + + for block in range(len(depth)): + for i in range(depth[block]): + conv_name = 'layer' + str(block + 1) + "." 
+ str(i) + conv = self.bottleneck_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=cardinality, + name=conv_name) + + pool = fluid.layers.pool2d(input=conv, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv), name='fc.weight'), + bias_attr=fluid.param_attr.ParamAttr(name='fc.bias')) + return out, pool + + def conv_bn_layer(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None): + if "downsample" in name: + conv_name = name + '.0' + else: + conv_name = name + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=conv_name + ".weight"), + bias_attr=False) + if "downsample" in name: + bn_name = name[:9] + 'downsample' + '.1' + else: + if "conv1" == name: + bn_name = 'bn' + name[-1] + else: + bn_name = (name[:10] if name[7:9].isdigit() else name[:9]) + 'bn' + name[-1] + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '.weight'), + bias_attr=ParamAttr(bn_name + '.bias'), + moving_mean_name=bn_name + '.running_mean', + moving_variance_name=bn_name + '.running_var', + ) + + def shortcut(self, input, ch_out, stride, name): + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck_block(self, input, num_filters, stride, cardinality, name): + cardinality = self.cardinality + width = self.width + conv0 = self.conv_bn_layer( + input=input, num_filters=num_filters, filter_size=1, act='relu', name=name + ".conv1") + conv1 = self.conv_bn_layer( + input=conv0, + num_filters=num_filters, + filter_size=3, + stride=stride, + groups=cardinality, + act='relu', + name=name + ".conv2") + conv2 = self.conv_bn_layer( + input=conv1, num_filters=num_filters // (width // 8), filter_size=1, act=None, name=name + ".conv3") + + short = self.shortcut(input, num_filters // (width // 8), stride, name=name + ".downsample") + + return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') + + +def ResNeXt101_32x8d_wsl(): + model = ResNeXt101_wsl(cardinality=32, width=8) + return model + + +def ResNeXt101_32x16d_wsl(): + model = ResNeXt101_wsl(cardinality=32, width=16) + return model + + +def ResNeXt101_32x32d_wsl(): + model = ResNeXt101_wsl(cardinality=32, width=32) + return model + + +def ResNeXt101_32x48d_wsl(): + model = ResNeXt101_wsl(cardinality=32, width=48) + return model + + +def Fix_ResNeXt101_32x48d_wsl(): + model = ResNeXt101_wsl(cardinality=32, width=48) + return model diff --git a/modules/image/classification/googlenet_imagenet/module.py b/modules/image/classification/googlenet_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..7bcf172fcbc06106b66978447ceed89a5a9ef14e --- /dev/null +++ b/modules/image/classification/googlenet_imagenet/module.py @@ -0,0 +1,182 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +def xavier(channels: int, filter_size: int, name: str): + """Initialize the weights by uniform distribution.""" + stdv = (3.0 / (filter_size**2 * channels))**0.5 + param_attr = ParamAttr(initializer=Uniform(-stdv, stdv), name=name + "_weights") + return param_attr + + +class ConvLayer(nn.Layer): + """Basic conv2d layer.""" + + def __init__(self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + name: str = None): + super(ConvLayer, self).__init__() + + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + + def forward(self, inputs: paddle.Tensor): + y = self._conv(inputs) + return y + + +class Inception(nn.Layer): + """Inception block.""" + + def __init__(self, + input_channels: int, + output_channels: int, + filter1: int, + filter3R: int, + filter3: int, + filter5R: int, + filter5: int, + proj: int, + name: str = None): + super(Inception, self).__init__() + + self._conv1 = ConvLayer(input_channels, filter1, 1, name="inception_" + name + "_1x1") + self._conv3r = ConvLayer(input_channels, filter3R, 1, name="inception_" + name + "_3x3_reduce") + self._conv3 = ConvLayer(filter3R, filter3, 3, name="inception_" + name + "_3x3") + self._conv5r = ConvLayer(input_channels, filter5R, 1, name="inception_" + name + "_5x5_reduce") + self._conv5 = ConvLayer(filter5R, filter5, 5, name="inception_" + name + "_5x5") + self._pool = MaxPool2d(kernel_size=3, stride=1, padding=1) + + self._convprj = ConvLayer(input_channels, proj, 1, name="inception_" + name + "_3x3_proj") + + def forward(self, inputs: paddle.Tensor): + conv1 = self._conv1(inputs) + + conv3r = self._conv3r(inputs) + conv3 = self._conv3(conv3r) + + conv5r = self._conv5r(inputs) + conv5 = self._conv5(conv5r) + + pool = self._pool(inputs) + convprj = self._convprj(pool) + + cat = paddle.concat([conv1, conv3, conv5, convprj], axis=1) + cat = F.relu(cat) + return cat + + +@moduleinfo( + name="googlenet_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="GoogleNet_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class GoogleNet(nn.Layer): + """GoogleNet model""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(GoogleNet, self).__init__() + self._conv = ConvLayer(3, 64, 7, 2, name="conv1") + self._pool = MaxPool2d(kernel_size=3, stride=2) + self._conv_1 = ConvLayer(64, 64, 1, name="conv2_1x1") + self._conv_2 = ConvLayer(64, 192, 3, name="conv2_3x3") + + 
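+ # Each Inception block below concatenates four branches (1x1, reduced 3x3, reduced 5x5, + # and a pooled 1x1 projection), so its output width is filter1 + filter3 + filter5 + proj; + # e.g. ince3a emits 64 + 128 + 32 + 32 = 256 channels, the input width declared by ince3b.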
self._ince3a = Inception(192, 192, 64, 96, 128, 16, 32, 32, name="ince3a") + self._ince3b = Inception(256, 256, 128, 128, 192, 32, 96, 64, name="ince3b") + + self._ince4a = Inception(480, 480, 192, 96, 208, 16, 48, 64, name="ince4a") + self._ince4b = Inception(512, 512, 160, 112, 224, 24, 64, 64, name="ince4b") + self._ince4c = Inception(512, 512, 128, 128, 256, 24, 64, 64, name="ince4c") + self._ince4d = Inception(512, 512, 112, 144, 288, 32, 64, 64, name="ince4d") + self._ince4e = Inception(528, 528, 256, 160, 320, 32, 128, 128, name="ince4e") + + self._ince5a = Inception(832, 832, 256, 160, 320, 32, 128, 128, name="ince5a") + self._ince5b = Inception(832, 832, 384, 192, 384, 48, 128, 128, name="ince5b") + + self._pool_5 = AvgPool2d(kernel_size=7, stride=7) + + self._drop = Dropout(p=0.4, mode="downscale_in_infer") + self._fc_out = Linear( + 1024, class_dim, weight_attr=xavier(1024, 1, "out"), bias_attr=ParamAttr(name="out_offset")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'googlenet_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/googlenet_imagenet.pdparams -O' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + x = self._conv(inputs) + x = self._pool(x) + x = self._conv_1(x) + x = self._conv_2(x) + x = self._pool(x) + + x = self._ince3a(x) + x = self._ince3b(x) + x = self._pool(x) + + ince4a = self._ince4a(x) + x = self._ince4b(ince4a) + x = self._ince4c(x) + ince4d = self._ince4d(x) + x = self._ince4e(ince4d) + x = self._pool(x) + + x = self._ince5a(x) + ince5b = self._ince5b(x) + + x = self._pool_5(ince5b) + x = self._drop(x) + x = paddle.squeeze(x, axis=[2, 3]) + out = self._fc_out(x) + out = F.softmax(out) + + return out diff --git a/modules/image/classification/inceptionv4_imagenet/module.py b/modules/image/classification/inceptionv4_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..3fc4f1d028e7214fc9e466f1b1e2662ed79f18c5 --- /dev/null +++ b/modules/image/classification/inceptionv4_imagenet/module.py @@ -0,0 +1,352 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
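+# A minimal usage sketch, included for illustration only: it assumes a PaddleHub 2.x +# environment with this module installed; hub.Module(name=...) is PaddleHub's standard loader. +# +#     import paddlehub as hub +#     model = hub.Module(name="inceptionv4_imagenet")  # returns this InceptionV4 nn.Layer +#     # calling it on a float32 batch of shape [N, 3, H, W] yields logits of shape +#     # [N, class_dim]; the AdaptiveAvgPool2d head keeps the input size flexible.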
+import os +import math + +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + padding: int = 0, + groups: int = 1, + act: str = 'relu', + name: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=padding, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + bn_name = name + "_bn" + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(name=bn_name + "_offset"), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class InceptionStem(nn.Layer): + """InceptionV4 stem module.""" + + def __init__(self): + super(InceptionStem, self).__init__() + self._conv_1 = ConvBNLayer(3, 32, 3, stride=2, act="relu", name="conv1_3x3_s2") + self._conv_2 = ConvBNLayer(32, 32, 3, act="relu", name="conv2_3x3_s1") + self._conv_3 = ConvBNLayer(32, 64, 3, padding=1, act="relu", name="conv3_3x3_s1") + self._pool = MaxPool2d(kernel_size=3, stride=2, padding=0) + self._conv2 = ConvBNLayer(64, 96, 3, stride=2, act="relu", name="inception_stem1_3x3_s2") + self._conv1_1 = ConvBNLayer(160, 64, 1, act="relu", name="inception_stem2_3x3_reduce") + self._conv1_2 = ConvBNLayer(64, 96, 3, act="relu", name="inception_stem2_3x3") + self._conv2_1 = ConvBNLayer(160, 64, 1, act="relu", name="inception_stem2_1x7_reduce") + self._conv2_2 = ConvBNLayer(64, 64, (7, 1), padding=(3, 0), act="relu", name="inception_stem2_1x7") + self._conv2_3 = ConvBNLayer(64, 64, (1, 7), padding=(0, 3), act="relu", name="inception_stem2_7x1") + self._conv2_4 = ConvBNLayer(64, 96, 3, act="relu", name="inception_stem2_3x3_2") + self._conv3 = ConvBNLayer(192, 192, 3, stride=2, act="relu", name="inception_stem3_3x3_s2") + + def forward(self, inputs: paddle.Tensor): + conv = self._conv_1(inputs) + conv = self._conv_2(conv) + conv = self._conv_3(conv) + + pool1 = self._pool(conv) + conv2 = self._conv2(conv) + concat = paddle.concat([pool1, conv2], axis=1) + + conv1 = self._conv1_1(concat) + conv1 = self._conv1_2(conv1) + + conv2 = self._conv2_1(concat) + conv2 = self._conv2_2(conv2) + conv2 = self._conv2_3(conv2) + conv2 = self._conv2_4(conv2) + + concat = paddle.concat([conv1, conv2], axis=1) + + conv1 = self._conv3(concat) + pool1 = self._pool(concat) + + concat = paddle.concat([conv1, pool1], axis=1) + return concat + + +class InceptionA(nn.Layer): + """InceptionA module for InceptionV4.""" + + def __init__(self, name: str): + super(InceptionA, self).__init__() + self._pool = AvgPool2d(kernel_size=3, stride=1, padding=1) + self._conv1 = ConvBNLayer(384, 96, 1, act="relu", name="inception_a" + name + "_1x1") + self._conv2 = ConvBNLayer(384, 96, 1, act="relu", name="inception_a" + name + "_1x1_2") + self._conv3_1 = ConvBNLayer(384, 64, 1, act="relu", name="inception_a" + name + "_3x3_reduce") + 
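+ # the two 3x3 branches below first reduce 384 -> 64 with a 1x1 conv, then expand back to 96; + # all four branches end at 96 channels, so forward() concatenates to 4 * 96 = 384 channels, + # matching the input width and keeping InceptionA blocks stackable.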
self._conv3_2 = ConvBNLayer(64, 96, 3, padding=1, act="relu", name="inception_a" + name + "_3x3") + self._conv4_1 = ConvBNLayer(384, 64, 1, act="relu", name="inception_a" + name + "_3x3_2_reduce") + self._conv4_2 = ConvBNLayer(64, 96, 3, padding=1, act="relu", name="inception_a" + name + "_3x3_2") + self._conv4_3 = ConvBNLayer(96, 96, 3, padding=1, act="relu", name="inception_a" + name + "_3x3_3") + + def forward(self, inputs: paddle.Tensor): + pool1 = self._pool(inputs) + conv1 = self._conv1(pool1) + + conv2 = self._conv2(inputs) + + conv3 = self._conv3_1(inputs) + conv3 = self._conv3_2(conv3) + + conv4 = self._conv4_1(inputs) + conv4 = self._conv4_2(conv4) + conv4 = self._conv4_3(conv4) + + concat = paddle.concat([conv1, conv2, conv3, conv4], axis=1) + return concat + + +class ReductionA(nn.Layer): + """ReductionA module for InceptionV4.""" + + def __init__(self): + super(ReductionA, self).__init__() + self._pool = MaxPool2d(kernel_size=3, stride=2, padding=0) + self._conv2 = ConvBNLayer(384, 384, 3, stride=2, act="relu", name="reduction_a_3x3") + self._conv3_1 = ConvBNLayer(384, 192, 1, act="relu", name="reduction_a_3x3_2_reduce") + self._conv3_2 = ConvBNLayer(192, 224, 3, padding=1, act="relu", name="reduction_a_3x3_2") + self._conv3_3 = ConvBNLayer(224, 256, 3, stride=2, act="relu", name="reduction_a_3x3_3") + + def forward(self, inputs: paddle.Tensor): + pool1 = self._pool(inputs) + conv2 = self._conv2(inputs) + conv3 = self._conv3_1(inputs) + conv3 = self._conv3_2(conv3) + conv3 = self._conv3_3(conv3) + concat = paddle.concat([pool1, conv2, conv3], axis=1) + return concat + + +class InceptionB(nn.Layer): + """InceptionB module for InceptionV4.""" + + def __init__(self, name: str = None): + super(InceptionB, self).__init__() + self._pool = AvgPool2d(kernel_size=3, stride=1, padding=1) + self._conv1 = ConvBNLayer(1024, 128, 1, act="relu", name="inception_b" + name + "_1x1") + self._conv2 = ConvBNLayer(1024, 384, 1, act="relu", name="inception_b" + name + "_1x1_2") + self._conv3_1 = ConvBNLayer(1024, 192, 1, act="relu", name="inception_b" + name + "_1x7_reduce") + self._conv3_2 = ConvBNLayer(192, 224, (1, 7), padding=(0, 3), act="relu", name="inception_b" + name + "_1x7") + self._conv3_3 = ConvBNLayer(224, 256, (7, 1), padding=(3, 0), act="relu", name="inception_b" + name + "_7x1") + self._conv4_1 = ConvBNLayer(1024, 192, 1, act="relu", name="inception_b" + name + "_7x1_2_reduce") + self._conv4_2 = ConvBNLayer(192, 192, (1, 7), padding=(0, 3), act="relu", name="inception_b" + name + "_1x7_2") + self._conv4_3 = ConvBNLayer(192, 224, (7, 1), padding=(3, 0), act="relu", name="inception_b" + name + "_7x1_2") + self._conv4_4 = ConvBNLayer(224, 224, (1, 7), padding=(0, 3), act="relu", name="inception_b" + name + "_1x7_3") + self._conv4_5 = ConvBNLayer(224, 256, (7, 1), padding=(3, 0), act="relu", name="inception_b" + name + "_7x1_3") + + def forward(self, inputs: paddle.Tensor): + pool1 = self._pool(inputs) + conv1 = self._conv1(pool1) + + conv2 = self._conv2(inputs) + + conv3 = self._conv3_1(inputs) + conv3 = self._conv3_2(conv3) + conv3 = self._conv3_3(conv3) + + conv4 = self._conv4_1(inputs) + conv4 = self._conv4_2(conv4) + conv4 = self._conv4_3(conv4) + conv4 = self._conv4_4(conv4) + conv4 = self._conv4_5(conv4) + + concat = paddle.concat([conv1, conv2, conv3, conv4], axis=1) + return concat + + +class ReductionB(nn.Layer): + """ReductionB module for InceptionV4.""" + + def __init__(self): + super(ReductionB, self).__init__() + self._pool = MaxPool2d(kernel_size=3, stride=2, padding=0) + 
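+ # alongside the stride-2 max-pool (which keeps 1024 channels), the two stride-2 conv + # branches below end at 192 and 320 channels; concatenated they give 1024 + 192 + 320 = 1536 + # channels, the input width every InceptionC block expects.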
self._conv2_1 = ConvBNLayer(1024, 192, 1, act="relu", name="reduction_b_3x3_reduce") + self._conv2_2 = ConvBNLayer(192, 192, 3, stride=2, act="relu", name="reduction_b_3x3") + self._conv3_1 = ConvBNLayer(1024, 256, 1, act="relu", name="reduction_b_1x7_reduce") + self._conv3_2 = ConvBNLayer(256, 256, (1, 7), padding=(0, 3), act="relu", name="reduction_b_1x7") + self._conv3_3 = ConvBNLayer(256, 320, (7, 1), padding=(3, 0), act="relu", name="reduction_b_7x1") + self._conv3_4 = ConvBNLayer(320, 320, 3, stride=2, act="relu", name="reduction_b_3x3_2") + + def forward(self, inputs: paddle.Tensor): + pool1 = self._pool(inputs) + + conv2 = self._conv2_1(inputs) + conv2 = self._conv2_2(conv2) + + conv3 = self._conv3_1(inputs) + conv3 = self._conv3_2(conv3) + conv3 = self._conv3_3(conv3) + conv3 = self._conv3_4(conv3) + + concat = paddle.concat([pool1, conv2, conv3], axis=1) + + return concat + + +class InceptionC(nn.Layer): + """InceptionC module for InceptionV4.""" + + def __init__(self, name: str = None): + super(InceptionC, self).__init__() + self._pool = AvgPool2d(kernel_size=3, stride=1, padding=1) + self._conv1 = ConvBNLayer(1536, 256, 1, act="relu", name="inception_c" + name + "_1x1") + self._conv2 = ConvBNLayer(1536, 256, 1, act="relu", name="inception_c" + name + "_1x1_2") + self._conv3_0 = ConvBNLayer(1536, 384, 1, act="relu", name="inception_c" + name + "_1x1_3") + self._conv3_1 = ConvBNLayer(384, 256, (1, 3), padding=(0, 1), act="relu", name="inception_c" + name + "_1x3") + self._conv3_2 = ConvBNLayer(384, 256, (3, 1), padding=(1, 0), act="relu", name="inception_c" + name + "_3x1") + self._conv4_0 = ConvBNLayer(1536, 384, 1, act="relu", name="inception_c" + name + "_1x1_4") + self._conv4_00 = ConvBNLayer(384, 448, (1, 3), padding=(0, 1), act="relu", name="inception_c" + name + "_1x3_2") + self._conv4_000 = ConvBNLayer( + 448, 512, (3, 1), padding=(1, 0), act="relu", name="inception_c" + name + "_3x1_2") + self._conv4_1 = ConvBNLayer(512, 256, (1, 3), padding=(0, 1), act="relu", name="inception_c" + name + "_1x3_3") + self._conv4_2 = ConvBNLayer(512, 256, (3, 1), padding=(1, 0), act="relu", name="inception_c" + name + "_3x1_3") + + def forward(self, inputs: paddle.Tensor): + pool1 = self._pool(inputs) + conv1 = self._conv1(pool1) + + conv2 = self._conv2(inputs) + + conv3 = self._conv3_0(inputs) + conv3_1 = self._conv3_1(conv3) + conv3_2 = self._conv3_2(conv3) + + conv4 = self._conv4_0(inputs) + conv4 = self._conv4_00(conv4) + conv4 = self._conv4_000(conv4) + conv4_1 = self._conv4_1(conv4) + conv4_2 = self._conv4_2(conv4) + + concat = paddle.concat([conv1, conv2, conv3_1, conv3_2, conv4_1, conv4_2], axis=1) + + return concat + + +@moduleinfo( + name="inceptionv4_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="InceptionV4_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class InceptionV4(nn.Layer): + """InceptionV4 model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(InceptionV4, self).__init__() + self._inception_stem = InceptionStem() + + self._inceptionA_1 = InceptionA(name="1") + self._inceptionA_2 = InceptionA(name="2") + self._inceptionA_3 = InceptionA(name="3") + self._inceptionA_4 = InceptionA(name="4") + self._reductionA = ReductionA() + + self._inceptionB_1 = InceptionB(name="1") + self._inceptionB_2 = InceptionB(name="2") + self._inceptionB_3 = InceptionB(name="3") + self._inceptionB_4 = 
InceptionB(name="4") + self._inceptionB_5 = InceptionB(name="5") + self._inceptionB_6 = InceptionB(name="6") + self._inceptionB_7 = InceptionB(name="7") + self._reductionB = ReductionB() + + self._inceptionC_1 = InceptionC(name="1") + self._inceptionC_2 = InceptionC(name="2") + self._inceptionC_3 = InceptionC(name="3") + + self.avg_pool = AdaptiveAvgPool2d(1) + self._drop = Dropout(p=0.2, mode="downscale_in_infer") + stdv = 1.0 / math.sqrt(1536 * 1.0) + self.out = Linear( + 1536, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="final_fc_weights"), + bias_attr=ParamAttr(name="final_fc_offset")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'inceptionv4_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/inceptionv4_imagenet.pdparams -O' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs): + x = self._inception_stem(inputs) + + x = self._inceptionA_1(x) + x = self._inceptionA_2(x) + x = self._inceptionA_3(x) + x = self._inceptionA_4(x) + x = self._reductionA(x) + + x = self._inceptionB_1(x) + x = self._inceptionB_2(x) + x = self._inceptionB_3(x) + x = self._inceptionB_4(x) + x = self._inceptionB_5(x) + x = self._inceptionB_6(x) + x = self._inceptionB_7(x) + x = self._reductionB(x) + + x = self._inceptionC_1(x) + x = self._inceptionC_2(x) + x = self._inceptionC_3(x) + + x = self.avg_pool(x) + x = paddle.squeeze(x, axis=[2, 3]) + x = self._drop(x) + x = self.out(x) + return x diff --git a/modules/image/classification/mobilenet_v1_imagenet/module.py b/modules/image/classification/mobilenet_v1_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..41dda6960f575ed2c3da4744d5cb91bce4d8e3cb --- /dev/null +++ b/modules/image/classification/mobilenet_v1_imagenet/module.py @@ -0,0 +1,263 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
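+# MobileNetV1 swaps dense 3x3 convolutions for a depthwise 3x3 followed by a pointwise 1x1 +# (the DepthwiseSeparable layer below). Rough per-pixel cost with C_in = C_out = C: +#     dense 3x3:  9 * C * C  multiply-accumulates +#     separable:  9 * C + C * C +# for C = 256 that is 589,824 vs 67,840 MACs, roughly an 8.7x reduction.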
+ +import os +import math + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import MSRA +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, + num_channels: int, + filter_size: int, + num_filters: int, + stride: int, + padding: int, + channels: int = None, + num_groups: int = 1, + act: str = 'relu', + name: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + weight_attr=ParamAttr(initializer=MSRA(), name=name + "_weights"), + bias_attr=False) + + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name + "_bn_scale"), + bias_attr=ParamAttr(name + "_bn_offset"), + moving_mean_name=name + "_bn_mean", + moving_variance_name=name + "_bn_variance") + + def forward(self, inputs: paddle.Tensor): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class DepthwiseSeparable(nn.Layer): + """Depthwise and pointwise conv layer.""" + + def __init__(self, + num_channels: int, + num_filters1: int, + num_filters2: int, + num_groups: int, + stride: int, + scale: float, + name: str = None): + super(DepthwiseSeparable, self).__init__() + + self._depthwise_conv = ConvBNLayer( + num_channels=num_channels, + num_filters=int(num_filters1 * scale), + filter_size=3, + stride=stride, + padding=1, + num_groups=int(num_groups * scale), + name=name + "_dw") + + self._pointwise_conv = ConvBNLayer( + num_channels=int(num_filters1 * scale), + filter_size=1, + num_filters=int(num_filters2 * scale), + stride=1, + padding=0, + name=name + "_sep") + + def forward(self, inputs: paddle.Tensor): + y = self._depthwise_conv(inputs) + y = self._pointwise_conv(y) + return y + + +@moduleinfo( + name="mobilenet_v1_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="mobilenet_v1_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class MobileNetV1(nn.Layer): + """MobileNetV1""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(MobileNetV1, self).__init__() + self.block_list = [] + + self.conv1 = ConvBNLayer( + num_channels=3, filter_size=3, channels=3, num_filters=int(32), stride=2, padding=1, name="conv1") + + conv2_1 = self.add_sublayer( + "conv2_1", + sublayer=DepthwiseSeparable( + num_channels=int(32), + num_filters1=32, + num_filters2=64, + num_groups=32, + stride=1, + scale=1, + name="conv2_1")) + self.block_list.append(conv2_1) + + conv2_2 = self.add_sublayer( + "conv2_2", + sublayer=DepthwiseSeparable( + num_channels=int(64), + num_filters1=64, + num_filters2=128, + num_groups=64, + stride=2, + scale=1, + name="conv2_2")) + self.block_list.append(conv2_2) + + conv3_1 = self.add_sublayer( + "conv3_1", + sublayer=DepthwiseSeparable( + num_channels=int(128), + num_filters1=128, + num_filters2=128, + num_groups=128, + stride=1, + scale=1, + name="conv3_1")) + self.block_list.append(conv3_1) + + conv3_2 = self.add_sublayer( + "conv3_2", + sublayer=DepthwiseSeparable( + num_channels=int(128), + 
num_filters1=128, + num_filters2=256, + num_groups=128, + stride=2, + scale=1, + name="conv3_2")) + self.block_list.append(conv3_2) + + conv4_1 = self.add_sublayer( + "conv4_1", + sublayer=DepthwiseSeparable( + num_channels=int(256), + num_filters1=256, + num_filters2=256, + num_groups=256, + stride=1, + scale=1, + name="conv4_1")) + self.block_list.append(conv4_1) + + conv4_2 = self.add_sublayer( + "conv4_2", + sublayer=DepthwiseSeparable( + num_channels=int(256), + num_filters1=256, + num_filters2=512, + num_groups=256, + stride=2, + scale=1, + name="conv4_2")) + self.block_list.append(conv4_2) + + for i in range(5): + conv5 = self.add_sublayer( + "conv5_" + str(i + 1), + sublayer=DepthwiseSeparable( + num_channels=int(512), + num_filters1=512, + num_filters2=512, + num_groups=512, + stride=1, + scale=1, + name="conv5_" + str(i + 1))) + self.block_list.append(conv5) + + conv5_6 = self.add_sublayer( + "conv5_6", + sublayer=DepthwiseSeparable( + num_channels=int(512), + num_filters1=512, + num_filters2=1024, + num_groups=512, + stride=2, + scale=1, + name="conv5_6")) + self.block_list.append(conv5_6) + + conv6 = self.add_sublayer( + "conv6", + sublayer=DepthwiseSeparable( + num_channels=int(1024), + num_filters1=1024, + num_filters2=1024, + num_groups=1024, + stride=1, + scale=1, + name="conv6")) + self.block_list.append(conv6) + + self.pool2d_avg = AdaptiveAvgPool2d(1) + + self.out = Linear( + int(1024), + class_dim, + weight_attr=ParamAttr(initializer=MSRA(), name="fc7_weights"), + bias_attr=ParamAttr(name="fc7_offset")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'mobilenet_v1_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/mobilenet_v1_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv1(inputs) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, 1024]) + y = self.out(y) + return y diff --git a/modules/image/classification/mobilenet_v1_imagenet_ssld/module.py b/modules/image/classification/mobilenet_v1_imagenet_ssld/module.py new file mode 100644 index 0000000000000000000000000000000000000000..c4e551eee45a946e47e044db8ba8c92c2bf54a1e --- /dev/null +++ b/modules/image/classification/mobilenet_v1_imagenet_ssld/module.py @@ -0,0 +1,263 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
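+# This module shares its architecture with mobilenet_v1_imagenet; only the checkpoint +# differs: the _ssld weights come from PaddleClas' SSLD knowledge-distillation training +# scheme, which typically raises ImageNet top-1 accuracy at identical inference cost.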
+ +import os +import math + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import MSRA +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, + num_channels: int, + filter_size: int, + num_filters: int, + stride: int, + padding: int, + channels: int = None, + num_groups: int = 1, + act: str = 'relu', + name: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + weight_attr=ParamAttr(initializer=MSRA(), name=name + "_weights"), + bias_attr=False) + + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name + "_bn_scale"), + bias_attr=ParamAttr(name + "_bn_offset"), + moving_mean_name=name + "_bn_mean", + moving_variance_name=name + "_bn_variance") + + def forward(self, inputs: paddle.Tensor): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class DepthwiseSeparable(nn.Layer): + """Depthwise and pointwise conv layer.""" + + def __init__(self, + num_channels: int, + num_filters1: int, + num_filters2: int, + num_groups: int, + stride: int, + scale: float, + name: str = None): + super(DepthwiseSeparable, self).__init__() + + self._depthwise_conv = ConvBNLayer( + num_channels=num_channels, + num_filters=int(num_filters1 * scale), + filter_size=3, + stride=stride, + padding=1, + num_groups=int(num_groups * scale), + name=name + "_dw") + + self._pointwise_conv = ConvBNLayer( + num_channels=int(num_filters1 * scale), + filter_size=1, + num_filters=int(num_filters2 * scale), + stride=1, + padding=0, + name=name + "_sep") + + def forward(self, inputs: paddle.Tensor): + y = self._depthwise_conv(inputs) + y = self._pointwise_conv(y) + return y + + +@moduleinfo( + name="mobilenet_v1_imagenet_ssld", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="mobilenet_v1_imagenet_ssld is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class MobileNetV1(nn.Layer): + """MobileNetV1""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(MobileNetV1, self).__init__() + self.block_list = [] + + self.conv1 = ConvBNLayer( + num_channels=3, filter_size=3, channels=3, num_filters=int(32), stride=2, padding=1, name="conv1") + + conv2_1 = self.add_sublayer( + "conv2_1", + sublayer=DepthwiseSeparable( + num_channels=int(32), + num_filters1=32, + num_filters2=64, + num_groups=32, + stride=1, + scale=1, + name="conv2_1")) + self.block_list.append(conv2_1) + + conv2_2 = self.add_sublayer( + "conv2_2", + sublayer=DepthwiseSeparable( + num_channels=int(64), + num_filters1=64, + num_filters2=128, + num_groups=64, + stride=2, + scale=1, + name="conv2_2")) + self.block_list.append(conv2_2) + + conv3_1 = self.add_sublayer( + "conv3_1", + sublayer=DepthwiseSeparable( + num_channels=int(128), + num_filters1=128, + num_filters2=128, + num_groups=128, + stride=1, + scale=1, + name="conv3_1")) + self.block_list.append(conv3_1) + + conv3_2 = self.add_sublayer( + "conv3_2", + sublayer=DepthwiseSeparable( + num_channels=int(128), + 
num_filters1=128, + num_filters2=256, + num_groups=128, + stride=2, + scale=1, + name="conv3_2")) + self.block_list.append(conv3_2) + + conv4_1 = self.add_sublayer( + "conv4_1", + sublayer=DepthwiseSeparable( + num_channels=int(256), + num_filters1=256, + num_filters2=256, + num_groups=256, + stride=1, + scale=1, + name="conv4_1")) + self.block_list.append(conv4_1) + + conv4_2 = self.add_sublayer( + "conv4_2", + sublayer=DepthwiseSeparable( + num_channels=int(256), + num_filters1=256, + num_filters2=512, + num_groups=256, + stride=2, + scale=1, + name="conv4_2")) + self.block_list.append(conv4_2) + + for i in range(5): + conv5 = self.add_sublayer( + "conv5_" + str(i + 1), + sublayer=DepthwiseSeparable( + num_channels=int(512), + num_filters1=512, + num_filters2=512, + num_groups=512, + stride=1, + scale=1, + name="conv5_" + str(i + 1))) + self.block_list.append(conv5) + + conv5_6 = self.add_sublayer( + "conv5_6", + sublayer=DepthwiseSeparable( + num_channels=int(512), + num_filters1=512, + num_filters2=1024, + num_groups=512, + stride=2, + scale=1, + name="conv5_6")) + self.block_list.append(conv5_6) + + conv6 = self.add_sublayer( + "conv6", + sublayer=DepthwiseSeparable( + num_channels=int(1024), + num_filters1=1024, + num_filters2=1024, + num_groups=1024, + stride=1, + scale=1, + name="conv6")) + self.block_list.append(conv6) + + self.pool2d_avg = AdaptiveAvgPool2d(1) + + self.out = Linear( + int(1024), + class_dim, + weight_attr=ParamAttr(initializer=MSRA(), name="fc7_weights"), + bias_attr=ParamAttr(name="fc7_offset")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'mobilenet_v1_ssld_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/mobilenet_v1_ssld_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv1(inputs) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, 1024]) + y = self.out(y) + return y diff --git a/hub_module/modules/image/classification/mobilenet_v2_animals/README.md b/modules/image/classification/mobilenet_v2_animals/README.md similarity index 100% rename from hub_module/modules/image/classification/mobilenet_v2_animals/README.md rename to modules/image/classification/mobilenet_v2_animals/README.md diff --git a/hub_module/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/__init__.py b/modules/image/classification/mobilenet_v2_animals/__init__.py similarity index 100% rename from hub_module/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/__init__.py rename to modules/image/classification/mobilenet_v2_animals/__init__.py diff --git a/modules/image/classification/mobilenet_v2_animals/data_feed.py b/modules/image/classification/mobilenet_v2_animals/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..99a0855fd6a93dbecd081cef312a04a350cfcc50 --- /dev/null +++ b/modules/image/classification/mobilenet_v2_animals/data_feed.py @@ -0,0 +1,82 @@ +# coding=utf-8 +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader'] + +DATA_DIM = 224 +img_mean = np.array([0.485, 0.456, 
0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + +def resize_short(img, target_size): + percent = float(target_size) / min(img.size[0], img.size[1]) + resized_width = int(round(img.size[0] * percent)) + resized_height = int(round(img.size[1] * percent)) + img = img.resize((resized_width, resized_height), Image.LANCZOS) + return img + + +def crop_image(img, target_size, center): + width, height = img.size + size = target_size + if center: + w_start = (width - size) // 2 + h_start = (height - size) // 2 + else: + w_start = np.random.randint(0, width - size + 1) + h_start = np.random.randint(0, height - size + 1) + w_end = w_start + size + h_end = h_start + size + img = img.crop((w_start, h_start, w_end, h_end)) + return img + + +def process_image(img): + img = resize_short(img, target_size=256) + img = crop_image(img, target_size=DATA_DIM, center=True) + if img.mode != 'RGB': + img = img.convert('RGB') + img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list[numpy.ndarray]): images data, shape of each is [H, W, C]. + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + each['org_im_path'] = im_path + each['org_im'] = Image.open(im_path) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + if images is not None: + assert isinstance(images, list), "images should be a list." + for im in images: + each = OrderedDict() + each['org_im'] = Image.fromarray(im[:, :, ::-1]) + each['org_im_path'] = 'ndarray_time={}'.format(int(time.time() * 1e6)) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + + for element in component: + element['image'] = process_image(element['org_im']) + yield element diff --git a/hub_module/modules/image/classification/mobilenet_v2_animals/label_list.txt b/modules/image/classification/mobilenet_v2_animals/label_list.txt similarity index 100% rename from hub_module/modules/image/classification/mobilenet_v2_animals/label_list.txt rename to modules/image/classification/mobilenet_v2_animals/label_list.txt diff --git a/modules/image/classification/mobilenet_v2_animals/mobilenet_v2.py b/modules/image/classification/mobilenet_v2_animals/mobilenet_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..7186d5802964664ad7674b208cf295d65c8931bf --- /dev/null +++ b/modules/image/classification/mobilenet_v2_animals/mobilenet_v2.py @@ -0,0 +1,182 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle.fluid as fluid +from paddle.fluid.initializer import MSRA +from paddle.fluid.param_attr import ParamAttr + +__all__ = ['MobileNetV2'] + +train_parameters = { + "input_size": [3, 224, 224], + "input_mean": [0.485, 0.456, 0.406], + "input_std": [0.229, 0.224, 0.225], + "learning_strategy": { + "name": "piecewise_decay", + "batch_size": 256, + "epochs": [30, 60, 90], + "steps": [0.1, 0.01, 0.001, 0.0001] + } +} + + +class MobileNetV2(): + def __init__(self): + self.params = train_parameters + + def net(self, input, class_dim=1000, scale=1.0): + + # each tuple is (expansion factor t, output channels c, repeats n, stride s) + bottleneck_params_list = [ + (1, 16, 1,
1), + (6, 24, 2, 2), + (6, 32, 3, 2), + (6, 64, 4, 2), + (6, 96, 3, 1), + (6, 160, 3, 2), + (6, 320, 1, 1), + ] + + #conv1 + input = self.conv_bn_layer( + input, num_filters=int(32 * scale), filter_size=3, stride=2, padding=1, if_act=True, name='conv1_1') + + # bottleneck sequences + i = 1 + in_c = int(32 * scale) + for layer_setting in bottleneck_params_list: + t, c, n, s = layer_setting + i += 1 + input = self.invresi_blocks(input=input, in_c=in_c, t=t, c=int(c * scale), n=n, s=s, name='conv' + str(i)) + in_c = int(c * scale) + #last_conv + input = self.conv_bn_layer( + input=input, + num_filters=int(1280 * scale) if scale > 1.0 else 1280, + filter_size=1, + stride=1, + padding=0, + if_act=True, + name='conv9') + + input = fluid.layers.pool2d(input=input, pool_size=7, pool_stride=1, pool_type='avg', global_pooling=True) + + output = fluid.layers.fc( + input=input, + size=class_dim, + param_attr=ParamAttr(name='fc10_weights'), + bias_attr=ParamAttr(name='fc10_offset')) + return output, input + + def conv_bn_layer(self, + input, + filter_size, + num_filters, + stride, + padding, + channels=None, + num_groups=1, + if_act=True, + name=None, + use_cudnn=True): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + act=None, + use_cudnn=use_cudnn, + param_attr=ParamAttr(name=name + '_weights'), + bias_attr=False) + bn_name = name + '_bn' + bn = fluid.layers.batch_norm( + input=conv, + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(name=bn_name + "_offset"), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + if if_act: + return fluid.layers.relu6(bn) + else: + return bn + + def shortcut(self, input, data_residual): + return fluid.layers.elementwise_add(input, data_residual) + + def inverted_residual_unit(self, + input, + num_in_filter, + num_filters, + ifshortcut, + stride, + filter_size, + padding, + expansion_factor, + name=None): + num_expfilter = int(round(num_in_filter * expansion_factor)) + + channel_expand = self.conv_bn_layer( + input=input, + num_filters=num_expfilter, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=True, + name=name + '_expand') + + bottleneck_conv = self.conv_bn_layer( + input=channel_expand, + num_filters=num_expfilter, + filter_size=filter_size, + stride=stride, + padding=padding, + num_groups=num_expfilter, + if_act=True, + name=name + '_dwise', + use_cudnn=False) + + linear_out = self.conv_bn_layer( + input=bottleneck_conv, + num_filters=num_filters, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=False, + name=name + '_linear') + if ifshortcut: + out = self.shortcut(input=input, data_residual=linear_out) + return out + else: + return linear_out + + def invresi_blocks(self, input, in_c, t, c, n, s, name=None): + first_block = self.inverted_residual_unit( + input=input, + num_in_filter=in_c, + num_filters=c, + ifshortcut=False, + stride=s, + filter_size=3, + padding=1, + expansion_factor=t, + name=name + '_1') + + last_residual_block = first_block + last_c = c + + for i in range(1, n): + last_residual_block = self.inverted_residual_unit( + input=last_residual_block, + num_in_filter=last_c, + num_filters=c, + ifshortcut=True, + stride=1, + filter_size=3, + padding=1, + expansion_factor=t, + name=name + '_' + str(i + 1)) + return last_residual_block diff --git a/modules/image/classification/mobilenet_v2_animals/module.py 
b/modules/image/classification/mobilenet_v2_animals/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..c691a41e90c60def9b42a8fc246e2a900e86ee01
--- /dev/null
+++ b/modules/image/classification/mobilenet_v2_animals/module.py
@@ -0,0 +1,228 @@
+# coding=utf-8
+from __future__ import absolute_import
+from __future__ import division
+
+import ast
+import argparse
+import os
+
+import numpy as np
+import paddle.fluid as fluid
+import paddlehub as hub
+from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+from paddlehub.module.module import moduleinfo, runnable, serving
+from paddlehub.common.paddle_helper import add_vars_prefix
+
+from mobilenet_v2_animals.processor import postprocess, base64_to_cv2
+from mobilenet_v2_animals.data_feed import reader
+from mobilenet_v2_animals.mobilenet_v2 import MobileNetV2
+
+
+@moduleinfo(
+    name="mobilenet_v2_animals",
+    type="CV/image_classification",
+    author="baidu-vis",
+    author_email="",
+    summary=
+    "MobileNet V2 is an image classification model; this module is trained with Baidu's self-built animals dataset.",
+    version="1.0.0")
+class MobileNetV2Animals(hub.Module):
+    def _initialize(self):
+        self.default_pretrained_model_path = os.path.join(self.directory, "model")
+        label_file = os.path.join(self.directory, "label_list.txt")
+        with open(label_file, 'r', encoding='utf-8') as file:
+            self.label_list = file.read().split("\n")[:-1]
+        self._set_config()
+
+    def get_expected_image_width(self):
+        return 224
+
+    def get_expected_image_height(self):
+        return 224
+
+    def get_pretrained_images_mean(self):
+        im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3)
+        return im_mean
+
+    def get_pretrained_images_std(self):
+        im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3)
+        return im_std
+
+    def _set_config(self):
+        """
+        predictor config setting
+        """
+        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
+        cpu_config.disable_glog_info()
+        cpu_config.disable_gpu()
+        self.cpu_predictor = create_paddle_predictor(cpu_config)
+
+        try:
+            _places = os.environ["CUDA_VISIBLE_DEVICES"]
+            int(_places[0])
+            use_gpu = True
+        except:
+            use_gpu = False
+        if use_gpu:
+            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
+            gpu_config.disable_glog_info()
+            gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0)
+            self.gpu_predictor = create_paddle_predictor(gpu_config)
+
+    def context(self, trainable=True, pretrained=True):
+        """context for transfer learning.
+
+        Args:
+            trainable (bool): Set parameters in program to be trainable.
+            pretrained (bool): Whether to load the pretrained model.
+
+        Returns:
+            inputs (dict): key is 'image', corresponding value is the image tensor.
+            outputs (dict): keys are:
+                'classification', corresponding value is the result of classification.
+                'feature_map', corresponding value is the output of the layer before the fully connected layer.
+            context_prog (fluid.Program): program for transfer learning.
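+
+        A minimal fine-tuning sketch (the variable names below are
+        illustrative only, not part of this module's API):
+            module = hub.Module(name="mobilenet_v2_animals")
+            inputs, outputs, program = module.context(trainable=True)
+            image = inputs['image']
+            feature_map = outputs['feature_map']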
+ """ + context_prog = fluid.Program() + startup_prog = fluid.Program() + with fluid.program_guard(context_prog, startup_prog): + with fluid.unique_name.guard(): + image = fluid.layers.data(name="image", shape=[3, 224, 224], dtype="float32") + mobile_net = MobileNetV2() + output, feature_map = mobile_net.net(input=image, class_dim=len(self.label_list), scale=1.0) + + name_prefix = '@HUB_{}@'.format(self.name) + inputs = {'image': name_prefix + image.name} + outputs = {'classification': name_prefix + output.name, 'feature_map': name_prefix + feature_map.name} + add_vars_prefix(context_prog, name_prefix) + add_vars_prefix(startup_prog, name_prefix) + global_vars = context_prog.global_block().vars + inputs = {key: global_vars[value] for key, value in inputs.items()} + outputs = {key: global_vars[value] for key, value in outputs.items()} + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + # pretrained + if pretrained: + + def _if_exist(var): + b = os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + return b + + fluid.io.load_vars(exe, self.default_pretrained_model_path, context_prog, predicate=_if_exist) + else: + exe.run(startup_prog) + # trainable + for param in context_prog.global_block().iter_parameters(): + param.trainable = trainable + return inputs, outputs, context_prog + + def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1): + """ + API for image classification. + + Args: + images (numpy.ndarray): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. + + Returns: + res (list[dict]): The classfication results. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+                )
+
+        all_data = list()
+        for yield_data in reader(images, paths):
+            all_data.append(yield_data)
+
+        total_num = len(all_data)
+        loop_num = int(np.ceil(total_num / batch_size))
+
+        res = list()
+        for iter_id in range(loop_num):
+            batch_data = list()
+            handle_id = iter_id * batch_size
+            for image_id in range(batch_size):
+                try:
+                    batch_data.append(all_data[handle_id + image_id])
+                except IndexError:
+                    pass
+            # feed batch image
+            batch_image = np.array([data['image'] for data in batch_data])
+            batch_image = PaddleTensor(batch_image.copy())
+            predictor_output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run(
+                [batch_image])
+            out = postprocess(data_out=predictor_output[0].as_ndarray(), label_list=self.label_list, top_k=top_k)
+            res += out
+        return res
+
+    def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True):
+        if combined:
+            model_filename = "__model__" if not model_filename else model_filename
+            params_filename = "__params__" if not params_filename else params_filename
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+
+        program, feeded_var_names, target_vars = fluid.io.load_inference_model(
+            dirname=self.default_pretrained_model_path, executor=exe)
+
+        fluid.io.save_inference_model(
+            dirname=dirname,
+            main_program=program,
+            executor=exe,
+            feeded_var_names=feeded_var_names,
+            target_vars=target_vars,
+            model_filename=model_filename,
+            params_filename=params_filename)
+
+    @serving
+    def serving_method(self, images, **kwargs):
+        """
+        Run as a service.
+        """
+        images_decode = [base64_to_cv2(image) for image in images]
+        results = self.classification(images=images_decode, **kwargs)
+        return results
+
+    @runnable
+    def run_cmd(self, argvs):
+        """
+        Run as a command.
+        """
+        self.parser = argparse.ArgumentParser(
+            description="Run the {} module.".format(self.name),
+            prog='hub run {}'.format(self.name),
+            usage='%(prog)s',
+            add_help=True)
+        self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required")
+        self.arg_config_group = self.parser.add_argument_group(
+            title="Config options", description="Run configuration for controlling module behavior, not required.")
+        self.add_module_config_arg()
+        self.add_module_input_arg()
+        args = self.parser.parse_args(argvs)
+        results = self.classification(
+            paths=[args.input_path], batch_size=args.batch_size, use_gpu=args.use_gpu, top_k=args.top_k)
+        return results
+
+    def add_module_config_arg(self):
+        """
+        Add the command config options.
+        """
+        self.arg_config_group.add_argument(
+            '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not.")
+        self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.")
+        self.arg_config_group.add_argument('--top_k', type=ast.literal_eval, default=1, help="Return top k results.")
+
+    def add_module_input_arg(self):
+        """
+        Add the command input options.
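+
+        A hypothetical invocation (the image path is illustrative):
+            hub run mobilenet_v2_animals --input_path cat.jpg --use_gpu False --top_k 3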
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") diff --git a/hub_module/modules/image/classification/mobilenet_v2_animals/processor.py b/modules/image/classification/mobilenet_v2_animals/processor.py similarity index 100% rename from hub_module/modules/image/classification/mobilenet_v2_animals/processor.py rename to modules/image/classification/mobilenet_v2_animals/processor.py diff --git a/hub_module/modules/image/classification/mobilenet_v2_dishes/README.md b/modules/image/classification/mobilenet_v2_dishes/README.md similarity index 100% rename from hub_module/modules/image/classification/mobilenet_v2_dishes/README.md rename to modules/image/classification/mobilenet_v2_dishes/README.md diff --git a/hub_module/modules/image/classification/mobilenet_v2_animals/__init__.py b/modules/image/classification/mobilenet_v2_dishes/__init__.py similarity index 100% rename from hub_module/modules/image/classification/mobilenet_v2_animals/__init__.py rename to modules/image/classification/mobilenet_v2_dishes/__init__.py diff --git a/modules/image/classification/mobilenet_v2_dishes/data_feed.py b/modules/image/classification/mobilenet_v2_dishes/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..99a0855fd6a93dbecd081cef312a04a350cfcc50 --- /dev/null +++ b/modules/image/classification/mobilenet_v2_dishes/data_feed.py @@ -0,0 +1,82 @@ +# coding=utf-8 +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader'] + +DATA_DIM = 224 +img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + +def resize_short(img, target_size): + percent = float(target_size) / min(img.size[0], img.size[1]) + resized_width = int(round(img.size[0] * percent)) + resized_height = int(round(img.size[1] * percent)) + img = img.resize((resized_width, resized_height), Image.LANCZOS) + return img + + +def crop_image(img, target_size, center): + width, height = img.size + size = target_size + if center == True: + w_start = (width - size) / 2 + h_start = (height - size) / 2 + else: + w_start = np.random.randint(0, width - size + 1) + h_start = np.random.randint(0, height - size + 1) + w_end = w_start + size + h_end = h_start + size + img = img.crop((w_start, h_start, w_end, h_end)) + return img + + +def process_image(img): + img = resize_short(img, target_size=256) + img = crop_image(img, target_size=DATA_DIM, center=True) + if img.mode != 'RGB': + img = img.convert('RGB') + img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list[numpy.ndarray]): images data, shape of each is [H, W, C]. + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + each['org_im_path'] = im_path + each['org_im'] = Image.open(im_path) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + if images is not None: + assert type(images), "images is a list." 
+ for im in images: + each = OrderedDict() + each['org_im'] = Image.fromarray(im[:, :, ::-1]) + each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + + for element in component: + element['image'] = process_image(element['org_im']) + yield element diff --git a/hub_module/modules/image/classification/mobilenet_v2_dishes/label_list.txt b/modules/image/classification/mobilenet_v2_dishes/label_list.txt similarity index 100% rename from hub_module/modules/image/classification/mobilenet_v2_dishes/label_list.txt rename to modules/image/classification/mobilenet_v2_dishes/label_list.txt diff --git a/modules/image/classification/mobilenet_v2_dishes/mobilenet_v2.py b/modules/image/classification/mobilenet_v2_dishes/mobilenet_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..7186d5802964664ad7674b208cf295d65c8931bf --- /dev/null +++ b/modules/image/classification/mobilenet_v2_dishes/mobilenet_v2.py @@ -0,0 +1,182 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle.fluid as fluid +from paddle.fluid.initializer import MSRA +from paddle.fluid.param_attr import ParamAttr + +__all__ = ['MobileNetV2'] + +train_parameters = { + "input_size": [3, 224, 224], + "input_mean": [0.485, 0.456, 0.406], + "input_std": [0.229, 0.224, 0.225], + "learning_strategy": { + "name": "piecewise_decay", + "batch_size": 256, + "epochs": [30, 60, 90], + "steps": [0.1, 0.01, 0.001, 0.0001] + } +} + + +class MobileNetV2(): + def __init__(self): + self.params = train_parameters + + def net(self, input, class_dim=1000, scale=1.0): + + bottleneck_params_list = [ + (1, 16, 1, 1), + (6, 24, 2, 2), + (6, 32, 3, 2), + (6, 64, 4, 2), + (6, 96, 3, 1), + (6, 160, 3, 2), + (6, 320, 1, 1), + ] + + #conv1 + input = self.conv_bn_layer( + input, num_filters=int(32 * scale), filter_size=3, stride=2, padding=1, if_act=True, name='conv1_1') + + # bottleneck sequences + i = 1 + in_c = int(32 * scale) + for layer_setting in bottleneck_params_list: + t, c, n, s = layer_setting + i += 1 + input = self.invresi_blocks(input=input, in_c=in_c, t=t, c=int(c * scale), n=n, s=s, name='conv' + str(i)) + in_c = int(c * scale) + #last_conv + input = self.conv_bn_layer( + input=input, + num_filters=int(1280 * scale) if scale > 1.0 else 1280, + filter_size=1, + stride=1, + padding=0, + if_act=True, + name='conv9') + + input = fluid.layers.pool2d(input=input, pool_size=7, pool_stride=1, pool_type='avg', global_pooling=True) + + output = fluid.layers.fc( + input=input, + size=class_dim, + param_attr=ParamAttr(name='fc10_weights'), + bias_attr=ParamAttr(name='fc10_offset')) + return output, input + + def conv_bn_layer(self, + input, + filter_size, + num_filters, + stride, + padding, + channels=None, + num_groups=1, + if_act=True, + name=None, + use_cudnn=True): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + act=None, + use_cudnn=use_cudnn, + param_attr=ParamAttr(name=name + '_weights'), + bias_attr=False) + bn_name = name + '_bn' + bn = fluid.layers.batch_norm( + input=conv, + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(name=bn_name + "_offset"), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + if if_act: + return fluid.layers.relu6(bn) + else: + return bn + + def shortcut(self, 
input, data_residual): + return fluid.layers.elementwise_add(input, data_residual) + + def inverted_residual_unit(self, + input, + num_in_filter, + num_filters, + ifshortcut, + stride, + filter_size, + padding, + expansion_factor, + name=None): + num_expfilter = int(round(num_in_filter * expansion_factor)) + + channel_expand = self.conv_bn_layer( + input=input, + num_filters=num_expfilter, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=True, + name=name + '_expand') + + bottleneck_conv = self.conv_bn_layer( + input=channel_expand, + num_filters=num_expfilter, + filter_size=filter_size, + stride=stride, + padding=padding, + num_groups=num_expfilter, + if_act=True, + name=name + '_dwise', + use_cudnn=False) + + linear_out = self.conv_bn_layer( + input=bottleneck_conv, + num_filters=num_filters, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=False, + name=name + '_linear') + if ifshortcut: + out = self.shortcut(input=input, data_residual=linear_out) + return out + else: + return linear_out + + def invresi_blocks(self, input, in_c, t, c, n, s, name=None): + first_block = self.inverted_residual_unit( + input=input, + num_in_filter=in_c, + num_filters=c, + ifshortcut=False, + stride=s, + filter_size=3, + padding=1, + expansion_factor=t, + name=name + '_1') + + last_residual_block = first_block + last_c = c + + for i in range(1, n): + last_residual_block = self.inverted_residual_unit( + input=last_residual_block, + num_in_filter=last_c, + num_filters=c, + ifshortcut=True, + stride=1, + filter_size=3, + padding=1, + expansion_factor=t, + name=name + '_' + str(i + 1)) + return last_residual_block diff --git a/modules/image/classification/mobilenet_v2_dishes/module.py b/modules/image/classification/mobilenet_v2_dishes/module.py new file mode 100644 index 0000000000000000000000000000000000000000..d0b6ed8b80cbca5bc2c63d46eb97e28031b38ab9 --- /dev/null +++ b/modules/image/classification/mobilenet_v2_dishes/module.py @@ -0,0 +1,228 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division + +import ast +import argparse +import os + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving +from paddlehub.common.paddle_helper import add_vars_prefix + +from mobilenet_v2_dishes.processor import postprocess, base64_to_cv2 +from mobilenet_v2_dishes.data_feed import reader +from mobilenet_v2_dishes.mobilenet_v2 import MobileNetV2 + + +@moduleinfo( + name="mobilenet_v2_dishes", + type="CV/image_classification", + author="baidu-vis", + author_email="", + summary= + "Mobilenet_V2 is a image classfication model, this module is trained with Baidu's self-built dishes dataset.", + version="1.0.0") +class MobileNetV2Dishes(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, "model") + label_file = os.path.join(self.directory, "label_list.txt") + with open(label_file, 'r', encoding='utf-8') as file: + self.label_list = file.read().split("\n")[:-1] + self._set_config() + + def get_expected_image_width(self): + return 224 + + def get_expected_image_height(self): + return 224 + + def get_pretrained_images_mean(self): + im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3) + return im_mean + + def get_pretrained_images_std(self): + im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) + return im_std + + def _set_config(self): 
+ """ + predictor config setting + """ + cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def context(self, trainable=True, pretrained=True): + """context for transfer learning. + + Args: + trainable (bool): Set parameters in program to be trainable. + pretrained (bool) : Whether to load pretrained model. + + Returns: + inputs (dict): key is 'image', corresponding vaule is image tensor. + outputs (dict): key is : + 'classification', corresponding value is the result of classification. + 'feature_map', corresponding value is the result of the layer before the fully connected layer. + context_prog (fluid.Program): program for transfer learning. + """ + context_prog = fluid.Program() + startup_prog = fluid.Program() + with fluid.program_guard(context_prog, startup_prog): + with fluid.unique_name.guard(): + image = fluid.layers.data(name="image", shape=[3, 224, 224], dtype="float32") + mobile_net = MobileNetV2() + output, feature_map = mobile_net.net(input=image, class_dim=len(self.label_list), scale=1.0) + + name_prefix = '@HUB_{}@'.format(self.name) + inputs = {'image': name_prefix + image.name} + outputs = {'classification': name_prefix + output.name, 'feature_map': name_prefix + feature_map.name} + add_vars_prefix(context_prog, name_prefix) + add_vars_prefix(startup_prog, name_prefix) + global_vars = context_prog.global_block().vars + inputs = {key: global_vars[value] for key, value in inputs.items()} + outputs = {key: global_vars[value] for key, value in outputs.items()} + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + # pretrained + if pretrained: + + def _if_exist(var): + b = os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + return b + + fluid.io.load_vars(exe, self.default_pretrained_model_path, context_prog, predicate=_if_exist) + else: + exe.run(startup_prog) + # trainable + for param in context_prog.global_block().iter_parameters(): + param.trainable = trainable + return inputs, outputs, context_prog + + def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1): + """ + API for image classification. + + Args: + images (numpy.ndarray): data of images, shape of each is [H, W, C], color space must be BGR. . + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. + + Returns: + res (list[dict]): The classfication results. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + batch_image = PaddleTensor(batch_image.copy()) + predictor_output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run( + [batch_image]) + out = postprocess(data_out=predictor_output[0].as_ndarray(), label_list=self.label_list, top_k=top_k) + res += out + return res + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.classification(images=images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.classification(paths=[args.input_path], batch_size=args.batch_size, use_gpu=args.use_gpu) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not.") + self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + self.arg_config_group.add_argument('--top_k', type=ast.literal_eval, default=1, help="Return top k results.") + + def add_module_input_arg(self): + """ + Add the command input options. 
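+
+        A hypothetical serving round trip (host, port and route follow the
+        usual PaddleHub Serving defaults and are illustrative):
+            # hub serving start -m mobilenet_v2_dishes
+            # then POST {"images": [<base64-encoded image>]} to
+            # http://127.0.0.1:8866/predict/mobilenet_v2_dishes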
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") diff --git a/hub_module/modules/image/classification/mobilenet_v2_dishes/processor.py b/modules/image/classification/mobilenet_v2_dishes/processor.py similarity index 100% rename from hub_module/modules/image/classification/mobilenet_v2_dishes/processor.py rename to modules/image/classification/mobilenet_v2_dishes/processor.py diff --git a/modules/image/classification/mobilenet_v2_imagenet/module.py b/modules/image/classification/mobilenet_v2_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..a9245d6743e07cae1d173480a1c9f9cb036f14ec --- /dev/null +++ b/modules/image/classification/mobilenet_v2_imagenet/module.py @@ -0,0 +1,212 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os + +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, + num_channels: int, + filter_size: int, + num_filters: int, + stride: int, + padding: int, + num_groups: int = 1, + name: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + + self._batch_norm = BatchNorm( + num_filters, + param_attr=ParamAttr(name=name + "_bn_scale"), + bias_attr=ParamAttr(name=name + "_bn_offset"), + moving_mean_name=name + "_bn_mean", + moving_variance_name=name + "_bn_variance") + + def forward(self, inputs: paddle.Tensor, if_act: bool = True): + y = self._conv(inputs) + y = self._batch_norm(y) + if if_act: + y = F.relu6(y) + return y + + +class InvertedResidualUnit(nn.Layer): + """Inverted Residual unit.""" + + def __init__(self, num_channels: int, num_in_filter: int, num_filters: int, stride: int, filter_size: int, + padding: int, expansion_factor: int, name: str): + super(InvertedResidualUnit, self).__init__() + + num_expfilter = int(round(num_in_filter * expansion_factor)) + self._expand_conv = ConvBNLayer( + num_channels=num_channels, + num_filters=num_expfilter, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + name=name + "_expand") + + self._bottleneck_conv = ConvBNLayer( + num_channels=num_expfilter, + num_filters=num_expfilter, + filter_size=filter_size, + stride=stride, + padding=padding, + num_groups=num_expfilter, + name=name + "_dwise") + + self._linear_conv = ConvBNLayer( + num_channels=num_expfilter, + num_filters=num_filters, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + name=name + "_linear") + + def forward(self, 
inputs: paddle.Tensor, ifshortcut: bool): + y = self._expand_conv(inputs, if_act=True) + y = self._bottleneck_conv(y, if_act=True) + y = self._linear_conv(y, if_act=False) + if ifshortcut: + y = paddle.elementwise_add(inputs, y) + return y + + +class InversiBlocks(nn.Layer): + """Inverted residual block composed by inverted residual unit.""" + + def __init__(self, in_c: int, t: int, c: int, n: int, s: int, name: str): + super(InversiBlocks, self).__init__() + + self._first_block = InvertedResidualUnit( + num_channels=in_c, + num_in_filter=in_c, + num_filters=c, + stride=s, + filter_size=3, + padding=1, + expansion_factor=t, + name=name + "_1") + + self._block_list = [] + for i in range(1, n): + block = self.add_sublayer( + name + "_" + str(i + 1), + sublayer=InvertedResidualUnit( + num_channels=c, + num_in_filter=c, + num_filters=c, + stride=1, + filter_size=3, + padding=1, + expansion_factor=t, + name=name + "_" + str(i + 1))) + self._block_list.append(block) + + def forward(self, inputs: paddle.Tensor): + y = self._first_block(inputs, ifshortcut=False) + for block in self._block_list: + y = block(y, ifshortcut=True) + return y + + +@moduleinfo( + name="mobilenet_v2_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="mobilenet_v2_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class MobileNet(nn.Layer): + """MobileNetV2""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(MobileNet, self).__init__() + + self.class_dim = class_dim + + bottleneck_params_list = [(1, 16, 1, 1), (6, 24, 2, 2), (6, 32, 3, 2), (6, 64, 4, 2), (6, 96, 3, 1), + (6, 160, 3, 2), (6, 320, 1, 1)] + + self.conv1 = ConvBNLayer( + num_channels=3, num_filters=int(32), filter_size=3, stride=2, padding=1, name="conv1_1") + + self.block_list = [] + i = 1 + in_c = int(32) + for layer_setting in bottleneck_params_list: + t, c, n, s = layer_setting + i += 1 + block = self.add_sublayer( + "conv" + str(i), sublayer=InversiBlocks(in_c=in_c, t=t, c=int(c), n=n, s=s, name="conv" + str(i))) + self.block_list.append(block) + in_c = int(c) + + self.out_c = 1280 + self.conv9 = ConvBNLayer( + num_channels=in_c, num_filters=self.out_c, filter_size=1, stride=1, padding=0, name="conv9") + + self.pool2d_avg = AdaptiveAvgPool2d(1) + + self.out = Linear( + self.out_c, class_dim, weight_attr=ParamAttr(name="fc10_weights"), bias_attr=ParamAttr(name="fc10_offset")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'mobilenet_v2_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/mobilenet_v2_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv1(inputs, if_act=True) + for block in self.block_list: + y = block(y) + y = self.conv9(y, if_act=True) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.out_c]) + y = self.out(y) + return y diff --git a/modules/image/classification/mobilenet_v2_imagenet_ssld/module.py b/modules/image/classification/mobilenet_v2_imagenet_ssld/module.py new file mode 100644 index 
0000000000000000000000000000000000000000..443a1d99a72570d0371162338037870cd20c1930 --- /dev/null +++ b/modules/image/classification/mobilenet_v2_imagenet_ssld/module.py @@ -0,0 +1,212 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os + +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, + num_channels: int, + filter_size: int, + num_filters: int, + stride: int, + padding: int, + num_groups: int = 1, + name: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + + self._batch_norm = BatchNorm( + num_filters, + param_attr=ParamAttr(name=name + "_bn_scale"), + bias_attr=ParamAttr(name=name + "_bn_offset"), + moving_mean_name=name + "_bn_mean", + moving_variance_name=name + "_bn_variance") + + def forward(self, inputs: paddle.Tensor, if_act: bool = True): + y = self._conv(inputs) + y = self._batch_norm(y) + if if_act: + y = F.relu6(y) + return y + + +class InvertedResidualUnit(nn.Layer): + """Inverted Residual unit.""" + + def __init__(self, num_channels: int, num_in_filter: int, num_filters: int, stride: int, filter_size: int, + padding: int, expansion_factor: int, name: str): + super(InvertedResidualUnit, self).__init__() + + num_expfilter = int(round(num_in_filter * expansion_factor)) + self._expand_conv = ConvBNLayer( + num_channels=num_channels, + num_filters=num_expfilter, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + name=name + "_expand") + + self._bottleneck_conv = ConvBNLayer( + num_channels=num_expfilter, + num_filters=num_expfilter, + filter_size=filter_size, + stride=stride, + padding=padding, + num_groups=num_expfilter, + name=name + "_dwise") + + self._linear_conv = ConvBNLayer( + num_channels=num_expfilter, + num_filters=num_filters, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + name=name + "_linear") + + def forward(self, inputs: paddle.Tensor, ifshortcut: bool): + y = self._expand_conv(inputs, if_act=True) + y = self._bottleneck_conv(y, if_act=True) + y = self._linear_conv(y, if_act=False) + if ifshortcut: + y = paddle.elementwise_add(inputs, y) + return y + + +class InversiBlocks(nn.Layer): + """Inverted residual block composed by inverted residual unit.""" + + def __init__(self, in_c: int, t: int, c: int, n: int, s: int, name: str): + super(InversiBlocks, self).__init__() + + self._first_block = InvertedResidualUnit( + num_channels=in_c, + num_in_filter=in_c, + num_filters=c, + stride=s, 
+ filter_size=3, + padding=1, + expansion_factor=t, + name=name + "_1") + + self._block_list = [] + for i in range(1, n): + block = self.add_sublayer( + name + "_" + str(i + 1), + sublayer=InvertedResidualUnit( + num_channels=c, + num_in_filter=c, + num_filters=c, + stride=1, + filter_size=3, + padding=1, + expansion_factor=t, + name=name + "_" + str(i + 1))) + self._block_list.append(block) + + def forward(self, inputs: paddle.Tensor): + y = self._first_block(inputs, ifshortcut=False) + for block in self._block_list: + y = block(y, ifshortcut=True) + return y + + +@moduleinfo( + name="mobilenet_v2_imagenet_ssld", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="mobilenet_v2_imagenet_ssld is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class MobileNet(nn.Layer): + """MobileNetV2""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(MobileNet, self).__init__() + + self.class_dim = class_dim + + bottleneck_params_list = [(1, 16, 1, 1), (6, 24, 2, 2), (6, 32, 3, 2), (6, 64, 4, 2), (6, 96, 3, 1), + (6, 160, 3, 2), (6, 320, 1, 1)] + + self.conv1 = ConvBNLayer( + num_channels=3, num_filters=int(32), filter_size=3, stride=2, padding=1, name="conv1_1") + + self.block_list = [] + i = 1 + in_c = int(32) + for layer_setting in bottleneck_params_list: + t, c, n, s = layer_setting + i += 1 + block = self.add_sublayer( + "conv" + str(i), sublayer=InversiBlocks(in_c=in_c, t=t, c=int(c), n=n, s=s, name="conv" + str(i))) + self.block_list.append(block) + in_c = int(c) + + self.out_c = 1280 + self.conv9 = ConvBNLayer( + num_channels=in_c, num_filters=self.out_c, filter_size=1, stride=1, padding=0, name="conv9") + + self.pool2d_avg = AdaptiveAvgPool2d(1) + + self.out = Linear( + self.out_c, class_dim, weight_attr=ParamAttr(name="fc10_weights"), bias_attr=ParamAttr(name="fc10_offset")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'mobilenet_v2_ssld.pdparams.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/mobilenet_v2_ssld.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv1(inputs, if_act=True) + for block in self.block_list: + y = block(y) + y = self.conv9(y, if_act=True) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.out_c]) + y = self.out(y) + return y diff --git a/modules/image/classification/mobilenet_v3_large_imagenet_ssld/module.py b/modules/image/classification/mobilenet_v3_large_imagenet_ssld/module.py new file mode 100644 index 0000000000000000000000000000000000000000..afb3ab70a9a8c08331a8d3a6f679476f3153fea9 --- /dev/null +++ b/modules/image/classification/mobilenet_v3_large_imagenet_ssld/module.py @@ -0,0 +1,286 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os + +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.regularizer import L2Decay +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +def make_divisible(v: int, divisor: int = 8, min_value: int = None): + """ + This function is taken from the original tf repo. + It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +@moduleinfo( + name="mobilenet_v3_large_imagenet_ssld", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="mobilenet_v3_large_imagenet_ssld is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class MobileNetV3Large(nn.Layer): + """MobileNetV3Large module.""" + + def __init__(self, dropout_prob: float = 0.2, class_dim: int = 1000, load_checkpoint: str = None): + super(MobileNetV3Large, self).__init__() + + inplanes = 16 + self.cfg = [ + # k, exp, c, se, nl, s, + [3, 16, 16, False, "relu", 1], + [3, 64, 24, False, "relu", 2], + [3, 72, 24, False, "relu", 1], + [5, 72, 40, True, "relu", 2], + [5, 120, 40, True, "relu", 1], + [5, 120, 40, True, "relu", 1], + [3, 240, 80, False, "hard_swish", 2], + [3, 200, 80, False, "hard_swish", 1], + [3, 184, 80, False, "hard_swish", 1], + [3, 184, 80, False, "hard_swish", 1], + [3, 480, 112, True, "hard_swish", 1], + [3, 672, 112, True, "hard_swish", 1], + [5, 672, 160, True, "hard_swish", 2], + [5, 960, 160, True, "hard_swish", 1], + [5, 960, 160, True, "hard_swish", 1] + ] + self.cls_ch_squeeze = 960 + self.cls_ch_expand = 1280 + + self.conv1 = ConvBNLayer( + in_c=3, + out_c=make_divisible(inplanes), + filter_size=3, + stride=2, + padding=1, + num_groups=1, + if_act=True, + act="hard_swish", + name="conv1") + + self.block_list = [] + i = 0 + inplanes = make_divisible(inplanes) + for (k, exp, c, se, nl, s) in self.cfg: + self.block_list.append( + ResidualUnit( + in_c=inplanes, + mid_c=make_divisible(exp), + out_c=make_divisible(c), + filter_size=k, + stride=s, + use_se=se, + act=nl, + name="conv" + str(i + 2))) + self.add_sublayer(sublayer=self.block_list[-1], name="conv" + str(i + 2)) + inplanes = make_divisible(c) + i += 1 + + self.last_second_conv = ConvBNLayer( + in_c=inplanes, + out_c=make_divisible(self.cls_ch_squeeze), + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=True, + act="hard_swish", + name="conv_last") + + self.pool = AdaptiveAvgPool2d(1) + + self.last_conv = Conv2d( + in_channels=make_divisible(self.cls_ch_squeeze), + out_channels=self.cls_ch_expand, + kernel_size=1, + stride=1, + padding=0, + 
weight_attr=ParamAttr(name="last_1x1_conv_weights"), + bias_attr=False) + + self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer") + + self.out = Linear( + self.cls_ch_expand, class_dim, weight_attr=ParamAttr("fc_weights"), bias_attr=ParamAttr(name="fc_offset")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'mobilenet_v3_large_ssld.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/mobilenet_v3_large_ssld.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + x = self.conv1(inputs) + for block in self.block_list: + x = block(x) + + x = self.last_second_conv(x) + x = self.pool(x) + + x = self.last_conv(x) + x = F.hard_swish(x) + x = self.dropout(x) + x = paddle.reshape(x, shape=[x.shape[0], x.shape[1]]) + x = self.out(x) + return x + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, + in_c: int, + out_c: int, + filter_size: int, + stride: int, + padding: int, + num_groups: int = 1, + if_act: bool = True, + act: str = None, + name: str = ""): + super(ConvBNLayer, self).__init__() + self.if_act = if_act + self.act = act + self.conv = Conv2d( + in_channels=in_c, + out_channels=out_c, + kernel_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + self.bn = BatchNorm( + num_channels=out_c, + act=None, + param_attr=ParamAttr(name=name + "_bn_scale", regularizer=L2Decay(0.0)), + bias_attr=ParamAttr(name=name + "_bn_offset", regularizer=L2Decay(0.0)), + moving_mean_name=name + "_bn_mean", + moving_variance_name=name + "_bn_variance") + + def forward(self, x: paddle.Tensor): + x = self.conv(x) + x = self.bn(x) + if self.if_act: + if self.act == "relu": + x = F.relu(x) + elif self.act == "hard_swish": + x = F.hard_swish(x) + else: + print("The activation function is selected incorrectly.") + exit() + return x + + +class ResidualUnit(nn.Layer): + """Residual unit for MobileNetV3.""" + + def __init__(self, + in_c: int, + mid_c: int, + out_c: int, + filter_size: int, + stride: int, + use_se: int, + act: str = None, + name: str = ''): + super(ResidualUnit, self).__init__() + self.if_shortcut = stride == 1 and in_c == out_c + self.if_se = use_se + + self.expand_conv = ConvBNLayer( + in_c=in_c, out_c=mid_c, filter_size=1, stride=1, padding=0, if_act=True, act=act, name=name + "_expand") + self.bottleneck_conv = ConvBNLayer( + in_c=mid_c, + out_c=mid_c, + filter_size=filter_size, + stride=stride, + padding=int((filter_size - 1) // 2), + num_groups=mid_c, + if_act=True, + act=act, + name=name + "_depthwise") + if self.if_se: + self.mid_se = SEModule(mid_c, name=name + "_se") + self.linear_conv = ConvBNLayer( + in_c=mid_c, out_c=out_c, filter_size=1, stride=1, padding=0, if_act=False, act=None, name=name + "_linear") + + def forward(self, inputs: paddle.Tensor): + x = self.expand_conv(inputs) + x = self.bottleneck_conv(x) + if self.if_se: + x = self.mid_se(x) + x = self.linear_conv(x) + if self.if_shortcut: + x = paddle.elementwise_add(inputs, x) + return x + + +class SEModule(nn.Layer): + """Basic model for ResidualUnit.""" + + def __init__(self, channel: int, reduction: int = 4, name: str = ""): + 
super(SEModule, self).__init__() + self.avg_pool = AdaptiveAvgPool2d(1) + self.conv1 = Conv2d( + in_channels=channel, + out_channels=channel // reduction, + kernel_size=1, + stride=1, + padding=0, + weight_attr=ParamAttr(name=name + "_1_weights"), + bias_attr=ParamAttr(name=name + "_1_offset")) + self.conv2 = Conv2d( + in_channels=channel // reduction, + out_channels=channel, + kernel_size=1, + stride=1, + padding=0, + weight_attr=ParamAttr(name + "_2_weights"), + bias_attr=ParamAttr(name=name + "_2_offset")) + + def forward(self, inputs: paddle.Tensor): + outputs = self.avg_pool(inputs) + outputs = self.conv1(outputs) + outputs = F.relu(outputs) + outputs = self.conv2(outputs) + outputs = F.hard_sigmoid(outputs) + return paddle.multiply(x=inputs, y=outputs, axis=0) diff --git a/modules/image/classification/mobilenet_v3_small_imagenet_ssld/module.py b/modules/image/classification/mobilenet_v3_small_imagenet_ssld/module.py new file mode 100644 index 0000000000000000000000000000000000000000..22955f9c732f7e90db20aafd32572e8057be44d0 --- /dev/null +++ b/modules/image/classification/mobilenet_v3_small_imagenet_ssld/module.py @@ -0,0 +1,276 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os + +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.regularizer import L2Decay +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +def make_divisible(v, divisor=8, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +@moduleinfo( + name="mobilenet_v3_small_imagenet_ssld", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="mobilenet_v3_small_imagenet_ssld is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class MobileNetV3Small(nn.Layer): + """MobileNetV3Small module.""" + + def __init__(self, dropout_prob: float = 0.2, class_dim: int = 1000, load_checkpoint: str = None): + super(MobileNetV3Small, self).__init__() + + inplanes = 16 + self.cfg = [ + # k, exp, c, se, nl, s, + [3, 16, 16, True, "relu", 2], + [3, 72, 24, False, "relu", 2], + [3, 88, 24, False, "relu", 1], + [5, 96, 40, True, "hard_swish", 2], + [5, 240, 40, True, "hard_swish", 1], + [5, 240, 40, True, "hard_swish", 1], + [5, 120, 48, True, "hard_swish", 1], + [5, 144, 48, True, "hard_swish", 1], + [5, 288, 96, True, "hard_swish", 2], + [5, 576, 96, True, "hard_swish", 1], + [5, 576, 96, True, "hard_swish", 1], + ] + self.cls_ch_squeeze = 576 + self.cls_ch_expand = 1280 + + self.conv1 = ConvBNLayer( + in_c=3, + out_c=make_divisible(inplanes), + filter_size=3, + stride=2, + padding=1, + 
num_groups=1, + if_act=True, + act="hard_swish", + name="conv1") + + self.block_list = [] + i = 0 + inplanes = make_divisible(inplanes) + for (k, exp, c, se, nl, s) in self.cfg: + self.block_list.append( + ResidualUnit( + in_c=inplanes, + mid_c=make_divisible(exp), + out_c=make_divisible(c), + filter_size=k, + stride=s, + use_se=se, + act=nl, + name="conv" + str(i + 2))) + self.add_sublayer(sublayer=self.block_list[-1], name="conv" + str(i + 2)) + inplanes = make_divisible(c) + i += 1 + + self.last_second_conv = ConvBNLayer( + in_c=inplanes, + out_c=make_divisible(self.cls_ch_squeeze), + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=True, + act="hard_swish", + name="conv_last") + + self.pool = AdaptiveAvgPool2d(1) + + self.last_conv = Conv2d( + in_channels=make_divisible(self.cls_ch_squeeze), + out_channels=self.cls_ch_expand, + kernel_size=1, + stride=1, + padding=0, + weight_attr=ParamAttr(name="last_1x1_conv_weights"), + bias_attr=False) + + self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer") + + self.out = Linear( + self.cls_ch_expand, class_dim, weight_attr=ParamAttr("fc_weights"), bias_attr=ParamAttr(name="fc_offset")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'mobilenet_v3_small_ssld.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/mobilenet_v3_small_ssld.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + x = self.conv1(inputs) + for block in self.block_list: + x = block(x) + + x = self.last_second_conv(x) + x = self.pool(x) + + x = self.last_conv(x) + x = F.hard_swish(x) + x = self.dropout(x) + x = paddle.reshape(x, shape=[x.shape[0], x.shape[1]]) + x = self.out(x) + return x + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, + in_c: int, + out_c: int, + filter_size: int, + stride: int, + padding: int, + num_groups: int = 1, + if_act: bool = True, + act: str = None, + name: str = ""): + super(ConvBNLayer, self).__init__() + self.if_act = if_act + self.act = act + self.conv = Conv2d( + in_channels=in_c, + out_channels=out_c, + kernel_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + self.bn = BatchNorm( + num_channels=out_c, + act=None, + param_attr=ParamAttr(name=name + "_bn_scale", regularizer=L2Decay(0.0)), + bias_attr=ParamAttr(name=name + "_bn_offset", regularizer=L2Decay(0.0)), + moving_mean_name=name + "_bn_mean", + moving_variance_name=name + "_bn_variance") + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + if self.if_act: + if self.act == "relu": + x = F.relu(x) + elif self.act == "hard_swish": + x = F.hard_swish(x) + else: + print("The activation function is selected incorrectly.") + exit() + return x + + +class ResidualUnit(nn.Layer): + """Residual unit for MobileNetV3.""" + + def __init__(self, + in_c: int, + mid_c: int, + out_c: int, + filter_size: int, + stride: int, + use_se: bool, + act: str = None, + name: str = ''): + super(ResidualUnit, self).__init__() + self.if_shortcut = stride == 1 and in_c == out_c + self.if_se = use_se + + self.expand_conv = ConvBNLayer( + in_c=in_c, out_c=mid_c, filter_size=1, 
stride=1, padding=0, if_act=True, act=act, name=name + "_expand") + self.bottleneck_conv = ConvBNLayer( + in_c=mid_c, + out_c=mid_c, + filter_size=filter_size, + stride=stride, + padding=int((filter_size - 1) // 2), + num_groups=mid_c, + if_act=True, + act=act, + name=name + "_depthwise") + if self.if_se: + self.mid_se = SEModule(mid_c, name=name + "_se") + self.linear_conv = ConvBNLayer( + in_c=mid_c, out_c=out_c, filter_size=1, stride=1, padding=0, if_act=False, act=None, name=name + "_linear") + + def forward(self, inputs: paddle.Tensor): + x = self.expand_conv(inputs) + x = self.bottleneck_conv(x) + if self.if_se: + x = self.mid_se(x) + x = self.linear_conv(x) + if self.if_shortcut: + x = paddle.elementwise_add(inputs, x) + return x + + +class SEModule(nn.Layer): + """Basic model for ResidualUnit.""" + + def __init__(self, channel: int, reduction: int = 4, name: str = ""): + super(SEModule, self).__init__() + self.avg_pool = AdaptiveAvgPool2d(1) + self.conv1 = Conv2d( + in_channels=channel, + out_channels=channel // reduction, + kernel_size=1, + stride=1, + padding=0, + weight_attr=ParamAttr(name=name + "_1_weights"), + bias_attr=ParamAttr(name=name + "_1_offset")) + self.conv2 = Conv2d( + in_channels=channel // reduction, + out_channels=channel, + kernel_size=1, + stride=1, + padding=0, + weight_attr=ParamAttr(name + "_2_weights"), + bias_attr=ParamAttr(name=name + "_2_offset")) + + def forward(self, inputs: paddle.Tensor): + outputs = self.avg_pool(inputs) + outputs = self.conv1(outputs) + outputs = F.relu(outputs) + outputs = self.conv2(outputs) + outputs = F.hard_sigmoid(outputs) + return paddle.multiply(x=inputs, y=outputs, axis=0) diff --git a/hub_module/modules/image/classification/res2net101_vd_26w_4s_imagenet/README.md b/modules/image/classification/res2net101_vd_26w_4s_imagenet/README.md similarity index 100% rename from hub_module/modules/image/classification/res2net101_vd_26w_4s_imagenet/README.md rename to modules/image/classification/res2net101_vd_26w_4s_imagenet/README.md diff --git a/hub_module/modules/image/classification/mobilenet_v2_dishes/__init__.py b/modules/image/classification/res2net101_vd_26w_4s_imagenet/__init__.py similarity index 100% rename from hub_module/modules/image/classification/mobilenet_v2_dishes/__init__.py rename to modules/image/classification/res2net101_vd_26w_4s_imagenet/__init__.py diff --git a/modules/image/classification/res2net101_vd_26w_4s_imagenet/data_feed.py b/modules/image/classification/res2net101_vd_26w_4s_imagenet/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..99a0855fd6a93dbecd081cef312a04a350cfcc50 --- /dev/null +++ b/modules/image/classification/res2net101_vd_26w_4s_imagenet/data_feed.py @@ -0,0 +1,82 @@ +# coding=utf-8 +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader'] + +DATA_DIM = 224 +img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + +def resize_short(img, target_size): + percent = float(target_size) / min(img.size[0], img.size[1]) + resized_width = int(round(img.size[0] * percent)) + resized_height = int(round(img.size[1] * percent)) + img = img.resize((resized_width, resized_height), Image.LANCZOS) + return img + + +def crop_image(img, target_size, center): + width, height = img.size + size = target_size + if center == True: + w_start = (width - size) / 2 + h_start = (height - size) / 2 + else: + w_start = 
+            w_start = np.random.randint(0, width - size + 1)
+            h_start = np.random.randint(0, height - size + 1)
+    w_end = w_start + size
+    h_end = h_start + size
+    img = img.crop((w_start, h_start, w_end, h_end))
+    return img
+
+
+def process_image(img):
+    img = resize_short(img, target_size=256)
+    img = crop_image(img, target_size=DATA_DIM, center=True)
+    if img.mode != 'RGB':
+        img = img.convert('RGB')
+    img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
+    img -= img_mean
+    img /= img_std
+    return img
+
+
+def reader(images=None, paths=None):
+    """
+    Preprocess to yield image.
+
+    Args:
+        images (list[numpy.ndarray]): images data, shape of each is [H, W, C].
+        paths (list[str]): paths to images.
+
+    Yield:
+        each (collections.OrderedDict): info of original image, preprocessed image.
+    """
+    component = list()
+    if paths:
+        for im_path in paths:
+            each = OrderedDict()
+            assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path)
+            each['org_im_path'] = im_path
+            each['org_im'] = Image.open(im_path)
+            each['org_im_width'], each['org_im_height'] = each['org_im'].size
+            component.append(each)
+    if images is not None:
+        assert isinstance(images, list), "images should be a list."
+        for im in images:
+            each = OrderedDict()
+            each['org_im'] = Image.fromarray(im[:, :, ::-1])
+            each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6)
+            each['org_im_width'], each['org_im_height'] = each['org_im'].size
+            component.append(each)
+
+    for element in component:
+        element['image'] = process_image(element['org_im'])
+        yield element
diff --git a/hub_module/modules/image/classification/res2net101_vd_26w_4s_imagenet/label_list.txt b/modules/image/classification/res2net101_vd_26w_4s_imagenet/label_list.txt
similarity index 100%
rename from hub_module/modules/image/classification/res2net101_vd_26w_4s_imagenet/label_list.txt
rename to modules/image/classification/res2net101_vd_26w_4s_imagenet/label_list.txt
diff --git a/modules/image/classification/res2net101_vd_26w_4s_imagenet/module.py b/modules/image/classification/res2net101_vd_26w_4s_imagenet/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..24dfbe74054c566264998b3344bb98ff3449af2c
--- /dev/null
+++ b/modules/image/classification/res2net101_vd_26w_4s_imagenet/module.py
@@ -0,0 +1,222 @@
+# coding=utf-8
+from __future__ import absolute_import
+from __future__ import division
+
+import ast
+import argparse
+import os
+
+import numpy as np
+import paddle.fluid as fluid
+import paddlehub as hub
+from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+from paddlehub.module.module import moduleinfo, runnable, serving
+from paddlehub.common.paddle_helper import add_vars_prefix
+
+from res2net101_vd_26w_4s_imagenet.processor import postprocess, base64_to_cv2
+from res2net101_vd_26w_4s_imagenet.data_feed import reader
+from res2net101_vd_26w_4s_imagenet.res2net_vd import Res2Net101_vd_26w_4s
+
+
+@moduleinfo(
+    name="res2net101_vd_26w_4s_imagenet",
+    type="CV/image_classification",
+    author="paddlepaddle",
+    author_email="paddle-dev@baidu.com",
+    summary="res2net101_vd_26w_4s is an image classification model, this module is trained with the ImageNet dataset.",
+    version="1.0.0")
+class Res2Net101vd26w4sImagenet(hub.Module):
+    def _initialize(self):
+        self.default_pretrained_model_path = os.path.join(self.directory, "res2net101_vd_26w_4s_imagenet_model")
+        label_file = os.path.join(self.directory, "label_list.txt")
+        with open(label_file, 'r', encoding='utf-8') as file:
+            self.label_list = file.read().split("\n")[:-1]
+        self.predictor_set = False
+
+    def get_expected_image_width(self):
+        return 224
+
+    def get_expected_image_height(self):
+        return 224
+
+    def get_pretrained_images_mean(self):
+        im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3)
+        return im_mean
+
+    def get_pretrained_images_std(self):
+        im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3)
+        return im_std
+
+    def _set_config(self):
+        """
+        predictor config setting
+        """
+        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
+        cpu_config.disable_glog_info()
+        cpu_config.disable_gpu()
+        self.cpu_predictor = create_paddle_predictor(cpu_config)
+
+        try:
+            _places = os.environ["CUDA_VISIBLE_DEVICES"]
+            int(_places[0])
+            use_gpu = True
+        except Exception:
+            use_gpu = False
+        if use_gpu:
+            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
+            gpu_config.disable_glog_info()
+            gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0)
+            self.gpu_predictor = create_paddle_predictor(gpu_config)
+
+    def context(self, trainable=True, pretrained=True):
+        """context for transfer learning.
+
+        Args:
+            trainable (bool): Set parameters in program to be trainable.
+            pretrained (bool) : Whether to load pretrained model.
+
+        Returns:
+            inputs (dict): key is 'image', corresponding value is image tensor.
+            outputs (dict): keys are:
+                'classification', corresponding value is the result of classification.
+                'feature_map', corresponding value is the result of the layer before the fully connected layer.
+            context_prog (fluid.Program): program for transfer learning.
+        """
+        context_prog = fluid.Program()
+        startup_prog = fluid.Program()
+        with fluid.program_guard(context_prog, startup_prog):
+            with fluid.unique_name.guard():
+                image = fluid.layers.data(name="image", shape=[3, 224, 224], dtype="float32")
+                resnet_vd = Res2Net101_vd_26w_4s()
+                output, feature_map = resnet_vd.net(input=image, class_dim=len(self.label_list))
+
+                name_prefix = '@HUB_{}@'.format(self.name)
+                inputs = {'image': name_prefix + image.name}
+                outputs = {'classification': name_prefix + output.name, 'feature_map': name_prefix + feature_map.name}
+                add_vars_prefix(context_prog, name_prefix)
+                add_vars_prefix(startup_prog, name_prefix)
+                global_vars = context_prog.global_block().vars
+                inputs = {key: global_vars[value] for key, value in inputs.items()}
+                outputs = {key: global_vars[value] for key, value in outputs.items()}
+
+                place = fluid.CPUPlace()
+                exe = fluid.Executor(place)
+                # pretrained
+                if pretrained:
+
+                    def _if_exist(var):
+                        b = os.path.exists(os.path.join(self.default_pretrained_model_path, var.name))
+                        return b
+
+                    fluid.io.load_vars(exe, self.default_pretrained_model_path, context_prog, predicate=_if_exist)
+                else:
+                    exe.run(startup_prog)
+                # trainable
+                for param in context_prog.global_block().iter_parameters():
+                    param.trainable = trainable
+        return inputs, outputs, context_prog
+
+    def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1):
+        """
+        API for image classification.
+
+        Args:
+            images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR.
+            paths (list[str]): The paths of images.
+            batch_size (int): batch size.
+            use_gpu (bool): Whether to use gpu.
+            top_k (int): Return top k results.
+
+        Returns:
+            res (list[dict]): The classification results.
+ """ + if not self.predictor_set: + self._set_config() + self.predictor_set = True + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + batch_image = PaddleTensor(batch_image.copy()) + predictor_output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run( + [batch_image]) + out = postprocess(data_out=predictor_output[0].as_ndarray(), label_list=self.label_list, top_k=top_k) + res += out + return res + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.classification(images=images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.classification(paths=[args.input_path], batch_size=args.batch_size, use_gpu=args.use_gpu) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not.") + self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + self.arg_config_group.add_argument('--top_k', type=ast.literal_eval, default=1, help="Return top k results.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") diff --git a/hub_module/modules/image/classification/res2net101_vd_26w_4s_imagenet/processor.py b/modules/image/classification/res2net101_vd_26w_4s_imagenet/processor.py similarity index 100% rename from hub_module/modules/image/classification/res2net101_vd_26w_4s_imagenet/processor.py rename to modules/image/classification/res2net101_vd_26w_4s_imagenet/processor.py diff --git a/modules/image/classification/res2net101_vd_26w_4s_imagenet/res2net_vd.py b/modules/image/classification/res2net101_vd_26w_4s_imagenet/res2net_vd.py new file mode 100644 index 0000000000000000000000000000000000000000..68d8c50fd873b72e65c63a6567990539b1a334bd --- /dev/null +++ b/modules/image/classification/res2net101_vd_26w_4s_imagenet/res2net_vd.py @@ -0,0 +1,222 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle +import paddle.fluid as fluid +import math +from paddle.fluid.param_attr import ParamAttr +__all__ = [ + "Res2Net_vd", "Res2Net50_vd_48w_2s", "Res2Net50_vd_26w_4s", "Res2Net50_vd_14w_8s", "Res2Net50_vd_26w_6s", + "Res2Net50_vd_26w_8s", "Res2Net101_vd_26w_4s", "Res2Net152_vd_26w_4s", "Res2Net200_vd_26w_4s" +] + + +class Res2Net_vd(): + def __init__(self, layers=50, scales=4, width=26): + self.layers = layers + self.scales = scales + self.width = width + + def net(self, input, class_dim=1000): + layers = self.layers + supported_layers = [50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format(supported_layers, layers) + basic_width = self.width * self.scales + num_filters1 = [basic_width * t for t in [1, 2, 4, 8]] + num_filters2 = [256 * t for t in [1, 2, 4, 8]] + if layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + conv = self.conv_bn_layer(input=input, num_filters=32, filter_size=3, stride=2, act='relu', name='conv1_1') + conv = self.conv_bn_layer(input=conv, num_filters=32, filter_size=3, stride=1, act='relu', name='conv1_2') + conv = self.conv_bn_layer(input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_3') + + conv = fluid.layers.pool2d(input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + for block in range(len(depth)): + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + conv = self.bottleneck_block( + input=conv, + num_filters1=num_filters1[block], + num_filters2=num_filters2[block], + stride=2 if i == 0 and block != 0 else 1, + if_first=block == i == 0, + name=conv_name) + pool = 
fluid.layers.pool2d(input=conv, pool_size=7, pool_stride=1, pool_type='avg', global_pooling=True) + + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), name='fc_weights'), + bias_attr=fluid.param_attr.ParamAttr(name='fc_offset')) + return out, pool + + def conv_bn_layer(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def conv_bn_layer_new(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None): + pool = fluid.layers.pool2d( + input=input, pool_size=2, pool_stride=2, pool_padding=0, pool_type='avg', ceil_mode=True) + + conv = fluid.layers.conv2d( + input=pool, + num_filters=num_filters, + filter_size=filter_size, + stride=1, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def shortcut(self, input, ch_out, stride, name, if_first=False): + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1: + if if_first: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name) + elif if_first: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck_block(self, input, num_filters1, num_filters2, stride, name, if_first): + conv0 = self.conv_bn_layer( + input=input, num_filters=num_filters1, filter_size=1, stride=1, act='relu', name=name + '_branch2a') + + xs = fluid.layers.split(conv0, self.scales, 1) + ys = [] + for s in range(self.scales - 1): + if s == 0 or stride == 2: + ys.append( + self.conv_bn_layer( + input=xs[s], + num_filters=num_filters1 // self.scales, + stride=stride, + filter_size=3, + act='relu', + name=name + '_branch2b_' + str(s + 1))) + else: + ys.append( + self.conv_bn_layer( + input=xs[s] + ys[-1], + num_filters=num_filters1 // self.scales, + stride=stride, + filter_size=3, + act='relu', + name=name + '_branch2b_' + str(s + 1))) + + if stride == 1: + ys.append(xs[-1]) + else: + ys.append( + fluid.layers.pool2d(input=xs[-1], pool_size=3, pool_stride=stride, pool_padding=1, pool_type='avg')) + + conv1 = fluid.layers.concat(ys, axis=1) + conv2 = self.conv_bn_layer( + input=conv1, num_filters=num_filters2, filter_size=1, act=None, name=name + "_branch2c") + + short = self.shortcut(input, num_filters2, stride, if_first=if_first, name=name + "_branch1") + + return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') + + +def Res2Net50_vd_48w_2s(): + model = Res2Net_vd(layers=50, scales=2, 
width=48) + return model + + +def Res2Net50_vd_26w_4s(): + model = Res2Net_vd(layers=50, scales=4, width=26) + return model + + +def Res2Net50_vd_14w_8s(): + model = Res2Net_vd(layers=50, scales=8, width=14) + return model + + +def Res2Net50_vd_26w_6s(): + model = Res2Net_vd(layers=50, scales=6, width=26) + return model + + +def Res2Net50_vd_26w_8s(): + model = Res2Net_vd(layers=50, scales=8, width=26) + return model + + +def Res2Net101_vd_26w_4s(): + model = Res2Net_vd(layers=101, scales=4, width=26) + return model + + +def Res2Net152_vd_26w_4s(): + model = Res2Net_vd(layers=152, scales=4, width=26) + return model + + +def Res2Net200_vd_26w_4s(): + model = Res2Net_vd(layers=200, scales=4, width=26) + return model diff --git a/modules/image/classification/resnet101_imagenet/module.py b/modules/image/classification/resnet101_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..87a549398053350f801d5dd8dd3d4b9dbe7bc6ef --- /dev/null +++ b/modules/image/classification/resnet101_imagenet/module.py @@ -0,0 +1,232 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import math + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(bn_name + "_offset"), + moving_mean_name=bn_name + "_mean", + moving_variance_name=bn_name + "_variance") + + def forward(self, inputs: paddle.Tensor): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNet101.""" + + def __init__(self, num_channels: int, num_filters: int, stride: int, shortcut: bool = True, name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act="relu", name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu", + name=name + "_branch2b") + self.conv2 = 
ConvBNLayer( + num_channels=num_filters, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + self._num_channels_out = num_filters * 4 + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.elementwise_add(x=short, y=conv2, act="relu") + return y + + +class BasicBlock(nn.Layer): + """Basic block for ResNet101.""" + + def __init__(self, num_channels: int, num_filters: int, stride: int, shortcut: bool = True, name: str = None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu", + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv1, act="relu") + return y + + +@moduleinfo( + name="resnet101_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnet101_imagenet is a classification model, " + "this module is trained with Baidu open sourced dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNet101(nn.Layer): + """ResNet101 model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNet101, self).__init__() + + self.layers = 101 + depth = [3, 4, 23, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [64, 128, 256, 512] + + self.conv = ConvBNLayer(num_channels=3, num_filters=64, filter_size=7, stride=2, act="relu", name="conv1") + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + conv_name, + BottleneckBlock( + num_channels=num_channels[block] if i == 0 else num_filters[block] * 4, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), + bias_attr=ParamAttr(name="fc_0.b_0")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnet101_imagenet.pdparams') + 
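+            # Editor's note (illustrative, not part of the original code): the block
+            # below is the "download on first use" pattern that every module in this
+            # PR repeats -- if the .pdparams file is not cached beside the module,
+            # fetch it with wget, then paddle.load() it. A dependency-free sketch of
+            # the same idea, with a hypothetical helper name:
+            #
+            #   import os, urllib.request
+            #   def ensure_weights(path, url):
+            #       if not os.path.exists(path):
+            #           urllib.request.urlretrieve(url, path)
+            #       return path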
if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet101_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv(inputs) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/modules/image/classification/resnet101_vd_imagenet/module.py b/modules/image/classification/resnet101_vd_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..ef3fbb865afc9e40ba2a7b0aab0de98ef3546cfd --- /dev/null +++ b/modules/image/classification/resnet101_vd_imagenet/module.py @@ -0,0 +1,254 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import math + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__( + self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None, + ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNet101_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + 
stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv2, act='relu') + return y + + +class BasicBlock(nn.Layer): + """Basic block for ResNet101_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv1, act='relu') + return y + + +@moduleinfo( + name="resnet101_vd_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnet101_vd_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNet101_vd(nn.Layer): + """ResNet101_vd model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNet101_vd, self).__init__() + + self.layers = 101 + + depth = [3, 4, 23, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [64, 128, 256, 512] + + self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") + self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") + self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] if i == 0 else num_filters[block] * 4, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + self.pool2d_avg_channels = num_channels[-1] * 2 + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) 
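+        # Editor's note (illustrative): `stdv` is the fan-in uniform bound
+        # W ~ U(-1/sqrt(C), 1/sqrt(C)) for the classifier head; here
+        # C = pool2d_avg_channels = 1024 * 2 = 2048, so:
+        #
+        #   import math
+        #   bound = 1.0 / math.sqrt(2048)   # ~0.0221, the same value as stdv
+        #   # Uniform(-bound, bound) matches Uniform(-stdv, stdv) used below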
+ + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), + bias_attr=ParamAttr(name="fc_0.b_0")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnet101_vd_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet101_vd_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/modules/image/classification/resnet101_vd_imagenet_ssld/module.py b/modules/image/classification/resnet101_vd_imagenet_ssld/module.py new file mode 100644 index 0000000000000000000000000000000000000000..7cc3e38270ad833be2486df14c7900828f057721 --- /dev/null +++ b/modules/image/classification/resnet101_vd_imagenet_ssld/module.py @@ -0,0 +1,254 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
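+
+# Editor's usage sketch (assumed API, not part of the original code): modules in
+# this PR are dygraph Layers wired through PaddleHub's ImageClassifierModule, so
+# a typical entry point would be:
+#
+#   import paddlehub as hub
+#   model = hub.Module(name='resnet101_vd_imagenet_ssld')
+#   results = model.predict(['flower.jpg'])   # hypothetical local image path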
+import os +import math + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__( + self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None, + ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNet101_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv2, act='relu') + return y + + +class BasicBlock(nn.Layer): + """Basic block for ResNet101_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def 
forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv1, act='relu') + return y + + +@moduleinfo( + name="resnet101_vd_imagenet_ssld", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnet101_vd_imagenet_ssld is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNet101_vd(nn.Layer): + """ResNet101_vd model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNet101_vd, self).__init__() + + self.layers = 101 + + depth = [3, 4, 23, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [64, 128, 256, 512] + + self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") + self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") + self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] if i == 0 else num_filters[block] * 4, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + self.pool2d_avg_channels = num_channels[-1] * 2 + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), + bias_attr=ParamAttr(name="fc_0.b_0")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnet101_vd_ssld_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet101_vd_ssld_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/modules/image/classification/resnet152_imagenet/module.py b/modules/image/classification/resnet152_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..c77bdea4bd297e9158cb60d2a7b9438407b7e6e5 --- /dev/null +++ b/modules/image/classification/resnet152_imagenet/module.py @@ -0,0 +1,233 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 
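+#
+# Editor's sketch (illustrative): every ResNet variant in this PR derives block
+# names compatible with the released weights from the same loop, special-casing
+# the long third stage:
+#
+#   depth = [3, 8, 36, 3]               # ResNet152; ResNet101 uses [3, 4, 23, 3]
+#   for block in range(len(depth)):
+#       for i in range(depth[block]):
+#           if block == 2 and i > 0:
+#               name = "res" + str(block + 2) + "b" + str(i)   # res4b1 ... res4b35
+#           else:
+#               name = "res" + str(block + 2) + chr(97 + i)    # res2a, res2b, ...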
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import math + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(bn_name + "_offset"), + moving_mean_name=bn_name + "_mean", + moving_variance_name=bn_name + "_variance") + + def forward(self, inputs: paddle.Tensor): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNet152.""" + + def __init__(self, num_channels: int, num_filters: int, stride: int, shortcut: bool = True, name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act="relu", name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu", + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + self._num_channels_out = num_filters * 4 + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.elementwise_add(x=short, y=conv2, act="relu") + return y + + +class BasicBlock(nn.Layer): + """Basic block for ResNet152.""" + + def __init__(self, num_channels: int, num_filters: int, stride: int, shortcut: bool = True, name: str = None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu", + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, 
act=None, name=name + "_branch2b") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv1, act="relu") + return y + + +@moduleinfo( + name="resnet152_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnet152_imagenet is a classification model, " + "this module is trained with Baidu open sourced dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNet152(nn.Layer): + """ResNet152 model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNet152, self).__init__() + + self.layers = 152 + depth = [3, 8, 36, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [64, 128, 256, 512] + + self.conv = ConvBNLayer(num_channels=3, num_filters=64, filter_size=7, stride=2, act="relu", name="conv1") + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + + bottleneck_block = self.add_sublayer( + conv_name, + BottleneckBlock( + num_channels=num_channels[block] if i == 0 else num_filters[block] * 4, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), + bias_attr=ParamAttr(name="fc_0.b_0")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnet152_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet152_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv(inputs) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/modules/image/classification/resnet152_vd_imagenet/module.py b/modules/image/classification/resnet152_vd_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..64726cbcc4e535ef07d326431c5d191725e5ace3 --- /dev/null +++ b/modules/image/classification/resnet152_vd_imagenet/module.py @@ -0,0 +1,254 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
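+#
+# Editor's note: `paddle.elementwise_add(x=short, y=conv, act='relu')`, used by
+# every residual block in this PR, is the pre-2.0-final fused add+activation.
+# An assumed equivalent in later Paddle releases (illustrative only):
+#
+#   import paddle
+#   import paddle.nn.functional as F
+#   y = F.relu(paddle.add(short, conv))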
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import math + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__( + self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None, + ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNet152_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv2, act='relu') + return y + + +class BasicBlock(nn.Layer): + """Basic block for ResNet152_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + 
act='relu', + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv1, act='relu') + return y + + +@moduleinfo( + name="resnet152_vd_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnet152_vd_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNet152_vd(nn.Layer): + """ResNet152_vd model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNet152_vd, self).__init__() + + self.layers = 152 + + depth = [3, 8, 36, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [64, 128, 256, 512] + + self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") + self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") + self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] if i == 0 else num_filters[block] * 4, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + self.pool2d_avg_channels = num_channels[-1] * 2 + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), + bias_attr=ParamAttr(name="fc_0.b_0")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnet152_vd_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet152_vd_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/modules/image/classification/resnet18_imagenet/module.py 
b/modules/image/classification/resnet18_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..70784673ab178c73cd1e4f5bfc5265c4ce568d25 --- /dev/null +++ b/modules/image/classification/resnet18_imagenet/module.py @@ -0,0 +1,226 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import math + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(bn_name + "_offset"), + moving_mean_name=bn_name + "_mean", + moving_variance_name=bn_name + "_variance") + + def forward(self, inputs: paddle.Tensor): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNet18.""" + + def __init__(self, num_channels: int, num_filters: int, stride: int, shortcut: bool = True, name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act="relu", name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu", + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + self._num_channels_out = num_filters * 4 + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.elementwise_add(x=short, y=conv2, act="relu") + return y + + +class BasicBlock(nn.Layer): + """Basic block for ResNet18.""" + + def __init__(self, num_channels: int, num_filters: int, stride: int, shortcut: bool = True, name: 
str = None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu", + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv1, act="relu") + return y + + +@moduleinfo( + name="resnet18_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnet18_imagenet is a classification model, " + "this module is trained with Baidu open sourced dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNet18(nn.Layer): + """ResNet18 model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNet18, self).__init__() + + self.layers = 18 + depth = [2, 2, 2, 2] + num_channels = [64, 64, 128, 256] + num_filters = [64, 128, 256, 512] + + self.conv = ConvBNLayer(num_channels=3, num_filters=64, filter_size=7, stride=2, act="relu", name="conv1") + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + basic_block = self.add_sublayer( + conv_name, + BasicBlock( + num_channels=num_channels[block] if i == 0 else num_filters[block], + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + name=conv_name)) + self.block_list.append(basic_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), + bias_attr=ParamAttr(name="fc_0.b_0")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnet18_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet18_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv(inputs) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/modules/image/classification/resnet18_vd_imagenet/module.py b/modules/image/classification/resnet18_vd_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..70cd1b68d0ca9036249433bbace10e4a9212912f --- /dev/null +++ b/modules/image/classification/resnet18_vd_imagenet/module.py @@ -0,0 +1,246 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import math + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__( + self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None, + ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNet18_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv2, act='relu') + return y + + +class BasicBlock(nn.Layer): + """Basic block for ResNet18_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BasicBlock, self).__init__() + 
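+        # In the "vd" variant (ResNet-D, arXiv:1812.01187), a downsampling
+        # block moves the stride out of the projection shortcut: the shortcut
+        # avg-pools by 2 first and then applies a stride-1 1x1 conv
+        # (is_vd_mode=True below), so no activations are simply discarded.
+        # The first block after the stem (if_first) keeps the plain projection.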
self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv1, act='relu') + return y + + +@moduleinfo( + name="resnet18_vd_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnet18_vd_imagenet is a classification model, " + "this module is trained with Baidu open sourced dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNet18_vd(nn.Layer): + """ResNet18_vd model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNet18_vd, self).__init__() + + self.layers = 18 + depth = [2, 2, 2, 2] + num_channels = [64, 64, 128, 256] + num_filters = [64, 128, 256, 512] + + self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") + self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") + self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + basic_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BasicBlock( + num_channels=num_channels[block] if i == 0 else num_filters[block], + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(basic_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + self.pool2d_avg_channels = num_channels[-1] * 2 + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), + bias_attr=ParamAttr(name="fc_0.b_0")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnet18_vd_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet18_vd_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/modules/image/classification/resnet200_vd_imagenet/module.py 
b/modules/image/classification/resnet200_vd_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..e78bde46d45d54a909d049654474fc6af427be9b --- /dev/null +++ b/modules/image/classification/resnet200_vd_imagenet/module.py @@ -0,0 +1,254 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import math + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__( + self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None, + ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNet200_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = 
self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv2, act='relu') + return y + + +class BasicBlock(nn.Layer): + """Basic block for ResNet200_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv1, act='relu') + return y + + +@moduleinfo( + name="resnet200_vd_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnet200_vd_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNet200_vd(nn.Layer): + """ResNet200_vd model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNet200_vd, self).__init__() + + self.layers = 200 + + depth = [3, 12, 48, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [64, 128, 256, 512] + + self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") + self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") + self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] if i == 0 else num_filters[block] * 4, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + self.pool2d_avg_channels = num_channels[-1] * 2 + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), + bias_attr=ParamAttr(name="fc_0.b_0")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnet200_vd_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet200_vd_imagenet.pdparams -O ' + + checkpoint) + model_dict 
= paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/modules/image/classification/resnet34_imagenet/module.py b/modules/image/classification/resnet34_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..b1531b8ce2d1b6ce71cb2c1b61aa4f127a50eed6 --- /dev/null +++ b/modules/image/classification/resnet34_imagenet/module.py @@ -0,0 +1,226 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import math + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(bn_name + "_offset"), + moving_mean_name=bn_name + "_mean", + moving_variance_name=bn_name + "_variance") + + def forward(self, inputs: paddle.Tensor): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNet34.""" + + def __init__(self, num_channels: int, num_filters: int, stride: int, shortcut: bool = True, name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act="relu", name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu", + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + 
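+        # The bottleneck widens its output to 4x the internal filter count
+        # (1x1 reduce -> 3x3 -> 1x1 expand), so the next block must receive
+        # num_filters * 4 channels; _num_channels_out records this. Note the
+        # ResNet34 module below only instantiates BasicBlocks.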
self._num_channels_out = num_filters * 4 + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.elementwise_add(x=short, y=conv2, act="relu") + return y + + +class BasicBlock(nn.Layer): + """Basic block for ResNet34.""" + + def __init__(self, num_channels: int, num_filters: int, stride: int, shortcut: bool = True, name: str = None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu", + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv1, act="relu") + return y + + +@moduleinfo( + name="resnet34_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnet34_imagenet is a classification model, " + "this module is trained with Baidu open sourced dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNet34(nn.Layer): + """ResNet34 model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNet34, self).__init__() + + self.layers = 34 + depth = [3, 4, 6, 3] + num_channels = [64, 64, 128, 256] + num_filters = [64, 128, 256, 512] + + self.conv = ConvBNLayer(num_channels=3, num_filters=64, filter_size=7, stride=2, act="relu", name="conv1") + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + basic_block = self.add_sublayer( + conv_name, + BasicBlock( + num_channels=num_channels[block] if i == 0 else num_filters[block], + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + name=conv_name)) + self.block_list.append(basic_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), + bias_attr=ParamAttr(name="fc_0.b_0")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnet34_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet34_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv(inputs) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, 
self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/hub_module/modules/image/classification/res2net101_vd_26w_4s_imagenet/__init__.py b/modules/image/classification/resnet34_v2_imagenet/__init__.py similarity index 100% rename from hub_module/modules/image/classification/res2net101_vd_26w_4s_imagenet/__init__.py rename to modules/image/classification/resnet34_v2_imagenet/__init__.py diff --git a/modules/image/classification/resnet34_v2_imagenet/data_feed.py b/modules/image/classification/resnet34_v2_imagenet/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9d790e93333f3742be9ee58215da87eb2ef819 --- /dev/null +++ b/modules/image/classification/resnet34_v2_imagenet/data_feed.py @@ -0,0 +1,73 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import os +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image, ImageEnhance +from paddle import fluid + +DATA_DIM = 224 +img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + +def resize_short(img, target_size): + percent = float(target_size) / min(img.size[0], img.size[1]) + resized_width = int(round(img.size[0] * percent)) + resized_height = int(round(img.size[1] * percent)) + img = img.resize((resized_width, resized_height), Image.LANCZOS) + return img + + +def crop_image(img, target_size, center): + width, height = img.size + size = target_size + if center == True: + w_start = (width - size) / 2 + h_start = (height - size) / 2 + else: + w_start = np.random.randint(0, width - size + 1) + h_start = np.random.randint(0, height - size + 1) + w_end = w_start + size + h_end = h_start + size + img = img.crop((w_start, h_start, w_end, h_end)) + return img + + +def process_image(img): + img = resize_short(img, target_size=256) + img = crop_image(img, target_size=DATA_DIM, center=True) + if img.mode != 'RGB': + img = img.convert('RGB') + #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def test_reader(paths=None, images=None): + """data generator + :param paths: path to images. 
+ :type paths: list, each element is a str + :param images: data of images, [N, H, W, C] + :type images: numpy.ndarray + """ + img_list = [] + if paths: + for img_path in paths: + assert os.path.isfile(img_path), "The {} isn't a valid file path.".format(img_path) + img = Image.open(img_path) + #img = cv2.imread(img_path) + img_list.append(img) + if images is not None: + for img in images: + img_list.append(Image.fromarray(np.uint8(img))) + for im in img_list: + im = process_image(im) + yield im diff --git a/hub_module/modules/image/classification/resnet34_v2_imagenet/label_file.txt b/modules/image/classification/resnet34_v2_imagenet/label_file.txt similarity index 100% rename from hub_module/modules/image/classification/resnet34_v2_imagenet/label_file.txt rename to modules/image/classification/resnet34_v2_imagenet/label_file.txt diff --git a/modules/image/classification/resnet34_v2_imagenet/module.py b/modules/image/classification/resnet34_v2_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..66287245fac7ca60b73c6c714256739e9efd65b9 --- /dev/null +++ b/modules/image/classification/resnet34_v2_imagenet/module.py @@ -0,0 +1,233 @@ +import os +import ast +import argparse + +import numpy as np +import paddlehub as hub +import paddle.fluid as fluid +from paddlehub.module.module import moduleinfo, runnable +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.io.parser import txt_parser + +from resnet34_v2_imagenet.resnet import ResNet, ResNetC5 +from resnet34_v2_imagenet.processor import load_label_info +from resnet34_v2_imagenet.data_feed import test_reader + + +@moduleinfo( + name="resnet34_v2_imagenet", + version="1.1.0", + type="cv/classification", + summary="ResNet34 is an image classification model trained with the ImageNet-2012 dataset.", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class ResNet34(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, "resnet34_v2_model") + self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) + self.infer_prog = None + self.pred_out = None + self._set_config() + + def get_expected_image_width(self): + return 224 + + def get_expected_image_height(self): + return 224 + + def get_pretrained_images_mean(self): + im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3) + return im_mean + + def get_pretrained_images_std(self): + im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) + return im_std + + def _set_config(self): + """ + predictor config setting + """ + cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except Exception: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def context(self, + input_image=None, + trainable=True, + pretrained=True, + param_prefix='', + get_prediction=False, + variant='d', + norm_type='bn', + feature_maps=[3, 4, 5], + return_c5=False): + """Distill the head features so as to perform transfer learning. + + :param input_image: image tensor. + :type input_image: fluid.Variable + :param trainable: whether to set parameters trainable.
+ :type trainable: bool + :param pretrained: whether to load the default pretrained model. + :type pretrained: bool + :param param_prefix: the prefix of the backbone parameters. + :type param_prefix: str + :param get_prediction: whether to get the prediction; + if True, outputs is {'pred_out': pred_out}, + if False, outputs is {'body_feats': body_feats}. + :type get_prediction: bool + :param variant: type of the ResNet variant ('a', 'b', 'c' or 'd'). + :type variant: str + :param norm_type: type of normalization. + :type norm_type: str + :param feature_maps: indices of the stages whose feature maps are returned. + :type feature_maps: list + """ + context_prog = input_image.block.program if input_image else fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(context_prog, startup_program): + if return_c5: + return ResNetC5(depth=34, norm_type=norm_type, variant=variant, feature_maps=feature_maps) + image = input_image if input_image else fluid.data( + name='image', shape=[-1, 3, 224, 224], dtype='float32', lod_level=0) + backbone = ResNet(depth=34, variant=variant, norm_type=norm_type,\ + feature_maps=feature_maps, get_prediction=get_prediction) + + out = backbone(image) + inputs = {'image': image} + if get_prediction: + outputs = {'pred_out': out} + else: + outputs = {'body_feats': out} + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + if pretrained: + + def _if_exist(var): + return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + + if not param_prefix: + fluid.io.load_vars( + exe, self.default_pretrained_model_path, main_program=context_prog, predicate=_if_exist) + else: + exe.run(startup_program) + return inputs, outputs, context_prog + + def classification(self, paths=None, images=None, use_gpu=False, batch_size=1, top_k=2): + """API of Classification. + :param paths: the paths of images. + :type paths: list, each element corresponds to the path of an image. + :param images: data of images, [N, H, W, C] + :type images: numpy.ndarray + :param use_gpu: whether to use gpu or not. + :type use_gpu: bool + :param batch_size: batch size.
+ :type batch_size: int + :param top_k: number of top results to return. + :type top_k: int + """ + if self.infer_prog is None: + inputs, outputs, self.infer_prog = self.context(trainable=False, pretrained=True, get_prediction=True) + self.infer_prog = self.infer_prog.clone(for_test=True) + self.pred_out = outputs['pred_out'] + place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() + exe = fluid.Executor(place) + all_images = [] + paths = paths if paths else [] + for yield_data in test_reader(paths, images): + all_images.append(yield_data) + + images_num = len(all_images) + loop_num = int(np.ceil(images_num / batch_size)) + + res_list = [] + top_k = max(min(top_k, 1000), 1) + for iter_id in range(loop_num): + batch_data = [] + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_images[handle_id + image_id]) + except IndexError: + pass + batch_data = np.array(batch_data).astype('float32') + data_tensor = PaddleTensor(batch_data.copy()) + if use_gpu: + result = self.gpu_predictor.run([data_tensor]) + else: + result = self.cpu_predictor.run([data_tensor]) + for i, res in enumerate(result[0].as_ndarray()): + res_dict = {} + pred_label = np.argsort(res)[::-1][:top_k] + for k in pred_label: + class_name = self.label_names[int(k)].split(',')[0] + max_prob = res[k] + res_dict[class_name] = max_prob + res_list.append(res_dict) + return res_list + + def add_module_config_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether to use GPU or not") + + self.arg_config_group.add_argument('--batch_size', type=int, default=1, help="batch size for prediction") + + def add_module_input_arg(self): + """ + Add the command input options + """ + self.arg_input_group.add_argument('--input_path', type=str, default=None, help="input data") + self.arg_input_group.add_argument('--input_file', type=str, default=None, help="file containing input data") + + def check_input_data(self, args): + input_data = [] + if args.input_path: + input_data = [args.input_path] + elif args.input_file: + if not os.path.exists(args.input_file): + raise RuntimeError("File %s does not exist." % args.input_file) + else: + input_data = txt_parser.parse(args.input_file, use_strip=True) + return input_data + + @runnable + def run_cmd(self, argvs): + self.parser = argparse.ArgumentParser( + description="Run the {}".format(self.name), + prog="hub run {}".format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + input_data = self.check_input_data(args) + if len(input_data) == 0: + self.parser.print_help() + exit(1) + else: + for image_path in input_data: + if not os.path.exists(image_path): + raise RuntimeError("File %s does not exist."
% image_path) + return self.classification(paths=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size) diff --git a/hub_module/modules/image/classification/resnet34_v2_imagenet/name_adapter.py b/modules/image/classification/resnet34_v2_imagenet/name_adapter.py similarity index 100% rename from hub_module/modules/image/classification/resnet34_v2_imagenet/name_adapter.py rename to modules/image/classification/resnet34_v2_imagenet/name_adapter.py diff --git a/modules/image/classification/resnet34_v2_imagenet/nonlocal_helper.py b/modules/image/classification/resnet34_v2_imagenet/nonlocal_helper.py new file mode 100644 index 0000000000000000000000000000000000000000..839df4caf744280001f033d8ef6a3d560277368e --- /dev/null +++ b/modules/image/classification/resnet34_v2_imagenet/nonlocal_helper.py @@ -0,0 +1,151 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import paddle.fluid as fluid +from paddle.fluid import ParamAttr + +nonlocal_params = { + "use_zero_init_conv": False, + "conv_init_std": 0.01, + "no_bias": True, + "use_maxpool": False, + "use_softmax": True, + "use_bn": False, + "use_scale": True, # vital for the model prformance!!! + "use_affine": False, + "bn_momentum": 0.9, + "bn_epsilon": 1.0000001e-5, + "bn_init_gamma": 0.9, + "weight_decay_bn": 1.e-4, +} + + +def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner, max_pool_stride=2): + cur = input + theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \ + filter_size = [1, 1], stride = [1, 1], \ + padding = [0, 0], \ + param_attr=ParamAttr(name = prefix + '_theta' + "_w", \ + initializer = fluid.initializer.Normal(loc = 0.0, + scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) \ + if not nonlocal_params["no_bias"] else False, \ + name = prefix + '_theta') + theta_shape = theta.shape + theta_shape_op = fluid.layers.shape(theta) + theta_shape_op.stop_gradient = True + + if nonlocal_params["use_maxpool"]: + max_pool = fluid.layers.pool2d(input = cur, \ + pool_size = [max_pool_stride, max_pool_stride], \ + pool_type = 'max', \ + pool_stride = [max_pool_stride, max_pool_stride], \ + pool_padding = [0, 0], \ + name = prefix + '_pool') + else: + max_pool = cur + + phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ + filter_size = [1, 1], stride = [1, 1], \ + padding = [0, 0], \ + param_attr = ParamAttr(name = prefix + '_phi' + "_w", \ + initializer = fluid.initializer.Normal(loc = 0.0, + scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) \ + if (nonlocal_params["no_bias"] == 0) else False, \ + name = prefix + '_phi') + phi_shape = phi.shape + + g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ + filter_size = [1, 1], stride = [1, 1], \ + padding = [0, 0], \ + param_attr = ParamAttr(name = prefix + '_g' + "_w", \ + initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_g' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) else False, \ + name = prefix + '_g') + g_shape = g.shape + # we have to use explicit batch size (to support arbitrary spacetime size) + # e.g. 
(8, 1024, 4, 14, 14) => (8, 1024, 784) + theta = fluid.layers.reshape(theta, shape=(0, 0, -1)) + theta = fluid.layers.transpose(theta, [0, 2, 1]) + phi = fluid.layers.reshape(phi, [0, 0, -1]) + theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity') + g = fluid.layers.reshape(g, [0, 0, -1]) + + if nonlocal_params["use_softmax"]: + if nonlocal_params["use_scale"]: + theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5) + else: + theta_phi_sc = theta_phi + p = fluid.layers.softmax(theta_phi_sc, name=prefix + '_affinity' + '_prob') + else: + # the reference implementation only defines the softmax affinity + raise NotImplementedError("non-softmax affinity is not implemented") + + # note g's axis[2] corresponds to p's axis[2] + # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1) + p = fluid.layers.transpose(p, [0, 2, 1]) + t = fluid.layers.matmul(g, p, name=prefix + '_y') + + # reshape back + # e.g. (8, 1024, 784) => (8, 1024, 4, 14, 14) + t_shape = t.shape + t_re = fluid.layers.reshape(t, shape=list(theta_shape), actual_shape=theta_shape_op) + blob_out = t_re + blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \ + filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \ + param_attr = ParamAttr(name = prefix + '_out' + "_w", \ + initializer = fluid.initializer.Constant(value = 0.) \ + if nonlocal_params["use_zero_init_conv"] \ + else fluid.initializer.Normal(loc = 0.0, + scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_out' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) \ + if (nonlocal_params["no_bias"] == 0) else False, \ + name = prefix + '_out') + blob_out_shape = blob_out.shape + + if nonlocal_params["use_bn"]: + bn_name = prefix + "_bn" + blob_out = fluid.layers.batch_norm(blob_out, \ + # is_test = test_mode, \ + momentum = nonlocal_params["bn_momentum"], \ + epsilon = nonlocal_params["bn_epsilon"], \ + name = bn_name, \ + param_attr = ParamAttr(name = bn_name + "_s", \ + initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \ + regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ + bias_attr = ParamAttr(name = bn_name + "_b", \ + regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ + moving_mean_name = bn_name + "_rm", \ + moving_variance_name = bn_name + "_riv") # add bn + + if nonlocal_params["use_affine"]: + affine_scale = fluid.layers.create_parameter(\ + shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ + attr=ParamAttr(name=prefix + '_affine' + '_s'), \ + default_initializer = fluid.initializer.Constant(value = 1.)) + affine_bias = fluid.layers.create_parameter(\ + shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ + attr=ParamAttr(name=prefix + '_affine' + '_b'), \ + default_initializer = fluid.initializer.Constant(value = 0.)) + blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \ + bias = affine_bias, name = prefix + '_affine') # add affine + + return blob_out + + +def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner): + ''' + add_space_nonlocal: + Non-local Neural Networks: see https://arxiv.org/abs/1711.07971 + ''' + conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner) + output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum') + return output diff --git a/hub_module/modules/image/classification/resnet34_v2_imagenet/processor.py b/modules/image/classification/resnet34_v2_imagenet/processor.py similarity index 100% rename
from hub_module/modules/image/classification/resnet34_v2_imagenet/processor.py rename to modules/image/classification/resnet34_v2_imagenet/processor.py diff --git a/modules/image/classification/resnet34_v2_imagenet/resnet.py b/modules/image/classification/resnet34_v2_imagenet/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..d2463867fd069377184d3253a6d96ebaffc2a96f --- /dev/null +++ b/modules/image/classification/resnet34_v2_imagenet/resnet.py @@ -0,0 +1,364 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +from collections import OrderedDict +from numbers import Integral + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.framework import Variable +from paddle.fluid.regularizer import L2Decay +from paddle.fluid.initializer import Constant + +from .nonlocal_helper import add_space_nonlocal +from .name_adapter import NameAdapter + +__all__ = ['ResNet', 'ResNetC5'] + + +class ResNet(object): + """ + Residual Network, see https://arxiv.org/abs/1512.03385 + Args: + depth (int): ResNet depth, should be 34 or 50. + freeze_at (int): the stage at which to freeze the backbone + norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel' + freeze_norm (bool): freeze normalization layers + norm_decay (float): weight decay for normalization layer weights + variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently + feature_maps (list): indices of stages whose feature maps are returned + dcn_v2_stages (list): indices of stages that use deformable conv v2 + nonlocal_stages (list): indices of stages that use non-local blocks + """ + __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name'] + + def __init__(self, + depth=34, + freeze_at=0, + norm_type='sync_bn', + freeze_norm=False, + norm_decay=0., + variant='b', + feature_maps=[3, 4, 5], + dcn_v2_stages=[], + weight_prefix_name='', + nonlocal_stages=[], + get_prediction=False, + class_dim=1000): + super(ResNet, self).__init__() + + if isinstance(feature_maps, Integral): + feature_maps = [feature_maps] + + assert depth in [34, 50], \ + "depth {} not in [34, 50]".format(depth) + assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant" + assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" + assert len(feature_maps) > 0, "need one or more feature maps" + assert norm_type in ['bn', 'sync_bn', 'affine_channel'] + assert not (len(nonlocal_stages)>0 and depth<50), \ + "non-local is not supported for resnet18 or resnet34" + + self.depth = depth + self.freeze_at = freeze_at + self.norm_type = norm_type + self.norm_decay = norm_decay + self.freeze_norm = freeze_norm + self.variant = variant + self._model_type = 'ResNet' + self.feature_maps = feature_maps + self.dcn_v2_stages = dcn_v2_stages + self.depth_cfg = { + 34: ([3, 4, 6, 3], self.basicblock), + 50: ([3, 4, 6, 3], self.bottleneck), + } + self.stage_filters = [64, 128, 256, 512] + self._c1_out_chan_num = 64 + self.na = NameAdapter(self) + self.prefix_name = weight_prefix_name + + self.nonlocal_stages = nonlocal_stages + self.nonlocal_mod_cfg = { + 50: 2, + 101: 5, + 152: 8, + 200: 12, + } + self.get_prediction = get_prediction + self.class_dim = class_dim + + def _conv_offset(self, input, filter_size, stride, padding, act=None, name=None): + out_channel = filter_size * filter_size * 3 + out = fluid.layers.conv2d( + input, + num_filters=out_channel, + filter_size=filter_size, + stride=stride, + padding=padding, +
param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"), + bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"), + act=act, + name=name) + return out + + def _conv_norm(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None, dcn_v2=False): + _name = self.prefix_name + name if self.prefix_name != '' else name + if not dcn_v2: + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=_name + "_weights"), + bias_attr=False, + name=_name + '.conv2d.output.1') + else: + # select deformable conv" + offset_mask = self._conv_offset( + input=input, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + act=None, + name=_name + "_conv_offset") + offset_channel = filter_size**2 * 2 + mask_channel = filter_size**2 + offset, mask = fluid.layers.split(input=offset_mask, num_or_sections=[offset_channel, mask_channel], dim=1) + mask = fluid.layers.sigmoid(mask) + conv = fluid.layers.deformable_conv( + input=input, + offset=offset, + mask=mask, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + deformable_groups=1, + im2col_step=1, + param_attr=ParamAttr(name=_name + "_weights"), + bias_attr=False, + name=_name + ".conv2d.output.1") + + bn_name = self.na.fix_conv_norm_name(name) + bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name + + norm_lr = 0. if self.freeze_norm else 1. + norm_decay = self.norm_decay + pattr = ParamAttr(name=bn_name + '_scale', learning_rate=norm_lr, regularizer=L2Decay(norm_decay)) + battr = ParamAttr(name=bn_name + '_offset', learning_rate=norm_lr, regularizer=L2Decay(norm_decay)) + + if self.norm_type in ['bn', 'sync_bn']: + global_stats = True if self.freeze_norm else False + out = fluid.layers.batch_norm( + input=conv, + act=act, + name=bn_name + '.output.1', + param_attr=pattr, + bias_attr=battr, + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance', + use_global_stats=global_stats) + scale = fluid.framework._get_var(pattr.name) + bias = fluid.framework._get_var(battr.name) + elif self.norm_type == 'affine_channel': + scale = fluid.layers.create_parameter( + shape=[conv.shape[1]], dtype=conv.dtype, attr=pattr, default_initializer=fluid.initializer.Constant(1.)) + bias = fluid.layers.create_parameter( + shape=[conv.shape[1]], dtype=conv.dtype, attr=battr, default_initializer=fluid.initializer.Constant(0.)) + out = fluid.layers.affine_channel(x=conv, scale=scale, bias=bias, act=act) + if self.freeze_norm: + scale.stop_gradient = True + bias.stop_gradient = True + return out + + def _shortcut(self, input, ch_out, stride, is_first, name): + max_pooling_in_short_cut = self.variant == 'd' + ch_in = input.shape[1] + # the naming rule is same as pretrained weight + name = self.na.fix_shortcut_name(name) + std_senet = getattr(self, 'std_senet', False) + if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first): + if std_senet: + if is_first: + return self._conv_norm(input, ch_out, 1, stride, name=name) + else: + return self._conv_norm(input, ch_out, 3, stride, name=name) + if max_pooling_in_short_cut and not is_first: + input = fluid.layers.pool2d( + input=input, pool_size=2, pool_stride=2, pool_padding=0, ceil_mode=True, pool_type='avg') + return self._conv_norm(input, ch_out, 1, 1, name=name) + return self._conv_norm(input, ch_out, 
1, stride, name=name) + else: + return input + + def bottleneck(self, input, num_filters, stride, is_first, name, dcn_v2=False): + if self.variant == 'a': + stride1, stride2 = stride, 1 + else: + stride1, stride2 = 1, stride + + # ResNeXt + groups = getattr(self, 'groups', 1) + group_width = getattr(self, 'group_width', -1) + if groups == 1: + expand = 4 + elif (groups * group_width) == 256: + expand = 1 + else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d + num_filters = num_filters // 2 + expand = 2 + + conv_name1, conv_name2, conv_name3, \ + shortcut_name = self.na.fix_bottleneck_name(name) + std_senet = getattr(self, 'std_senet', False) + if std_senet: + conv_def = [[int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1], + [num_filters, 3, stride2, 'relu', groups, conv_name2], + [num_filters * expand, 1, 1, None, 1, conv_name3]] + else: + conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], + [num_filters, 3, stride2, 'relu', groups, conv_name2], + [num_filters * expand, 1, 1, None, 1, conv_name3]] + + residual = input + for i, (c, k, s, act, g, _name) in enumerate(conv_def): + residual = self._conv_norm( + input=residual, + num_filters=c, + filter_size=k, + stride=s, + act=act, + groups=g, + name=_name, + dcn_v2=(i == 1 and dcn_v2)) + short = self._shortcut(input, num_filters * expand, stride, is_first=is_first, name=shortcut_name) + # Squeeze-and-Excitation + if callable(getattr(self, '_squeeze_excitation', None)): + residual = self._squeeze_excitation(input=residual, num_channels=num_filters, name='fc' + name) + return fluid.layers.elementwise_add(x=short, y=residual, act='relu', name=name + ".add.output.5") + + def basicblock(self, input, num_filters, stride, is_first, name, dcn_v2=False): + assert dcn_v2 is False, "Not implemented yet." + conv0 = self._conv_norm( + input=input, num_filters=num_filters, filter_size=3, act='relu', stride=stride, name=name + "_branch2a") + conv1 = self._conv_norm(input=conv0, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") + short = self._shortcut(input, num_filters, stride, is_first, name=name + "_branch1") + return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') + + def layer_warp(self, input, stage_num): + """ + Args: + input (Variable): input variable. + stage_num (int): the stage number, should be 2, 3, 4, 5 + + Returns: + The last variable in endpoint-th stage. 
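+
+        Note:
+            The stem (c1_stage) has already downsampled the input by 4x, so
+            stage 2 keeps stride 1 while stages 3-5 each downsample in their
+            first block; feature map i therefore has an overall stride of 2**i.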
+ """ + assert stage_num in [2, 3, 4, 5] + + stages, block_func = self.depth_cfg[self.depth] + count = stages[stage_num - 2] + + ch_out = self.stage_filters[stage_num - 2] + is_first = False if stage_num != 2 else True + dcn_v2 = True if stage_num in self.dcn_v2_stages else False + + nonlocal_mod = 1000 + if stage_num in self.nonlocal_stages: + nonlocal_mod = self.nonlocal_mod_cfg[self.depth] if stage_num == 4 else 2 + + # Make the layer name and parameter name consistent + # with ImageNet pre-trained model + conv = input + for i in range(count): + conv_name = self.na.fix_layer_warp_name(stage_num, count, i) + if self.depth < 50: + is_first = True if i == 0 and stage_num == 2 else False + conv = block_func( + input=conv, + num_filters=ch_out, + stride=2 if i == 0 and stage_num != 2 else 1, + is_first=is_first, + name=conv_name, + dcn_v2=dcn_v2) + + # add non local model + dim_in = conv.shape[1] + nonlocal_name = "nonlocal_conv{}".format(stage_num) + if i % nonlocal_mod == nonlocal_mod - 1: + conv = add_space_nonlocal(conv, dim_in, dim_in, nonlocal_name + '_{}'.format(i), int(dim_in / 2)) + return conv + + def c1_stage(self, input): + out_chan = self._c1_out_chan_num + + conv1_name = self.na.fix_c1_stage_name() + + if self.variant in ['c', 'd']: + conv_def = [ + [out_chan // 2, 3, 2, "conv1_1"], + [out_chan // 2, 3, 1, "conv1_2"], + [out_chan, 3, 1, "conv1_3"], + ] + else: + conv_def = [[out_chan, 7, 2, conv1_name]] + + for (c, k, s, _name) in conv_def: + input = self._conv_norm(input=input, num_filters=c, filter_size=k, stride=s, act='relu', name=_name) + + output = fluid.layers.pool2d(input=input, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + return output + + def __call__(self, input): + assert isinstance(input, Variable) + assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ + "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) + + res_endpoints = [] + + res = input + feature_maps = self.feature_maps + severed_head = getattr(self, 'severed_head', False) + if not severed_head: + res = self.c1_stage(res) + feature_maps = range(2, max(self.feature_maps) + 1) + + for i in feature_maps: + res = self.layer_warp(res, i) + if i in self.feature_maps: + res_endpoints.append(res) + if self.freeze_at >= i: + res.stop_gradient = True + if self.get_prediction: + pool = fluid.layers.pool2d(input=res, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + + out = fluid.layers.fc( + input=pool, + size=self.class_dim, + param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv))) + out = fluid.layers.softmax(out) + return out + return OrderedDict( + [('res{}_sum'.format(self.feature_maps[idx]), feat) for idx, feat in enumerate(res_endpoints)]) + + +class ResNetC5(ResNet): + def __init__(self, + depth=50, + freeze_at=2, + norm_type='affine_channel', + freeze_norm=True, + norm_decay=0., + variant='b', + feature_maps=[5], + weight_prefix_name=''): + super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm, norm_decay, variant, feature_maps) + self.severed_head = True diff --git a/modules/image/classification/resnet34_vd_imagenet/module.py b/modules/image/classification/resnet34_vd_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..56cd36838f3d72501b0fe1bdabb9bc9cbca5789e --- /dev/null +++ b/modules/image/classification/resnet34_vd_imagenet/module.py @@ -0,0 +1,246 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import math + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__( + self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None, + ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNet34_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv2, act='relu') + return y + + +class BasicBlock(nn.Layer): + """Basic block for ResNet34_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BasicBlock, self).__init__() + 
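+        # A BasicBlock is the two-layer residual unit of the 18/34-depth
+        # ResNets: 3x3 (stride s) -> 3x3, with no channel expansion. The
+        # identity shortcut therefore only needs the vd-style projection
+        # defined below when the spatial size or channel count changes.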
self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv1, act='relu') + return y + + +@moduleinfo( + name="resnet34_vd_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnet34_vd_imagenet is a classification model, " + "this module is trained with Baidu open sourced dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNet34_vd(nn.Layer): + """ResNet34_vd model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNet34_vd, self).__init__() + + self.layers = 34 + depth = [3, 4, 6, 3] + num_channels = [64, 64, 128, 256] + num_filters = [64, 128, 256, 512] + + self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") + self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") + self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + basic_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BasicBlock( + num_channels=num_channels[block] if i == 0 else num_filters[block], + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(basic_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + self.pool2d_avg_channels = num_channels[-1] * 2 + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), + bias_attr=ParamAttr(name="fc_0.b_0")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnet34_vd_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet34_vd_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/modules/image/classification/resnet34_vd_imagenet_ssld/module.py 
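The constructor above derives block names with `"res" + str(block + 2) + chr(97 + i)`; reproduced standalone, this is the rule that keeps parameter names aligned with the ImageNet-pretrained weight files:

```python
# Block naming as in the __init__ loop above: res2a ... res5c.
depth = [3, 4, 6, 3]                    # ResNet-34 block counts per stage
names = ['res' + str(stage + 2) + chr(97 + i)
         for stage in range(len(depth)) for i in range(depth[stage])]
print(names[:4], names[-1])             # ['res2a', 'res2b', 'res2c', 'res3a'] res5c
```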
b/modules/image/classification/resnet34_vd_imagenet_ssld/module.py new file mode 100644 index 0000000000000000000000000000000000000000..d1eb872ea3ae2ae55d298819270309b0e0fbb148 --- /dev/null +++ b/modules/image/classification/resnet34_vd_imagenet_ssld/module.py @@ -0,0 +1,246 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import math + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__( + self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None, + ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNet34_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = 
self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv2, act='relu') + return y + + +class BasicBlock(nn.Layer): + """Basic block for ResNet34_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv1, act='relu') + return y + + +@moduleinfo( + name="resnet34_vd_imagenet_ssld", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnet34_vd_imagenet_ssld is a classification model, " + "this module is trained with Baidu open sourced dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNet34_vd(nn.Layer): + """ResNet34_vd model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNet34_vd, self).__init__() + + self.layers = 34 + depth = [3, 4, 6, 3] + num_channels = [64, 64, 128, 256] + num_filters = [64, 128, 256, 512] + + self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") + self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") + self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + basic_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BasicBlock( + num_channels=num_channels[block] if i == 0 else num_filters[block], + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(basic_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + self.pool2d_avg_channels = num_channels[-1] * 2 + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), + bias_attr=ParamAttr(name="fc_0.b_0")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnet34_vd_ssld_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet34_vd_ssld_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: 
paddle.Tensor): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/modules/image/classification/resnet50_imagenet/module.py b/modules/image/classification/resnet50_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..9903e829ffe9d74886de3d3da2558e73b06dfb16 --- /dev/null +++ b/modules/image/classification/resnet50_imagenet/module.py @@ -0,0 +1,226 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import math + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(bn_name + "_offset"), + moving_mean_name=bn_name + "_mean", + moving_variance_name=bn_name + "_variance") + + def forward(self, inputs: paddle.Tensor): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNet50.""" + + def __init__(self, num_channels: int, num_filters: int, stride: int, shortcut: bool = True, name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act="relu", name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu", + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + self._num_channels_out = num_filters * 4 + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + 
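The 1x1-3x3-1x1 `BottleneckBlock` above trades width for depth. A quick weight count (ignoring BN, under the 256-channel configuration used by ResNet50's first stage) against two full-width 3x3 convs shows the saving:

```python
# Parameter comparison motivating the bottleneck design above.
def conv_params(cin, cout, k):
    return cin * cout * k * k

bottleneck = (conv_params(256, 64, 1)    # squeeze
              + conv_params(64, 64, 3)   # spatial conv at reduced width
              + conv_params(64, 256, 1)) # expand
two_3x3 = 2 * conv_params(256, 256, 3)
print(bottleneck, two_3x3)               # 69632 vs 1179648, roughly 17x fewer
```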
conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.elementwise_add(x=short, y=conv2, act="relu") + return y + + +class BasicBlock(nn.Layer): + """Basic block for ResNet50.""" + + def __init__(self, num_channels: int, num_filters: int, stride: int, shortcut: bool = True, name: str = None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act="relu", + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv1, act="relu") + return y + + +@moduleinfo( + name="resnet50_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnet50_imagenet is a classification model, " + "this module is trained with Baidu open sourced dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNet50(nn.Layer): + """ResNet50 model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNet50, self).__init__() + + self.layers = 50 + depth = [3, 4, 6, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [64, 128, 256, 512] + + self.conv = ConvBNLayer(num_channels=3, num_filters=64, filter_size=7, stride=2, act="relu", name="conv1") + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + conv_name, + BottleneckBlock( + num_channels=num_channels[block] if i == 0 else num_filters[block] * 4, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), + bias_attr=ParamAttr(name="fc_0.b_0")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnet50_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet50_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv(inputs) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git 
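The classifier head above draws its fc weights from `Uniform(-stdv, stdv)` with `stdv = 1/sqrt(fan_in)`, the classic LeCun-style uniform initialisation; for ResNet50 the pooled feature width is `num_channels[-1] * 2 = 2048`. A minimal numerical check:

```python
import math
import numpy as np

# LeCun-uniform init as used for the Linear head above.
fan_in = 2048                              # pool2d_avg_channels for ResNet50
stdv = 1.0 / math.sqrt(fan_in)
w = np.random.uniform(-stdv, stdv, size=(fan_in, 1000))
print(round(stdv, 5), round(float(w.std()), 5))  # empirical std ~ stdv / sqrt(3)
```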
a/hub_module/modules/image/classification/resnet34_v2_imagenet/__init__.py b/modules/image/classification/resnet50_v2_imagenet/__init__.py similarity index 100% rename from hub_module/modules/image/classification/resnet34_v2_imagenet/__init__.py rename to modules/image/classification/resnet50_v2_imagenet/__init__.py diff --git a/modules/image/classification/resnet50_v2_imagenet/data_feed.py b/modules/image/classification/resnet50_v2_imagenet/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9d790e93333f3742be9ee58215da87eb2ef819 --- /dev/null +++ b/modules/image/classification/resnet50_v2_imagenet/data_feed.py @@ -0,0 +1,73 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import os +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image, ImageEnhance +from paddle import fluid + +DATA_DIM = 224 +img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + +def resize_short(img, target_size): + percent = float(target_size) / min(img.size[0], img.size[1]) + resized_width = int(round(img.size[0] * percent)) + resized_height = int(round(img.size[1] * percent)) + img = img.resize((resized_width, resized_height), Image.LANCZOS) + return img + + +def crop_image(img, target_size, center): + width, height = img.size + size = target_size + if center == True: + w_start = (width - size) / 2 + h_start = (height - size) / 2 + else: + w_start = np.random.randint(0, width - size + 1) + h_start = np.random.randint(0, height - size + 1) + w_end = w_start + size + h_end = h_start + size + img = img.crop((w_start, h_start, w_end, h_end)) + return img + + +def process_image(img): + img = resize_short(img, target_size=256) + img = crop_image(img, target_size=DATA_DIM, center=True) + if img.mode != 'RGB': + img = img.convert('RGB') + #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def test_reader(paths=None, images=None): + """data generator + :param paths: path to images. 
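A shape walk-through of the preprocessing pipeline defined above (`resize_short` to 256, center crop to `DATA_DIM`, then mean/std normalisation), inlined so it runs standalone; the random array stands in for a real photo:

```python
import numpy as np
from PIL import Image

# Inlined equivalent of process_image() above, step by step.
img = Image.fromarray(np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8))
percent = 256.0 / min(img.size)                       # resize_short(img, 256)
img = img.resize((round(img.size[0] * percent), round(img.size[1] * percent)),
                 Image.LANCZOS)
left, top = (img.size[0] - 224) // 2, (img.size[1] - 224) // 2
img = img.crop((left, top, left + 224, top + 224))    # crop_image(..., center=True)
chw = np.asarray(img, dtype='float32').transpose((2, 0, 1)) / 255
chw = (chw - np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)) / \
      np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)
print(chw.shape)                                      # (3, 224, 224)
```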
+ :type paths: list, each element is a str + :param images: data of images, [N, H, W, C] + :type images: numpy.ndarray + """ + img_list = [] + if paths: + for img_path in paths: + assert os.path.isfile(img_path), "The {} isn't a valid file path.".format(img_path) + img = Image.open(img_path) + #img = cv2.imread(img_path) + img_list.append(img) + if images is not None: + for img in images: + img_list.append(Image.fromarray(np.uint8(img))) + for im in img_list: + im = process_image(im) + yield im diff --git a/hub_module/modules/image/classification/resnet50_v2_imagenet/label_file.txt b/modules/image/classification/resnet50_v2_imagenet/label_file.txt similarity index 100% rename from hub_module/modules/image/classification/resnet50_v2_imagenet/label_file.txt rename to modules/image/classification/resnet50_v2_imagenet/label_file.txt diff --git a/modules/image/classification/resnet50_v2_imagenet/module.py b/modules/image/classification/resnet50_v2_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..14c9221a3b105057803ca3c4d42e53dc9ba2d50f --- /dev/null +++ b/modules/image/classification/resnet50_v2_imagenet/module.py @@ -0,0 +1,233 @@ +import os +import ast +import argparse + +import numpy as np +import paddlehub as hub +import paddle.fluid as fluid +from paddlehub.module.module import moduleinfo, runnable +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.io.parser import txt_parser + +from resnet50_v2_imagenet.resnet import ResNet, ResNetC5 +from resnet50_v2_imagenet.processor import load_label_info +from resnet50_v2_imagenet.data_feed import test_reader + + +@moduleinfo( + name="resnet50_v2_imagenet", + version="1.1.0", + type="cv/classification", + summary="ResNet50 is a image classfication model trained with ImageNet-2012 dataset.", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class ResNet50(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, "resnet50_v2_model") + self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) + self.infer_prog = None + self.pred_out = None + self._set_config() + + def get_expected_image_width(self): + return 224 + + def get_expected_image_height(self): + return 224 + + def get_pretrained_images_mean(self): + im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3) + return im_mean + + def get_pretrained_images_std(self): + im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) + return im_std + + def _set_config(self): + """ + predictor config setting + """ + cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def context(self, + input_image=None, + trainable=True, + pretrained=True, + param_prefix='', + get_prediction=False, + variant='d', + norm_type='bn', + feature_maps=[3, 4, 5], + return_c5=False): + """Distill the Head Features, so as to perform transfer learning. + + :param input_image: image tensor. + :type input_image: + :param trainable: whether to set parameters trainable. 
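A hypothetical driver for the `context()` API above, useful when this module serves as a detection or transfer-learning backbone. The module name is the one registered in this file; the `body_feats` key follows the code, not official documentation:

```python
import paddlehub as hub

# Sketch: pull the frozen backbone program and its feature-map outputs.
model = hub.Module(name='resnet50_v2_imagenet')
inputs, outputs, program = model.context(trainable=False, pretrained=True,
                                         get_prediction=False)
print(inputs['image'].shape, list(outputs))   # (-1, 3, 224, 224) ['body_feats']
```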
+ :type trainable: bool + :param pretrained: whether to load default pretrained model. + :type pretrained: bool + :param param_prefix: the prefix of parameters in yolo_head and backbone + :type param_prefix: str + :param get_prediction: whether to get prediction, + if True, outputs is {'bbox_out': bbox_out}, + if False, outputs is {'head_features': head_features}. + :type get_prediction: bool + :param depth: depth of network + :type depth: int + :param variant: type of resnet + :type variant: str + :param norm_type: type of normlization + :type norm_type: str + :param feature_maps: stage of output + :type feature_maps: list + """ + context_prog = input_image.block.program if input_image else fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(context_prog, startup_program): + if return_c5: + return ResNetC5(depth=50, norm_type=norm_type, variant=variant, feature_maps=feature_maps) + image = input_image if input_image else fluid.data( + name='image', shape=[-1, 3, 224, 224], dtype='float32', lod_level=0) + backbone = ResNet(depth=50, variant=variant, norm_type=norm_type,\ + feature_maps=feature_maps, get_prediction=get_prediction) + + out = backbone(image) + inputs = {'image': image} + if get_prediction: + outputs = {'pred_out': out} + else: + outputs = {'body_feats': out} + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + if pretrained: + + def _if_exist(var): + return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + + if not param_prefix: + fluid.io.load_vars( + exe, self.default_pretrained_model_path, main_program=context_prog, predicate=_if_exist) + else: + exe.run(startup_program) + return inputs, outputs, context_prog + + def classification(self, paths=None, images=None, use_gpu=False, batch_size=1, top_k=2): + """API of Classification. + :param paths: the path of images. + :type paths: list, each element is correspond to the path of an image. + :param images: data of images, [N, H, W, C] + :type images: numpy.ndarray + :param use_gpu: whether to use gpu or not. + :type use_gpu: bool + :param batch_size: bathc size. 
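The CUDA probe inside `_set_config()` above is worth isolating: a GPU predictor is only built when `CUDA_VISIBLE_DEVICES` names at least one device. Note it treats `"-1"` and `""` as "no GPU" only because `int()` fails on their first character; a narrower except clause (instead of the bare `except` in the original) makes that explicit:

```python
import os

# Isolated version of the GPU-availability check used by _set_config() above.
def gpu_available() -> bool:
    try:
        int(os.environ["CUDA_VISIBLE_DEVICES"][0])   # '-' or missing -> no GPU
        return True
    except (KeyError, IndexError, ValueError):
        return False
```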
+ :type batch_size: int + :param top_k: result of top k + :typr top_k: int + """ + if self.infer_prog is None: + inputs, outputs, self.infer_prog = self.context(trainable=False, pretrained=True, get_prediction=True) + self.infer_prog = self.infer_prog.clone(for_test=True) + self.pred_out = outputs['pred_out'] + place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() + exe = fluid.Executor(place) + all_images = [] + paths = paths if paths else [] + for yield_data in test_reader(paths, images): + all_images.append(yield_data) + + images_num = len(all_images) + loop_num = int(np.ceil(images_num / batch_size)) + + res_list = [] + top_k = max(min(top_k, 1000), 1) + for iter_id in range(loop_num): + batch_data = [] + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_images[handle_id + image_id]) + except: + pass + batch_data = np.array(batch_data).astype('float32') + data_tensor = PaddleTensor(batch_data.copy()) + if use_gpu: + result = self.gpu_predictor.run([data_tensor]) + else: + result = self.cpu_predictor.run([data_tensor]) + for i, res in enumerate(result[0].as_ndarray()): + res_dict = {} + pred_label = np.argsort(res)[::-1][:top_k] + for k in pred_label: + class_name = self.label_names[int(k)].split(',')[0] + max_prob = res[k] + res_dict[class_name] = max_prob + res_list.append(res_dict) + return res_list + + def add_module_config_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + + self.arg_config_group.add_argument('--batch_size', type=int, default=1, help="batch size for prediction") + + def add_module_input_arg(self): + """ + Add the command input options + """ + self.arg_input_group.add_argument('--input_path', type=str, default=None, help="input data") + self.arg_input_group.add_argument('--input_file', type=str, default=None, help="file contain input data") + + def check_input_data(self, args): + input_data = [] + if args.input_path: + input_data = [args.input_path] + elif args.input_file: + if not os.path.exists(args.input_file): + raise RuntimeError("File %s is not exist." % args.input_file) + else: + input_data = txt_parser.parse(args.input_file, use_strip=True) + return input_data + + @runnable + def run_cmd(self, argvs): + self.parser = argparse.ArgumentParser( + description="Run the {}".format(self.name), + prog="hub run {}".format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + input_data = self.check_input_data(args) + if len(input_data) == 0: + self.parser.print_help() + exit(1) + else: + for image_path in input_data: + if not os.path.exists(image_path): + raise RuntimeError("File %s or %s is not exist." 
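The label-selection step in the prediction loop above, shown on a fake softmax row; `top_k` is clamped to `[1, 1000]` exactly as in `classification()`:

```python
import numpy as np

# Top-k extraction as performed per result row in classification() above.
res = np.array([0.05, 0.7, 0.05, 0.2], dtype='float32')
top_k = max(min(2, 1000), 1)
pred_label = np.argsort(res)[::-1][:top_k]   # indices of the largest scores
print(pred_label, res[pred_label])           # [1 3] [0.7 0.2]
```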
% image_path) + return self.classification(paths=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size) diff --git a/hub_module/modules/image/classification/resnet50_v2_imagenet/name_adapter.py b/modules/image/classification/resnet50_v2_imagenet/name_adapter.py similarity index 100% rename from hub_module/modules/image/classification/resnet50_v2_imagenet/name_adapter.py rename to modules/image/classification/resnet50_v2_imagenet/name_adapter.py diff --git a/modules/image/classification/resnet50_v2_imagenet/nonlocal_helper.py b/modules/image/classification/resnet50_v2_imagenet/nonlocal_helper.py new file mode 100644 index 0000000000000000000000000000000000000000..839df4caf744280001f033d8ef6a3d560277368e --- /dev/null +++ b/modules/image/classification/resnet50_v2_imagenet/nonlocal_helper.py @@ -0,0 +1,151 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import paddle.fluid as fluid +from paddle.fluid import ParamAttr + +nonlocal_params = { + "use_zero_init_conv": False, + "conv_init_std": 0.01, + "no_bias": True, + "use_maxpool": False, + "use_softmax": True, + "use_bn": False, + "use_scale": True, # vital for the model prformance!!! + "use_affine": False, + "bn_momentum": 0.9, + "bn_epsilon": 1.0000001e-5, + "bn_init_gamma": 0.9, + "weight_decay_bn": 1.e-4, +} + + +def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner, max_pool_stride=2): + cur = input + theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \ + filter_size = [1, 1], stride = [1, 1], \ + padding = [0, 0], \ + param_attr=ParamAttr(name = prefix + '_theta' + "_w", \ + initializer = fluid.initializer.Normal(loc = 0.0, + scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) \ + if not nonlocal_params["no_bias"] else False, \ + name = prefix + '_theta') + theta_shape = theta.shape + theta_shape_op = fluid.layers.shape(theta) + theta_shape_op.stop_gradient = True + + if nonlocal_params["use_maxpool"]: + max_pool = fluid.layers.pool2d(input = cur, \ + pool_size = [max_pool_stride, max_pool_stride], \ + pool_type = 'max', \ + pool_stride = [max_pool_stride, max_pool_stride], \ + pool_padding = [0, 0], \ + name = prefix + '_pool') + else: + max_pool = cur + + phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ + filter_size = [1, 1], stride = [1, 1], \ + padding = [0, 0], \ + param_attr = ParamAttr(name = prefix + '_phi' + "_w", \ + initializer = fluid.initializer.Normal(loc = 0.0, + scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) \ + if (nonlocal_params["no_bias"] == 0) else False, \ + name = prefix + '_phi') + phi_shape = phi.shape + + g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ + filter_size = [1, 1], stride = [1, 1], \ + padding = [0, 0], \ + param_attr = ParamAttr(name = prefix + '_g' + "_w", \ + initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_g' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) else False, \ + name = prefix + '_g') + g_shape = g.shape + # we have to use explicit batch size (to support arbitrary spacetime size) + # e.g. 
(8, 1024, 4, 14, 14) => (8, 1024, 784) + theta = fluid.layers.reshape(theta, shape=(0, 0, -1)) + theta = fluid.layers.transpose(theta, [0, 2, 1]) + phi = fluid.layers.reshape(phi, [0, 0, -1]) + theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity') + g = fluid.layers.reshape(g, [0, 0, -1]) + + if nonlocal_params["use_softmax"]: + if nonlocal_params["use_scale"]: + theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5) + else: + theta_phi_sc = theta_phi + p = fluid.layers.softmax(theta_phi_sc, name=prefix + '_affinity' + '_prob') + else: + # not clear about what is doing in xlw's code + p = None # not implemented + raise "Not implemented when not use softmax" + + # note g's axis[2] corresponds to p's axis[2] + # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1) + p = fluid.layers.transpose(p, [0, 2, 1]) + t = fluid.layers.matmul(g, p, name=prefix + '_y') + + # reshape back + # e.g. (8, 1024, 784) => (8, 1024, 4, 14, 14) + t_shape = t.shape + t_re = fluid.layers.reshape(t, shape=list(theta_shape), actual_shape=theta_shape_op) + blob_out = t_re + blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \ + filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \ + param_attr = ParamAttr(name = prefix + '_out' + "_w", \ + initializer = fluid.initializer.Constant(value = 0.) \ + if nonlocal_params["use_zero_init_conv"] \ + else fluid.initializer.Normal(loc = 0.0, + scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_out' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) \ + if (nonlocal_params["no_bias"] == 0) else False, \ + name = prefix + '_out') + blob_out_shape = blob_out.shape + + if nonlocal_params["use_bn"]: + bn_name = prefix + "_bn" + blob_out = fluid.layers.batch_norm(blob_out, \ + # is_test = test_mode, \ + momentum = nonlocal_params["bn_momentum"], \ + epsilon = nonlocal_params["bn_epsilon"], \ + name = bn_name, \ + param_attr = ParamAttr(name = bn_name + "_s", \ + initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \ + regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ + bias_attr = ParamAttr(name = bn_name + "_b", \ + regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ + moving_mean_name = bn_name + "_rm", \ + moving_variance_name = bn_name + "_riv") # add bn + + if nonlocal_params["use_affine"]: + affine_scale = fluid.layers.create_parameter(\ + shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ + attr=ParamAttr(name=prefix + '_affine' + '_s'), \ + default_initializer = fluid.initializer.Constant(value = 1.)) + affine_bias = fluid.layers.create_parameter(\ + shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ + attr=ParamAttr(name=prefix + '_affine' + '_b'), \ + default_initializer = fluid.initializer.Constant(value = 0.)) + blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \ + bias = affine_bias, name = prefix + '_affine') # add affine + + return blob_out + + +def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner): + ''' + add_space_nonlocal: + Non-local Neural Networks: see https://arxiv.org/abs/1711.07971 + ''' + conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner) + output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum') + return output diff --git a/hub_module/modules/image/classification/resnet50_v2_imagenet/processor.py b/modules/image/classification/resnet50_v2_imagenet/processor.py similarity index 100% rename 
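A framework-free sketch of the embedded-Gaussian attention that `space_nonlocal` above assembles from `theta`, `phi`, and `g`: `softmax(theta^T phi * d^-0.5)` applied to `g` over flattened spatial positions. The final 1x1 conv (not shown) maps `dim_inner` back to `dim_out` before `add_space_nonlocal` adds the residual:

```python
import numpy as np

# Non-local attention core (https://arxiv.org/abs/1711.07971) in plain numpy.
N, d, H, W = 1, 4, 14, 14                   # d = dim_inner
theta = np.random.randn(N, d, H * W)        # outputs of the 1x1 convs, reshaped
phi = np.random.randn(N, d, H * W)
g = np.random.randn(N, d, H * W)

aff = np.einsum('ndi,ndj->nij', theta, phi) * d ** -0.5    # [N, HW, HW]
p = np.exp(aff) / np.exp(aff).sum(axis=2, keepdims=True)   # softmax over j
y = np.einsum('ncj,nij->nci', g, p)                        # attend g to each i
print(y.shape)                                             # (1, 4, 196)
```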
from hub_module/modules/image/classification/resnet50_v2_imagenet/processor.py rename to modules/image/classification/resnet50_v2_imagenet/processor.py diff --git a/modules/image/classification/resnet50_v2_imagenet/resnet.py b/modules/image/classification/resnet50_v2_imagenet/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..d1cdab364c27d17875fcfa3594ac0ad188667f44 --- /dev/null +++ b/modules/image/classification/resnet50_v2_imagenet/resnet.py @@ -0,0 +1,364 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +from collections import OrderedDict +from numbers import Integral + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.framework import Variable +from paddle.fluid.regularizer import L2Decay +from paddle.fluid.initializer import Constant + +from .nonlocal_helper import add_space_nonlocal +from .name_adapter import NameAdapter + +__all__ = ['ResNet', 'ResNetC5'] + + +class ResNet(object): + """ + Residual Network, see https://arxiv.org/abs/1512.03385 + Args: + depth (int): ResNet depth, should be 34, 50. + freeze_at (int): freeze the backbone at which stage + norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel' + freeze_norm (bool): freeze normalization layers + norm_decay (float): weight decay for normalization layer weights + variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently + feature_maps (list): index of stages whose feature maps are returned + dcn_v2_stages (list): index of stages who select deformable conv v2 + nonlocal_stages (list): index of stages who select nonlocal networks + """ + __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name'] + + def __init__(self, + depth=50, + freeze_at=0, + norm_type='sync_bn', + freeze_norm=False, + norm_decay=0., + variant='d', + feature_maps=[3, 4, 5], + dcn_v2_stages=[], + weight_prefix_name='', + nonlocal_stages=[], + get_prediction=False, + class_dim=1000): + super(ResNet, self).__init__() + + if isinstance(feature_maps, Integral): + feature_maps = [feature_maps] + + assert depth in [34, 50], \ + "depth {} not in [34, 50]" + assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant" + assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" + assert len(feature_maps) > 0, "need one or more feature maps" + assert norm_type in ['bn', 'sync_bn', 'affine_channel'] + assert not (len(nonlocal_stages)>0 and depth<50), \ + "non-local is not supported for resnet18 or resnet34" + + self.depth = depth + self.freeze_at = freeze_at + self.norm_type = norm_type + self.norm_decay = norm_decay + self.freeze_norm = freeze_norm + self.variant = variant + self._model_type = 'ResNet' + self.feature_maps = feature_maps + self.dcn_v2_stages = dcn_v2_stages + self.depth_cfg = { + 34: ([3, 4, 6, 3], self.basicblock), + 50: ([3, 4, 6, 3], self.bottleneck), + } + self.stage_filters = [64, 128, 256, 512] + self._c1_out_chan_num = 64 + self.na = NameAdapter(self) + self.prefix_name = weight_prefix_name + + self.nonlocal_stages = nonlocal_stages + self.nonlocal_mod_cfg = { + 50: 2, + 101: 5, + 152: 8, + 200: 12, + } + self.get_prediction = get_prediction + self.class_dim = class_dim + + def _conv_offset(self, input, filter_size, stride, padding, act=None, name=None): + out_channel = filter_size * filter_size * 3 + out = fluid.layers.conv2d( + input, + num_filters=out_channel, + filter_size=filter_size, + stride=stride, + padding=padding, + 
param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"), + bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"), + act=act, + name=name) + return out + + def _conv_norm(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None, dcn_v2=False): + _name = self.prefix_name + name if self.prefix_name != '' else name + if not dcn_v2: + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=_name + "_weights"), + bias_attr=False, + name=_name + '.conv2d.output.1') + else: + # select deformable conv" + offset_mask = self._conv_offset( + input=input, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + act=None, + name=_name + "_conv_offset") + offset_channel = filter_size**2 * 2 + mask_channel = filter_size**2 + offset, mask = fluid.layers.split(input=offset_mask, num_or_sections=[offset_channel, mask_channel], dim=1) + mask = fluid.layers.sigmoid(mask) + conv = fluid.layers.deformable_conv( + input=input, + offset=offset, + mask=mask, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + deformable_groups=1, + im2col_step=1, + param_attr=ParamAttr(name=_name + "_weights"), + bias_attr=False, + name=_name + ".conv2d.output.1") + + bn_name = self.na.fix_conv_norm_name(name) + bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name + + norm_lr = 0. if self.freeze_norm else 1. + norm_decay = self.norm_decay + pattr = ParamAttr(name=bn_name + '_scale', learning_rate=norm_lr, regularizer=L2Decay(norm_decay)) + battr = ParamAttr(name=bn_name + '_offset', learning_rate=norm_lr, regularizer=L2Decay(norm_decay)) + + if self.norm_type in ['bn', 'sync_bn']: + global_stats = True if self.freeze_norm else False + out = fluid.layers.batch_norm( + input=conv, + act=act, + name=bn_name + '.output.1', + param_attr=pattr, + bias_attr=battr, + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance', + use_global_stats=global_stats) + scale = fluid.framework._get_var(pattr.name) + bias = fluid.framework._get_var(battr.name) + elif self.norm_type == 'affine_channel': + scale = fluid.layers.create_parameter( + shape=[conv.shape[1]], dtype=conv.dtype, attr=pattr, default_initializer=fluid.initializer.Constant(1.)) + bias = fluid.layers.create_parameter( + shape=[conv.shape[1]], dtype=conv.dtype, attr=battr, default_initializer=fluid.initializer.Constant(0.)) + out = fluid.layers.affine_channel(x=conv, scale=scale, bias=bias, act=act) + if self.freeze_norm: + scale.stop_gradient = True + bias.stop_gradient = True + return out + + def _shortcut(self, input, ch_out, stride, is_first, name): + max_pooling_in_short_cut = self.variant == 'd' + ch_in = input.shape[1] + # the naming rule is same as pretrained weight + name = self.na.fix_shortcut_name(name) + std_senet = getattr(self, 'std_senet', False) + if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first): + if std_senet: + if is_first: + return self._conv_norm(input, ch_out, 1, stride, name=name) + else: + return self._conv_norm(input, ch_out, 3, stride, name=name) + if max_pooling_in_short_cut and not is_first: + input = fluid.layers.pool2d( + input=input, pool_size=2, pool_stride=2, pool_padding=0, ceil_mode=True, pool_type='avg') + return self._conv_norm(input, ch_out, 1, 1, name=name) + return self._conv_norm(input, ch_out, 
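The channel bookkeeping behind the DCN-v2 branch of `_conv_norm` above: `_conv_offset` emits `3 * k * k` maps per position, split into `2 * k * k` (x, y) offsets and `k * k` modulation masks, and only the masks pass through a sigmoid:

```python
# Offset/mask split used by the deformable-conv path above.
k = 3                                  # filter_size
offset_channel = k ** 2 * 2            # 18 -- paired (x, y) offsets
mask_channel = k ** 2                  # 9  -- per-tap modulation, sigmoided
assert offset_channel + mask_channel == 3 * k * k
```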
1, stride, name=name) + else: + return input + + def bottleneck(self, input, num_filters, stride, is_first, name, dcn_v2=False): + if self.variant == 'a': + stride1, stride2 = stride, 1 + else: + stride1, stride2 = 1, stride + + # ResNeXt + groups = getattr(self, 'groups', 1) + group_width = getattr(self, 'group_width', -1) + if groups == 1: + expand = 4 + elif (groups * group_width) == 256: + expand = 1 + else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d + num_filters = num_filters // 2 + expand = 2 + + conv_name1, conv_name2, conv_name3, \ + shortcut_name = self.na.fix_bottleneck_name(name) + std_senet = getattr(self, 'std_senet', False) + if std_senet: + conv_def = [[int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1], + [num_filters, 3, stride2, 'relu', groups, conv_name2], + [num_filters * expand, 1, 1, None, 1, conv_name3]] + else: + conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], + [num_filters, 3, stride2, 'relu', groups, conv_name2], + [num_filters * expand, 1, 1, None, 1, conv_name3]] + + residual = input + for i, (c, k, s, act, g, _name) in enumerate(conv_def): + residual = self._conv_norm( + input=residual, + num_filters=c, + filter_size=k, + stride=s, + act=act, + groups=g, + name=_name, + dcn_v2=(i == 1 and dcn_v2)) + short = self._shortcut(input, num_filters * expand, stride, is_first=is_first, name=shortcut_name) + # Squeeze-and-Excitation + if callable(getattr(self, '_squeeze_excitation', None)): + residual = self._squeeze_excitation(input=residual, num_channels=num_filters, name='fc' + name) + return fluid.layers.elementwise_add(x=short, y=residual, act='relu', name=name + ".add.output.5") + + def basicblock(self, input, num_filters, stride, is_first, name, dcn_v2=False): + assert dcn_v2 is False, "Not implemented yet." + conv0 = self._conv_norm( + input=input, num_filters=num_filters, filter_size=3, act='relu', stride=stride, name=name + "_branch2a") + conv1 = self._conv_norm(input=conv0, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") + short = self._shortcut(input, num_filters, stride, is_first, name=name + "_branch1") + return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') + + def layer_warp(self, input, stage_num): + """ + Args: + input (Variable): input variable. + stage_num (int): the stage number, should be 2, 3, 4, 5 + + Returns: + The last variable in endpoint-th stage. 
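The stride schedule encoded in `layer_warp` above downsamples only in the first block of stages 3-5, so for 224x224 inputs the feature maps go 56 → 28 → 14 → 7 after the 4x reduction in the stem:

```python
# Stride schedule as computed inside layer_warp above.
def stage_strides(stage_num, count):
    return [2 if i == 0 and stage_num != 2 else 1 for i in range(count)]

for stage, count in zip([2, 3, 4, 5], [3, 4, 6, 3]):   # ResNet-50 block counts
    print(stage, stage_strides(stage, count))
```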
+ """ + assert stage_num in [2, 3, 4, 5] + + stages, block_func = self.depth_cfg[self.depth] + count = stages[stage_num - 2] + + ch_out = self.stage_filters[stage_num - 2] + is_first = False if stage_num != 2 else True + dcn_v2 = True if stage_num in self.dcn_v2_stages else False + + nonlocal_mod = 1000 + if stage_num in self.nonlocal_stages: + nonlocal_mod = self.nonlocal_mod_cfg[self.depth] if stage_num == 4 else 2 + + # Make the layer name and parameter name consistent + # with ImageNet pre-trained model + conv = input + for i in range(count): + conv_name = self.na.fix_layer_warp_name(stage_num, count, i) + if self.depth < 50: + is_first = True if i == 0 and stage_num == 2 else False + conv = block_func( + input=conv, + num_filters=ch_out, + stride=2 if i == 0 and stage_num != 2 else 1, + is_first=is_first, + name=conv_name, + dcn_v2=dcn_v2) + + # add non local model + dim_in = conv.shape[1] + nonlocal_name = "nonlocal_conv{}".format(stage_num) + if i % nonlocal_mod == nonlocal_mod - 1: + conv = add_space_nonlocal(conv, dim_in, dim_in, nonlocal_name + '_{}'.format(i), int(dim_in / 2)) + return conv + + def c1_stage(self, input): + out_chan = self._c1_out_chan_num + + conv1_name = self.na.fix_c1_stage_name() + + if self.variant in ['c', 'd']: + conv_def = [ + [out_chan // 2, 3, 2, "conv1_1"], + [out_chan // 2, 3, 1, "conv1_2"], + [out_chan, 3, 1, "conv1_3"], + ] + else: + conv_def = [[out_chan, 7, 2, conv1_name]] + + for (c, k, s, _name) in conv_def: + input = self._conv_norm(input=input, num_filters=c, filter_size=k, stride=s, act='relu', name=_name) + + output = fluid.layers.pool2d(input=input, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + return output + + def __call__(self, input): + assert isinstance(input, Variable) + assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ + "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) + + res_endpoints = [] + + res = input + feature_maps = self.feature_maps + severed_head = getattr(self, 'severed_head', False) + if not severed_head: + res = self.c1_stage(res) + feature_maps = range(2, max(self.feature_maps) + 1) + + for i in feature_maps: + res = self.layer_warp(res, i) + if i in self.feature_maps: + res_endpoints.append(res) + if self.freeze_at >= i: + res.stop_gradient = True + if self.get_prediction: + pool = fluid.layers.pool2d(input=res, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + + out = fluid.layers.fc( + input=pool, + size=self.class_dim, + param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv))) + out = fluid.layers.softmax(out) + return out + return OrderedDict( + [('res{}_sum'.format(self.feature_maps[idx]), feat) for idx, feat in enumerate(res_endpoints)]) + + +class ResNetC5(ResNet): + def __init__(self, + depth=50, + freeze_at=2, + norm_type='affine_channel', + freeze_norm=True, + norm_decay=0., + variant='b', + feature_maps=[5], + weight_prefix_name=''): + super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm, norm_decay, variant, feature_maps) + self.severed_head = True diff --git a/modules/image/classification/resnet50_vd_10w/module.py b/modules/image/classification/resnet50_vd_10w/module.py new file mode 100644 index 0000000000000000000000000000000000000000..844876dca0e594eab634c6026554d4714ab44103 --- /dev/null +++ b/modules/image/classification/resnet50_vd_10w/module.py @@ -0,0 +1,246 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import math + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__( + self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None, + ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNet50_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv2, act='relu') + return y + + +class BasicBlock(nn.Layer): + """Basic block for ResNet50_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BasicBlock, self).__init__() + 
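How the shortcut flags fall out of the constructor loop in these blocks: only the network's very first residual block (`block == i == 0`) keeps a plain projection shortcut, while every later projection takes the avg-pool "vd" path:

```python
# if_first / is_vd_mode derivation as in the __init__ loop above.
depth = [3, 4, 6, 3]
for block in range(len(depth)):
    for i in range(depth[block]):
        if_first = block == i == 0
        is_vd_mode = not if_first       # as passed to ConvBNLayer's shortcut
        if i == 0:
            print(block, if_first, is_vd_mode)
```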
self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv1, act='relu') + return y + + +@moduleinfo( + name="resnet50_vd_10w", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnet50_vd_imagenet_ssld is a classification model, " + "this module is trained with Baidu open sourced dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNet50_vd(nn.Layer): + """ResNet50_vd model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNet50_vd, self).__init__() + + self.layers = 50 + depth = [3, 4, 6, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [64, 128, 256, 512] + + self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") + self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") + self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] if i == 0 else num_filters[block] * 4, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + self.pool2d_avg_channels = num_channels[-1] * 2 + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), + bias_attr=ParamAttr(name="fc_0.b_0")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnet50_vd_10w.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet50_vd_10w.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/hub_module/modules/image/classification/resnet50_vd_animals/README.md 
b/modules/image/classification/resnet50_vd_animals/README.md similarity index 100% rename from hub_module/modules/image/classification/resnet50_vd_animals/README.md rename to modules/image/classification/resnet50_vd_animals/README.md diff --git a/hub_module/modules/image/classification/resnet50_v2_imagenet/__init__.py b/modules/image/classification/resnet50_vd_animals/__init__.py similarity index 100% rename from hub_module/modules/image/classification/resnet50_v2_imagenet/__init__.py rename to modules/image/classification/resnet50_vd_animals/__init__.py diff --git a/modules/image/classification/resnet50_vd_animals/data_feed.py b/modules/image/classification/resnet50_vd_animals/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..99a0855fd6a93dbecd081cef312a04a350cfcc50 --- /dev/null +++ b/modules/image/classification/resnet50_vd_animals/data_feed.py @@ -0,0 +1,82 @@ +# coding=utf-8 +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader'] + +DATA_DIM = 224 +img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + +def resize_short(img, target_size): + percent = float(target_size) / min(img.size[0], img.size[1]) + resized_width = int(round(img.size[0] * percent)) + resized_height = int(round(img.size[1] * percent)) + img = img.resize((resized_width, resized_height), Image.LANCZOS) + return img + + +def crop_image(img, target_size, center): + width, height = img.size + size = target_size + if center == True: + w_start = (width - size) / 2 + h_start = (height - size) / 2 + else: + w_start = np.random.randint(0, width - size + 1) + h_start = np.random.randint(0, height - size + 1) + w_end = w_start + size + h_end = h_start + size + img = img.crop((w_start, h_start, w_end, h_end)) + return img + + +def process_image(img): + img = resize_short(img, target_size=256) + img = crop_image(img, target_size=DATA_DIM, center=True) + if img.mode != 'RGB': + img = img.convert('RGB') + img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list[numpy.ndarray]): images data, shape of each is [H, W, C]. + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + each['org_im_path'] = im_path + each['org_im'] = Image.open(im_path) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + if images is not None: + assert type(images), "images is a list." 
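A usage sketch for the `reader()` generator above: it accepts BGR ndarrays (as produced by `cv2.imread`) and flips them to RGB via `im[:, :, ::-1]` before PIL preprocessing. The image path is a placeholder; note also that `assert type(images)` never fires, since `type()` is always truthy:

```python
import cv2                     # reader() expects BGR arrays, e.g. from cv2.imread

# Feed one in-memory image through the generator defined above.
bgr = cv2.imread('animal.jpg')              # placeholder path; any uint8 HxWx3
for each in reader(images=[bgr]):
    print(each['org_im_path'], each['image'].shape)   # ndarray_time=..., (3, 224, 224)
```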
+        for im in images:
+            each = OrderedDict()
+            each['org_im'] = Image.fromarray(im[:, :, ::-1])
+            each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6)
+            each['org_im_width'], each['org_im_height'] = each['org_im'].size
+            component.append(each)
+
+    for element in component:
+        element['image'] = process_image(element['org_im'])
+        yield element
diff --git a/hub_module/modules/image/classification/resnet50_vd_animals/label_list.txt b/modules/image/classification/resnet50_vd_animals/label_list.txt
similarity index 100%
rename from hub_module/modules/image/classification/resnet50_vd_animals/label_list.txt
rename to modules/image/classification/resnet50_vd_animals/label_list.txt
diff --git a/modules/image/classification/resnet50_vd_animals/module.py b/modules/image/classification/resnet50_vd_animals/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..18eb3db106b1dcb5db7828271f5d18e243986739
--- /dev/null
+++ b/modules/image/classification/resnet50_vd_animals/module.py
@@ -0,0 +1,227 @@
+# coding=utf-8
+from __future__ import absolute_import
+from __future__ import division
+
+import ast
+import argparse
+import os
+
+import numpy as np
+import paddle.fluid as fluid
+import paddlehub as hub
+from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+from paddlehub.module.module import moduleinfo, runnable, serving
+from paddlehub.common.paddle_helper import add_vars_prefix
+
+from resnet50_vd_animals.processor import postprocess, base64_to_cv2
+from resnet50_vd_animals.data_feed import reader
+from resnet50_vd_animals.resnet_vd import ResNet50_vd
+
+
+@moduleinfo(
+    name="resnet50_vd_animals",
+    type="CV/image_classification",
+    author="baidu-vis",
+    author_email="",
+    summary="ResNet50vd is an image classification model, this module is trained with Baidu's self-built animals dataset.",
+    version="1.0.0")
+class ResNet50vdAnimals(hub.Module):
+    def _initialize(self):
+        self.default_pretrained_model_path = os.path.join(self.directory, "model")
+        label_file = os.path.join(self.directory, "label_list.txt")
+        with open(label_file, 'r', encoding='utf-8') as file:
+            self.label_list = file.read().split("\n")[:-1]
+        self._set_config()
+
+    def get_expected_image_width(self):
+        return 224
+
+    def get_expected_image_height(self):
+        return 224
+
+    def get_pretrained_images_mean(self):
+        im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3)
+        return im_mean
+
+    def get_pretrained_images_std(self):
+        im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3)
+        return im_std
+
+    def _set_config(self):
+        """
+        predictor config setting
+        """
+        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
+        cpu_config.disable_glog_info()
+        cpu_config.disable_gpu()
+        self.cpu_predictor = create_paddle_predictor(cpu_config)
+
+        try:
+            _places = os.environ["CUDA_VISIBLE_DEVICES"]
+            int(_places[0])
+            use_gpu = True
+        except:
+            use_gpu = False
+        if use_gpu:
+            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
+            gpu_config.disable_glog_info()
+            gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0)
+            self.gpu_predictor = create_paddle_predictor(gpu_config)
+
+    def context(self, trainable=True, pretrained=True):
+        """context for transfer learning.
+
+        Args:
+            trainable (bool): Set parameters in program to be trainable.
+            pretrained (bool): Whether to load pretrained model.
+
+        Returns:
+            inputs (dict): key is 'image', corresponding value is image tensor.
+            outputs (dict): keys are:
+                'classification', corresponding value is the result of classification.
+                'feature_map', corresponding value is the result of the layer before the fully connected layer.
+            context_prog (fluid.Program): program for transfer learning.
+        """
+        context_prog = fluid.Program()
+        startup_prog = fluid.Program()
+        with fluid.program_guard(context_prog, startup_prog):
+            with fluid.unique_name.guard():
+                image = fluid.layers.data(name="image", shape=[3, 224, 224], dtype="float32")
+                resnet_vd = ResNet50_vd()
+                output, feature_map = resnet_vd.net(input=image, class_dim=len(self.label_list))
+
+                name_prefix = '@HUB_{}@'.format(self.name)
+                inputs = {'image': name_prefix + image.name}
+                outputs = {'classification': name_prefix + output.name, 'feature_map': name_prefix + feature_map.name}
+                add_vars_prefix(context_prog, name_prefix)
+                add_vars_prefix(startup_prog, name_prefix)
+                global_vars = context_prog.global_block().vars
+                inputs = {key: global_vars[value] for key, value in inputs.items()}
+                outputs = {key: global_vars[value] for key, value in outputs.items()}
+
+                place = fluid.CPUPlace()
+                exe = fluid.Executor(place)
+                # pretrained
+                if pretrained:
+
+                    def _if_exist(var):
+                        b = os.path.exists(os.path.join(self.default_pretrained_model_path, var.name))
+                        return b
+
+                    fluid.io.load_vars(exe, self.default_pretrained_model_path, context_prog, predicate=_if_exist)
+                else:
+                    exe.run(startup_prog)
+                # trainable
+                for param in context_prog.global_block().iter_parameters():
+                    param.trainable = trainable
+        return inputs, outputs, context_prog
+
+    def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1):
+        """
+        API for image classification.
+
+        Args:
+            images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR.
+            paths (list[str]): The paths of images.
+            batch_size (int): batch size.
+            use_gpu (bool): Whether to use gpu.
+            top_k (int): Return top k results.
+
+        Returns:
+            res (list[dict]): The classification results.
+        """
+        if use_gpu:
+            try:
+                _places = os.environ["CUDA_VISIBLE_DEVICES"]
+                int(_places[0])
+            except:
+                raise RuntimeError(
+                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as the CUDA device id."
+ ) + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + batch_image = PaddleTensor(batch_image.copy()) + predictor_output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run( + [batch_image]) + out = postprocess(data_out=predictor_output[0].as_ndarray(), label_list=self.label_list, top_k=top_k) + res += out + return res + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.classification(images=images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.classification(paths=[args.input_path], batch_size=args.batch_size, use_gpu=args.use_gpu) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not.") + self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + self.arg_config_group.add_argument('--top_k', type=ast.literal_eval, default=1, help="Return top k results.") + + def add_module_input_arg(self): + """ + Add the command input options. 
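+
+        A minimal illustrative invocation (the image path is a placeholder):
+            hub run resnet50_vd_animals --input_path /PATH/TO/IMAGE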
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") diff --git a/hub_module/modules/image/classification/resnet50_vd_animals/processor.py b/modules/image/classification/resnet50_vd_animals/processor.py similarity index 100% rename from hub_module/modules/image/classification/resnet50_vd_animals/processor.py rename to modules/image/classification/resnet50_vd_animals/processor.py diff --git a/modules/image/classification/resnet50_vd_animals/resnet_vd.py b/modules/image/classification/resnet50_vd_animals/resnet_vd.py new file mode 100755 index 0000000000000000000000000000000000000000..3d9a91ca7e8e40cdb54f5bcf9f9f522251e6be86 --- /dev/null +++ b/modules/image/classification/resnet50_vd_animals/resnet_vd.py @@ -0,0 +1,185 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr + +__all__ = ["ResNet", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd", "ResNet200_vd"] + +train_parameters = { + "input_size": [3, 224, 224], + "input_mean": [0.485, 0.456, 0.406], + "input_std": [0.229, 0.224, 0.225], + "learning_strategy": { + "name": "piecewise_decay", + "batch_size": 256, + "epochs": [30, 60, 90], + "steps": [0.1, 0.01, 0.001, 0.0001] + } +} + + +class ResNet(): + def __init__(self, layers=50, is_3x3=False): + self.params = train_parameters + self.layers = layers + self.is_3x3 = is_3x3 + + def net(self, input, class_dim=1000): + is_3x3 = self.is_3x3 + layers = self.layers + supported_layers = [50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format(supported_layers, layers) + + if layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_filters = [64, 128, 256, 512] + if is_3x3 == False: + conv = self.conv_bn_layer(input=input, num_filters=64, filter_size=7, stride=2, act='relu') + else: + conv = self.conv_bn_layer(input=input, num_filters=32, filter_size=3, stride=2, act='relu', name='conv1_1') + conv = self.conv_bn_layer(input=conv, num_filters=32, filter_size=3, stride=1, act='relu', name='conv1_2') + conv = self.conv_bn_layer(input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_3') + + conv = fluid.layers.pool2d(input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + + for block in range(len(depth)): + for i in range(depth[block]): + if layers in [101, 152, 200] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + conv = self.bottleneck_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + 
if_first=block == 0, + name=conv_name) + + pool = fluid.layers.pool2d(input=conv, pool_size=7, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv))) + + return out, pool + + def conv_bn_layer(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def conv_bn_layer_new(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None): + pool = fluid.layers.pool2d(input=input, pool_size=2, pool_stride=2, pool_padding=0, pool_type='avg') + + conv = fluid.layers.conv2d( + input=pool, + num_filters=num_filters, + filter_size=filter_size, + stride=1, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def shortcut(self, input, ch_out, stride, name, if_first=False): + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1: + if if_first: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck_block(self, input, num_filters, stride, name, if_first): + conv0 = self.conv_bn_layer( + input=input, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + conv1 = self.conv_bn_layer( + input=conv0, num_filters=num_filters, filter_size=3, stride=stride, act='relu', name=name + "_branch2b") + conv2 = self.conv_bn_layer( + input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") + + short = self.shortcut(input, num_filters * 4, stride, if_first=if_first, name=name + "_branch1") + + return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') + + +def ResNet50_vd(): + model = ResNet(layers=50, is_3x3=True) + return model + + +def ResNet101_vd(): + model = ResNet(layers=101, is_3x3=True) + return model + + +def ResNet152_vd(): + model = ResNet(layers=152, is_3x3=True) + return model + + +def ResNet200_vd(): + model = ResNet(layers=200, is_3x3=True) + return model diff --git a/hub_module/modules/image/classification/resnet50_vd_dishes/README.md b/modules/image/classification/resnet50_vd_dishes/README.md similarity index 100% rename from hub_module/modules/image/classification/resnet50_vd_dishes/README.md rename to modules/image/classification/resnet50_vd_dishes/README.md diff --git a/hub_module/modules/image/classification/resnet50_vd_animals/__init__.py b/modules/image/classification/resnet50_vd_dishes/__init__.py similarity index 100% rename from 
hub_module/modules/image/classification/resnet50_vd_animals/__init__.py
rename to modules/image/classification/resnet50_vd_dishes/__init__.py
diff --git a/modules/image/classification/resnet50_vd_dishes/data_feed.py b/modules/image/classification/resnet50_vd_dishes/data_feed.py
new file mode 100644
index 0000000000000000000000000000000000000000..99a0855fd6a93dbecd081cef312a04a350cfcc50
--- /dev/null
+++ b/modules/image/classification/resnet50_vd_dishes/data_feed.py
@@ -0,0 +1,82 @@
+# coding=utf-8
+import os
+import time
+from collections import OrderedDict
+
+import cv2
+import numpy as np
+from PIL import Image
+
+__all__ = ['reader']
+
+DATA_DIM = 224
+img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
+img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
+
+
+def resize_short(img, target_size):
+    percent = float(target_size) / min(img.size[0], img.size[1])
+    resized_width = int(round(img.size[0] * percent))
+    resized_height = int(round(img.size[1] * percent))
+    img = img.resize((resized_width, resized_height), Image.LANCZOS)
+    return img
+
+
+def crop_image(img, target_size, center):
+    width, height = img.size
+    size = target_size
+    if center:
+        w_start = (width - size) // 2
+        h_start = (height - size) // 2
+    else:
+        w_start = np.random.randint(0, width - size + 1)
+        h_start = np.random.randint(0, height - size + 1)
+    w_end = w_start + size
+    h_end = h_start + size
+    img = img.crop((w_start, h_start, w_end, h_end))
+    return img
+
+
+def process_image(img):
+    img = resize_short(img, target_size=256)
+    img = crop_image(img, target_size=DATA_DIM, center=True)
+    if img.mode != 'RGB':
+        img = img.convert('RGB')
+    img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
+    img -= img_mean
+    img /= img_std
+    return img
+
+
+def reader(images=None, paths=None):
+    """
+    Preprocess to yield image.
+
+    Args:
+        images (list[numpy.ndarray]): images data, shape of each is [H, W, C].
+        paths (list[str]): paths to images.
+
+    Yield:
+        each (collections.OrderedDict): info of original image, preprocessed image.
+    """
+    component = list()
+    if paths:
+        for im_path in paths:
+            each = OrderedDict()
+            assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path)
+            each['org_im_path'] = im_path
+            each['org_im'] = Image.open(im_path)
+            each['org_im_width'], each['org_im_height'] = each['org_im'].size
+            component.append(each)
+    if images is not None:
+        assert isinstance(images, list), "images should be a list."
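+        # NOTE (added comment): each in-memory image gets a synthetic
+        # 'org_im_path' key derived from the current timestamp, so results
+        # can still be matched back to their inputs.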
+        for im in images:
+            each = OrderedDict()
+            each['org_im'] = Image.fromarray(im[:, :, ::-1])
+            each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6)
+            each['org_im_width'], each['org_im_height'] = each['org_im'].size
+            component.append(each)
+
+    for element in component:
+        element['image'] = process_image(element['org_im'])
+        yield element
diff --git a/hub_module/modules/image/classification/resnet50_vd_dishes/label_list.txt b/modules/image/classification/resnet50_vd_dishes/label_list.txt
similarity index 100%
rename from hub_module/modules/image/classification/resnet50_vd_dishes/label_list.txt
rename to modules/image/classification/resnet50_vd_dishes/label_list.txt
diff --git a/modules/image/classification/resnet50_vd_dishes/module.py b/modules/image/classification/resnet50_vd_dishes/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..43c072b7f81ab9e02749bcc12734c454b2da8a85
--- /dev/null
+++ b/modules/image/classification/resnet50_vd_dishes/module.py
@@ -0,0 +1,227 @@
+# coding=utf-8
+from __future__ import absolute_import
+from __future__ import division
+
+import ast
+import argparse
+import os
+
+import numpy as np
+import paddle.fluid as fluid
+import paddlehub as hub
+from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+from paddlehub.module.module import moduleinfo, runnable, serving
+from paddlehub.common.paddle_helper import add_vars_prefix
+
+from resnet50_vd_dishes.processor import postprocess, base64_to_cv2
+from resnet50_vd_dishes.data_feed import reader
+from resnet50_vd_dishes.resnet_vd import ResNet50_vd
+
+
+@moduleinfo(
+    name="resnet50_vd_dishes",
+    type="CV/image_classification",
+    author="baidu-vis",
+    author_email="",
+    summary="ResNet50vd is an image classification model, this module is trained with Baidu's self-built dishes dataset.",
+    version="1.0.0")
+class ResNet50vdDishes(hub.Module):
+    def _initialize(self):
+        self.default_pretrained_model_path = os.path.join(self.directory, "model")
+        label_file = os.path.join(self.directory, "label_list.txt")
+        with open(label_file, 'r', encoding='utf-8') as file:
+            self.label_list = file.read().split("\n")[:-1]
+        self._set_config()
+
+    def get_expected_image_width(self):
+        return 224
+
+    def get_expected_image_height(self):
+        return 224
+
+    def get_pretrained_images_mean(self):
+        im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3)
+        return im_mean
+
+    def get_pretrained_images_std(self):
+        im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3)
+        return im_std
+
+    def _set_config(self):
+        """
+        predictor config setting
+        """
+        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
+        cpu_config.disable_glog_info()
+        cpu_config.disable_gpu()
+        self.cpu_predictor = create_paddle_predictor(cpu_config)
+
+        try:
+            _places = os.environ["CUDA_VISIBLE_DEVICES"]
+            int(_places[0])
+            use_gpu = True
+        except:
+            use_gpu = False
+        if use_gpu:
+            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
+            gpu_config.disable_glog_info()
+            gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0)
+            self.gpu_predictor = create_paddle_predictor(gpu_config)
+
+    def context(self, trainable=True, pretrained=True):
+        """context for transfer learning.
+
+        Args:
+            trainable (bool): Set parameters in program to be trainable.
+            pretrained (bool): Whether to load pretrained model.
+
+        Returns:
+            inputs (dict): key is 'image', corresponding value is image tensor.
+            outputs (dict): keys are:
+                'classification', corresponding value is the result of classification.
+                'feature_map', corresponding value is the result of the layer before the fully connected layer.
+            context_prog (fluid.Program): program for transfer learning.
+        """
+        context_prog = fluid.Program()
+        startup_prog = fluid.Program()
+        with fluid.program_guard(context_prog, startup_prog):
+            with fluid.unique_name.guard():
+                image = fluid.layers.data(name="image", shape=[3, 224, 224], dtype="float32")
+                resnet_vd = ResNet50_vd()
+                output, feature_map = resnet_vd.net(input=image, class_dim=len(self.label_list))
+
+                name_prefix = '@HUB_{}@'.format(self.name)
+                inputs = {'image': name_prefix + image.name}
+                outputs = {'classification': name_prefix + output.name, 'feature_map': name_prefix + feature_map.name}
+                add_vars_prefix(context_prog, name_prefix)
+                add_vars_prefix(startup_prog, name_prefix)
+                global_vars = context_prog.global_block().vars
+                inputs = {key: global_vars[value] for key, value in inputs.items()}
+                outputs = {key: global_vars[value] for key, value in outputs.items()}
+
+                place = fluid.CPUPlace()
+                exe = fluid.Executor(place)
+                # pretrained
+                if pretrained:
+
+                    def _if_exist(var):
+                        b = os.path.exists(os.path.join(self.default_pretrained_model_path, var.name))
+                        return b
+
+                    fluid.io.load_vars(exe, self.default_pretrained_model_path, context_prog, predicate=_if_exist)
+                else:
+                    exe.run(startup_prog)
+                # trainable
+                for param in context_prog.global_block().iter_parameters():
+                    param.trainable = trainable
+        return inputs, outputs, context_prog
+
+    def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1):
+        """
+        API for image classification.
+
+        Args:
+            images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR.
+            paths (list[str]): The paths of images.
+            batch_size (int): batch size.
+            use_gpu (bool): Whether to use gpu.
+            top_k (int): Return top k results.
+
+        Returns:
+            res (list[dict]): The classification results.
+        """
+        if use_gpu:
+            try:
+                _places = os.environ["CUDA_VISIBLE_DEVICES"]
+                int(_places[0])
+            except:
+                raise RuntimeError(
+                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as the CUDA device id."
+ ) + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + batch_image = PaddleTensor(batch_image.copy()) + predictor_output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run( + [batch_image]) + out = postprocess(data_out=predictor_output[0].as_ndarray(), label_list=self.label_list, top_k=top_k) + res += out + return res + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.classification(images=images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.classification(paths=[args.input_path], batch_size=args.batch_size, use_gpu=args.use_gpu) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not.") + self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + self.arg_config_group.add_argument('--top_k', type=ast.literal_eval, default=1, help="Return top k results.") + + def add_module_input_arg(self): + """ + Add the command input options. 
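+
+        A minimal illustrative invocation (the image path is a placeholder):
+            hub run resnet50_vd_dishes --input_path /PATH/TO/IMAGE --use_gpu False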
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") diff --git a/hub_module/modules/image/classification/resnet50_vd_dishes/processor.py b/modules/image/classification/resnet50_vd_dishes/processor.py similarity index 100% rename from hub_module/modules/image/classification/resnet50_vd_dishes/processor.py rename to modules/image/classification/resnet50_vd_dishes/processor.py diff --git a/modules/image/classification/resnet50_vd_dishes/resnet_vd.py b/modules/image/classification/resnet50_vd_dishes/resnet_vd.py new file mode 100755 index 0000000000000000000000000000000000000000..3d9a91ca7e8e40cdb54f5bcf9f9f522251e6be86 --- /dev/null +++ b/modules/image/classification/resnet50_vd_dishes/resnet_vd.py @@ -0,0 +1,185 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr + +__all__ = ["ResNet", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd", "ResNet200_vd"] + +train_parameters = { + "input_size": [3, 224, 224], + "input_mean": [0.485, 0.456, 0.406], + "input_std": [0.229, 0.224, 0.225], + "learning_strategy": { + "name": "piecewise_decay", + "batch_size": 256, + "epochs": [30, 60, 90], + "steps": [0.1, 0.01, 0.001, 0.0001] + } +} + + +class ResNet(): + def __init__(self, layers=50, is_3x3=False): + self.params = train_parameters + self.layers = layers + self.is_3x3 = is_3x3 + + def net(self, input, class_dim=1000): + is_3x3 = self.is_3x3 + layers = self.layers + supported_layers = [50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format(supported_layers, layers) + + if layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_filters = [64, 128, 256, 512] + if is_3x3 == False: + conv = self.conv_bn_layer(input=input, num_filters=64, filter_size=7, stride=2, act='relu') + else: + conv = self.conv_bn_layer(input=input, num_filters=32, filter_size=3, stride=2, act='relu', name='conv1_1') + conv = self.conv_bn_layer(input=conv, num_filters=32, filter_size=3, stride=1, act='relu', name='conv1_2') + conv = self.conv_bn_layer(input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_3') + + conv = fluid.layers.pool2d(input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + + for block in range(len(depth)): + for i in range(depth[block]): + if layers in [101, 152, 200] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + conv = self.bottleneck_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + 
if_first=block == 0, + name=conv_name) + + pool = fluid.layers.pool2d(input=conv, pool_size=7, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv))) + + return out, pool + + def conv_bn_layer(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def conv_bn_layer_new(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None): + pool = fluid.layers.pool2d(input=input, pool_size=2, pool_stride=2, pool_padding=0, pool_type='avg') + + conv = fluid.layers.conv2d( + input=pool, + num_filters=num_filters, + filter_size=filter_size, + stride=1, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def shortcut(self, input, ch_out, stride, name, if_first=False): + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1: + if if_first: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck_block(self, input, num_filters, stride, name, if_first): + conv0 = self.conv_bn_layer( + input=input, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + conv1 = self.conv_bn_layer( + input=conv0, num_filters=num_filters, filter_size=3, stride=stride, act='relu', name=name + "_branch2b") + conv2 = self.conv_bn_layer( + input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") + + short = self.shortcut(input, num_filters * 4, stride, if_first=if_first, name=name + "_branch1") + + return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') + + +def ResNet50_vd(): + model = ResNet(layers=50, is_3x3=True) + return model + + +def ResNet101_vd(): + model = ResNet(layers=101, is_3x3=True) + return model + + +def ResNet152_vd(): + model = ResNet(layers=152, is_3x3=True) + return model + + +def ResNet200_vd(): + model = ResNet(layers=200, is_3x3=True) + return model diff --git a/modules/image/classification/resnet50_vd_imagenet/module.py b/modules/image/classification/resnet50_vd_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..d018af55d88164fb582444479a7c12a4477bf0bd --- /dev/null +++ b/modules/image/classification/resnet50_vd_imagenet/module.py @@ -0,0 +1,247 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import math + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__( + self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None, + ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNet50_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv2, act='relu') + return y + + +class BasicBlock(nn.Layer): + """Basic block for ResNet50_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BasicBlock, self).__init__() + 
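# NOTE (added comment): for a downsampling shortcut, the vd variant first average-pools 2x2 and then applies a 1x1 conv with stride 1 (is_vd_mode in ConvBNLayer above), the ResNet-D trick that keeps information a strided 1x1 conv would discard. +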
self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv1, act='relu') + return y + + +@moduleinfo( + name="resnet50_vd_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnet50_vd_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNet50_vd(nn.Layer): + """ResNet50_vd model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNet50_vd, self).__init__() + + self.layers = 50 + + depth = [3, 4, 6, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [64, 128, 256, 512] + + self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") + self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") + self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] if i == 0 else num_filters[block] * 4, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + self.pool2d_avg_channels = num_channels[-1] * 2 + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), + bias_attr=ParamAttr(name="fc_0.b_0")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnet50_vd_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet50_vd_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/modules/image/classification/resnet50_vd_imagenet_ssld/module.py 
b/modules/image/classification/resnet50_vd_imagenet_ssld/module.py new file mode 100644 index 0000000000000000000000000000000000000000..18375cff3b22668c25201c69484e59e47a884ae0 --- /dev/null +++ b/modules/image/classification/resnet50_vd_imagenet_ssld/module.py @@ -0,0 +1,248 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import math + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__( + self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None, + ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNet50_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = 
self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv2, act='relu') + return y + + +class BasicBlock(nn.Layer): + """Basic block for ResNet50_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv1, act='relu') + return y + + +@moduleinfo( + name="resnet50_vd_imagenet_ssld", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnet50_vd_imagenet_ssld is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNet50_vd(nn.Layer): + """ResNet50_vd model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNet50_vd, self).__init__() + + self.layers = 50 + + depth = [3, 4, 6, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [64, 128, 256, 512] + + self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") + self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") + self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] if i == 0 else num_filters[block] * 4, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + self.pool2d_avg_channels = num_channels[-1] * 2 + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_0.w_0"), + bias_attr=ParamAttr(name="fc_0.b_0")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnet50_vd_ssld.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnet50_vd_ssld.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + 
# Forward: 3x3 conv stem -> max pool -> bottleneck stages -> global average pool -> fc head.
+        y = self.conv1_1(inputs)
+        y = self.conv1_2(y)
+        y = self.conv1_3(y)
+        y = self.pool2d_max(y)
+        for block in self.block_list:
+            y = block(y)
+        y = self.pool2d_avg(y)
+        y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels])
+        y = self.out(y)
+        return y
diff --git a/hub_module/modules/image/classification/resnet50_vd_wildanimals/README.md b/modules/image/classification/resnet50_vd_wildanimals/README.md
similarity index 100%
rename from hub_module/modules/image/classification/resnet50_vd_wildanimals/README.md
rename to modules/image/classification/resnet50_vd_wildanimals/README.md
diff --git a/hub_module/modules/image/classification/resnet50_vd_dishes/__init__.py b/modules/image/classification/resnet50_vd_wildanimals/__init__.py
similarity index 100%
rename from hub_module/modules/image/classification/resnet50_vd_dishes/__init__.py
rename to modules/image/classification/resnet50_vd_wildanimals/__init__.py
diff --git a/modules/image/classification/resnet50_vd_wildanimals/data_feed.py b/modules/image/classification/resnet50_vd_wildanimals/data_feed.py
new file mode 100644
index 0000000000000000000000000000000000000000..99a0855fd6a93dbecd081cef312a04a350cfcc50
--- /dev/null
+++ b/modules/image/classification/resnet50_vd_wildanimals/data_feed.py
@@ -0,0 +1,82 @@
+# coding=utf-8
+import os
+import time
+from collections import OrderedDict
+
+import cv2
+import numpy as np
+from PIL import Image
+
+__all__ = ['reader']
+
+DATA_DIM = 224
+img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
+img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
+
+
+def resize_short(img, target_size):
+    percent = float(target_size) / min(img.size[0], img.size[1])
+    resized_width = int(round(img.size[0] * percent))
+    resized_height = int(round(img.size[1] * percent))
+    img = img.resize((resized_width, resized_height), Image.LANCZOS)
+    return img
+
+
+def crop_image(img, target_size, center):
+    width, height = img.size
+    size = target_size
+    if center:
+        w_start = (width - size) // 2
+        h_start = (height - size) // 2
+    else:
+        w_start = np.random.randint(0, width - size + 1)
+        h_start = np.random.randint(0, height - size + 1)
+    w_end = w_start + size
+    h_end = h_start + size
+    img = img.crop((w_start, h_start, w_end, h_end))
+    return img
+
+
+def process_image(img):
+    img = resize_short(img, target_size=256)
+    img = crop_image(img, target_size=DATA_DIM, center=True)
+    if img.mode != 'RGB':
+        img = img.convert('RGB')
+    img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
+    img -= img_mean
+    img /= img_std
+    return img
+
+
+def reader(images=None, paths=None):
+    """
+    Preprocess to yield image.
+
+    Args:
+        images (list[numpy.ndarray]): images data, shape of each is [H, W, C].
+        paths (list[str]): paths to images.
+
+    Yield:
+        each (collections.OrderedDict): info of original image, preprocessed image.
+    """
+    component = list()
+    if paths:
+        for im_path in paths:
+            each = OrderedDict()
+            assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path)
+            each['org_im_path'] = im_path
+            each['org_im'] = Image.open(im_path)
+            each['org_im_width'], each['org_im_height'] = each['org_im'].size
+            component.append(each)
+    if images is not None:
+        assert isinstance(images, list), "images should be a list."
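+        # NOTE (added comment): each element collected here is run through
+        # process_image before being yielded, ending up as a 3x224x224
+        # float32 CHW array normalized by the ImageNet mean/std above.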
+        for im in images:
+            each = OrderedDict()
+            each['org_im'] = Image.fromarray(im[:, :, ::-1])
+            each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6)
+            each['org_im_width'], each['org_im_height'] = each['org_im'].size
+            component.append(each)
+
+    for element in component:
+        element['image'] = process_image(element['org_im'])
+        yield element
diff --git a/hub_module/modules/image/classification/resnet50_vd_wildanimals/label_list.txt b/modules/image/classification/resnet50_vd_wildanimals/label_list.txt
similarity index 100%
rename from hub_module/modules/image/classification/resnet50_vd_wildanimals/label_list.txt
rename to modules/image/classification/resnet50_vd_wildanimals/label_list.txt
diff --git a/modules/image/classification/resnet50_vd_wildanimals/module.py b/modules/image/classification/resnet50_vd_wildanimals/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3ab6e73b35da2c8ca6d955fd8a864a284018434
--- /dev/null
+++ b/modules/image/classification/resnet50_vd_wildanimals/module.py
@@ -0,0 +1,228 @@
+# coding=utf-8
+from __future__ import absolute_import
+from __future__ import division
+
+import ast
+import argparse
+import os
+
+import numpy as np
+import paddle.fluid as fluid
+import paddlehub as hub
+from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+from paddlehub.module.module import moduleinfo, runnable, serving
+from paddlehub.common.paddle_helper import add_vars_prefix
+
+from resnet50_vd_wildanimals.processor import postprocess, base64_to_cv2
+from resnet50_vd_wildanimals.data_feed import reader
+from resnet50_vd_wildanimals.resnet_vd import ResNet50_vd
+
+
+@moduleinfo(
+    name="resnet50_vd_wildanimals",
+    type="CV/image_classification",
+    author="baidu-vis",
+    author_email="",
+    summary=
+    "ResNet50vd is an image classification model, this module is trained with IFAW's self-built wild animals dataset.",
+    version="1.0.0")
+class ResNet50vdWildAnimals(hub.Module):
+    def _initialize(self):
+        self.default_pretrained_model_path = os.path.join(self.directory, "model")
+        label_file = os.path.join(self.directory, "label_list.txt")
+        with open(label_file, 'r', encoding='utf-8') as file:
+            self.label_list = file.read().split("\n")[:-1]
+        self._set_config()
+
+    def get_expected_image_width(self):
+        return 224
+
+    def get_expected_image_height(self):
+        return 224
+
+    def get_pretrained_images_mean(self):
+        im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3)
+        return im_mean
+
+    def get_pretrained_images_std(self):
+        im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3)
+        return im_std
+
+    def _set_config(self):
+        """
+        predictor config setting.
+        """
+        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
+        cpu_config.disable_glog_info()
+        cpu_config.disable_gpu()
+        self.cpu_predictor = create_paddle_predictor(cpu_config)
+
+        try:
+            _places = os.environ["CUDA_VISIBLE_DEVICES"]
+            int(_places[0])
+            use_gpu = True
+        except:
+            use_gpu = False
+        if use_gpu:
+            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
+            gpu_config.disable_glog_info()
+            gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0)
+            self.gpu_predictor = create_paddle_predictor(gpu_config)
+
+    def context(self, trainable=True, pretrained=True):
+        """context for transfer learning.
+
+        Args:
+            trainable (bool): Set parameters in program to be trainable.
+            pretrained (bool): Whether to load pretrained model.
+
+        Returns:
+            inputs (dict): key is 'image', corresponding value is image tensor.
+            outputs (dict): keys are:
+                'classification', corresponding value is the result of classification.
+                'feature_map', corresponding value is the result of the layer before the fully connected layer.
+            context_prog (fluid.Program): program for transfer learning.
+        """
+        context_prog = fluid.Program()
+        startup_prog = fluid.Program()
+        with fluid.program_guard(context_prog, startup_prog):
+            with fluid.unique_name.guard():
+                image = fluid.layers.data(name="image", shape=[3, 224, 224], dtype="float32")
+                resnet_vd = ResNet50_vd()
+                output, feature_map = resnet_vd.net(input=image, class_dim=len(self.label_list))
+
+                name_prefix = '@HUB_{}@'.format(self.name)
+                inputs = {'image': name_prefix + image.name}
+                outputs = {'classification': name_prefix + output.name, 'feature_map': name_prefix + feature_map.name}
+                add_vars_prefix(context_prog, name_prefix)
+                add_vars_prefix(startup_prog, name_prefix)
+                global_vars = context_prog.global_block().vars
+                inputs = {key: global_vars[value] for key, value in inputs.items()}
+                outputs = {key: global_vars[value] for key, value in outputs.items()}
+
+                place = fluid.CPUPlace()
+                exe = fluid.Executor(place)
+                # pretrained
+                if pretrained:
+
+                    def _if_exist(var):
+                        b = os.path.exists(os.path.join(self.default_pretrained_model_path, var.name))
+                        return b
+
+                    fluid.io.load_vars(exe, self.default_pretrained_model_path, context_prog, predicate=_if_exist)
+                else:
+                    exe.run(startup_prog)
+                # trainable
+                for param in context_prog.global_block().iter_parameters():
+                    param.trainable = trainable
+        return inputs, outputs, context_prog
+
+    def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1):
+        """
+        API for image classification.
+
+        Args:
+            images (list[numpy.ndarray]): data of images, shape of each is [H, W, C].
+            paths (list[str]): The paths of images.
+            batch_size (int): batch size.
+            use_gpu (bool): Whether to use gpu.
+            top_k (int): Return top k results.
+
+        Returns:
+            res (list[dict]): The classification results.
+        """
+        if use_gpu:
+            try:
+                _places = os.environ["CUDA_VISIBLE_DEVICES"]
+                int(_places[0])
+            except:
+                raise RuntimeError(
+                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as the CUDA device id."
+                )
+
+        all_data = list()
+        for yield_data in reader(images, paths):
+            all_data.append(yield_data)
+
+        total_num = len(all_data)
+        loop_num = int(np.ceil(total_num / batch_size))
+
+        res = list()
+        for iter_id in range(loop_num):
+            batch_data = list()
+            handle_id = iter_id * batch_size
+            for image_id in range(batch_size):
+                try:
+                    batch_data.append(all_data[handle_id + image_id])
+                except IndexError:
+                    # the last batch may be smaller than batch_size
+                    pass
+            # feed batch image
+            batch_image = np.array([data['image'] for data in batch_data])
+            batch_image = PaddleTensor(batch_image.copy())
+            predictor_output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run(
+                [batch_image])
+            out = postprocess(data_out=predictor_output[0].as_ndarray(), label_list=self.label_list, top_k=top_k)
+            res += out
+        return res
+
+    def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True):
+        if combined:
+            model_filename = "__model__" if not model_filename else model_filename
+            params_filename = "__params__" if not params_filename else params_filename
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+
+        program, feeded_var_names, target_vars = fluid.io.load_inference_model(
+            dirname=self.default_pretrained_model_path, executor=exe)
+
+        fluid.io.save_inference_model(
+            dirname=dirname,
+            main_program=program,
+            executor=exe,
+            feeded_var_names=feeded_var_names,
+            target_vars=target_vars,
+            model_filename=model_filename,
+            params_filename=params_filename)
+
+    @serving
+    def serving_method(self, images, **kwargs):
+        """
+        Run as a service.
+        """
+        images_decode = [base64_to_cv2(image) for image in images]
+        results = self.classification(images=images_decode, **kwargs)
+        return results
+
+    @runnable
+    def run_cmd(self, argvs):
+        """
+        Run as a command.
+        """
+        self.parser = argparse.ArgumentParser(
+            description="Run the {} module.".format(self.name),
+            prog='hub run {}'.format(self.name),
+            usage='%(prog)s',
+            add_help=True)
+        self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required.")
+        self.arg_config_group = self.parser.add_argument_group(
+            title="Config options", description="Run configuration for controlling module behavior, not required.")
+        self.add_module_config_arg()
+        self.add_module_input_arg()
+        args = self.parser.parse_args(argvs)
+        results = self.classification(paths=[args.input_path], batch_size=args.batch_size, use_gpu=args.use_gpu)
+        return results
+
+    def add_module_config_arg(self):
+        """
+        Add the command config options.
+        """
+        self.arg_config_group.add_argument(
+            '--use_gpu', type=ast.literal_eval, default=False, help="whether to use GPU or not.")
+        self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.")
+        self.arg_config_group.add_argument('--top_k', type=ast.literal_eval, default=1, help="Return the top k results.")
+
+    def add_module_input_arg(self):
+        """
+        Add the command input options.
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") diff --git a/hub_module/modules/image/classification/resnet50_vd_wildanimals/processor.py b/modules/image/classification/resnet50_vd_wildanimals/processor.py similarity index 100% rename from hub_module/modules/image/classification/resnet50_vd_wildanimals/processor.py rename to modules/image/classification/resnet50_vd_wildanimals/processor.py diff --git a/modules/image/classification/resnet50_vd_wildanimals/resnet_vd.py b/modules/image/classification/resnet50_vd_wildanimals/resnet_vd.py new file mode 100755 index 0000000000000000000000000000000000000000..3d9a91ca7e8e40cdb54f5bcf9f9f522251e6be86 --- /dev/null +++ b/modules/image/classification/resnet50_vd_wildanimals/resnet_vd.py @@ -0,0 +1,185 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr + +__all__ = ["ResNet", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd", "ResNet200_vd"] + +train_parameters = { + "input_size": [3, 224, 224], + "input_mean": [0.485, 0.456, 0.406], + "input_std": [0.229, 0.224, 0.225], + "learning_strategy": { + "name": "piecewise_decay", + "batch_size": 256, + "epochs": [30, 60, 90], + "steps": [0.1, 0.01, 0.001, 0.0001] + } +} + + +class ResNet(): + def __init__(self, layers=50, is_3x3=False): + self.params = train_parameters + self.layers = layers + self.is_3x3 = is_3x3 + + def net(self, input, class_dim=1000): + is_3x3 = self.is_3x3 + layers = self.layers + supported_layers = [50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format(supported_layers, layers) + + if layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_filters = [64, 128, 256, 512] + if is_3x3 == False: + conv = self.conv_bn_layer(input=input, num_filters=64, filter_size=7, stride=2, act='relu') + else: + conv = self.conv_bn_layer(input=input, num_filters=32, filter_size=3, stride=2, act='relu', name='conv1_1') + conv = self.conv_bn_layer(input=conv, num_filters=32, filter_size=3, stride=1, act='relu', name='conv1_2') + conv = self.conv_bn_layer(input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_3') + + conv = fluid.layers.pool2d(input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + + for block in range(len(depth)): + for i in range(depth[block]): + if layers in [101, 152, 200] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + conv = self.bottleneck_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i 
== 0 and block != 0 else 1, + if_first=block == 0, + name=conv_name) + + pool = fluid.layers.pool2d(input=conv, pool_size=7, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv))) + + return out, pool + + def conv_bn_layer(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def conv_bn_layer_new(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None): + pool = fluid.layers.pool2d(input=input, pool_size=2, pool_stride=2, pool_padding=0, pool_type='avg') + + conv = fluid.layers.conv2d( + input=pool, + num_filters=num_filters, + filter_size=filter_size, + stride=1, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def shortcut(self, input, ch_out, stride, name, if_first=False): + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1: + if if_first: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck_block(self, input, num_filters, stride, name, if_first): + conv0 = self.conv_bn_layer( + input=input, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + conv1 = self.conv_bn_layer( + input=conv0, num_filters=num_filters, filter_size=3, stride=stride, act='relu', name=name + "_branch2b") + conv2 = self.conv_bn_layer( + input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") + + short = self.shortcut(input, num_filters * 4, stride, if_first=if_first, name=name + "_branch1") + + return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') + + +def ResNet50_vd(): + model = ResNet(layers=50, is_3x3=True) + return model + + +def ResNet101_vd(): + model = ResNet(layers=101, is_3x3=True) + return model + + +def ResNet152_vd(): + model = ResNet(layers=152, is_3x3=True) + return model + + +def ResNet200_vd(): + model = ResNet(layers=200, is_3x3=True) + return model diff --git a/modules/image/classification/resnext101_32x4d_imagenet/module.py b/modules/image/classification/resnext101_32x4d_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..fe9ab9c49d369eae41ba5a9896a76fecc7d5c7de --- /dev/null +++ b/modules/image/classification/resnext101_32x4d_imagenet/module.py @@ -0,0 +1,200 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +import os + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNeXt101.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + cardinality: int, + shortcut: bool = True, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + groups=cardinality, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.elementwise_add(x=short, y=conv2, act='relu') + return y + + +@moduleinfo( + name="resnext101_32x4d_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnext101_32x4d_imagenet is a classification model, " + "this module is trained with Baidu open sourced dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNeXt101_32x4d(nn.Layer): + def __init__(self, class_dim: int = 1000, load_checkpoint: str = 
None): + super(ResNeXt101_32x4d, self).__init__() + + self.layers = 101 + self.cardinality = 32 + depth = [3, 4, 23, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [128, 256, 512, 1024] + + self.conv = ConvBNLayer(num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu', name="res_conv1") + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block] * int(64 // self.cardinality), + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=self.cardinality, + shortcut=shortcut, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + self.pool2d_avg_channels = num_channels[-1] * 2 + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnext101_32x4d_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext101_32x4d_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv(inputs) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/modules/image/classification/resnext101_64x4d_imagenet/module.py b/modules/image/classification/resnext101_64x4d_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..f0d65cdfaf8080dcdcd53fc30c7823fcded9e2a5 --- /dev/null +++ b/modules/image/classification/resnext101_64x4d_imagenet/module.py @@ -0,0 +1,200 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
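For reference, the supported way to consume any of the classification modules in this diff is through the PaddleHub API rather than importing `module.py` directly. A minimal usage sketch (not part of the diff; `test.jpg` is a hypothetical local image, and the module name comes from the `@moduleinfo` declaration above):

```python
# Usage sketch: load the wild-animals classifier added above via PaddleHub
# and classify one local image on CPU.
import paddlehub as hub

classifier = hub.Module(name="resnet50_vd_wildanimals")
result = classifier.classification(paths=["test.jpg"], batch_size=1, use_gpu=False, top_k=3)
print(result)  # one dict of label -> probability per input image
```

The same `classification(images=..., paths=...)` signature is shared by the modules below, since they all go through `ImageClassifierModule`.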
+import math +import os + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNeXt101.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + cardinality: int, + shortcut: bool = True, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + groups=cardinality, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.elementwise_add(x=short, y=conv2, act='relu') + return y + + +@moduleinfo( + name="resnext101_64x4d_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnext101_64x4d_imagenet is a classification model, " + "this module is trained with Baidu open sourced dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNeXt101_64x4d(nn.Layer): + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNeXt101_64x4d, self).__init__() + + self.layers = 101 + self.cardinality = 64 + depth = [3, 4, 23, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [256, 512, 1024, 2048] + + self.conv = ConvBNLayer(num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu', name="res_conv1") + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + 
"a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block] * int(64 // self.cardinality), + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=self.cardinality, + shortcut=shortcut, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + self.pool2d_avg_channels = num_channels[-1] * 2 + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnext101_64x4d_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext101_64x4d_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv(inputs) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/modules/image/classification/resnext101_vd_32x4d_imagenet/module.py b/modules/image/classification/resnext101_vd_32x4d_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..1929939f9273607e0685ffba83d05e1497c31ce6 --- /dev/null +++ b/modules/image/classification/resnext101_vd_32x4d_imagenet/module.py @@ -0,0 +1,220 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import math +import os + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__( + self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None, + ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNeXt50_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + cardinality: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + groups=cardinality, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.elementwise_add(x=short, y=conv2, act='relu') + return y + + +@moduleinfo( + name="resnext101_vd_32x4d_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnext101_vd_32x4d_imagenet is a classification model, " + "this module is trained with Baidu open sourced dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNeXt101_vd(nn.Layer): + """ResNeXt101_vd model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNeXt101_vd, self).__init__() + + self.layers = 101 + self.cardinality = 32 + depth = [3, 4, 23, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [128, 256, 512, 1024] + + self.conv1_1 = ConvBNLayer(num_channels=3, 
num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") + self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") + self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") + + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block] * int(64 // self.cardinality), + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=self.cardinality, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnext101_vd_32x4d_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext101_vd_32x4d_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/modules/image/classification/resnext101_vd_64x4d_imagenet/module.py b/modules/image/classification/resnext101_vd_64x4d_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..650fdf704eda0a823cd3e7458aad3829aa272d1f --- /dev/null +++ b/modules/image/classification/resnext101_vd_64x4d_imagenet/module.py @@ -0,0 +1,220 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
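The `_vd` variants introduced here change downsampling in two places: a three-conv deep stem replaces the single 7x7 stem, and `is_vd_mode` shortcuts average-pool before a stride-1 1x1 projection instead of striding the projection itself, so no activations are skipped. A minimal sketch of that shortcut path, assuming the 2.0-beta dygraph layer names these files import (later Paddle releases renamed them to `Conv2D`, `AvgPool2D`):

```python
import paddle
import paddle.nn as nn

# vd-style projection shortcut: 2x2 avg pool, then a stride-1 1x1 conv,
# halving the resolution without discarding input activations.
x = paddle.rand([1, 256, 56, 56])
pool = nn.AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True)
proj = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=1, stride=1)
print(proj(pool(x)).shape)  # [1, 512, 28, 28]
```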
+ +import math +import os + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__( + self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None, + ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNeXt50_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + cardinality: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + groups=cardinality, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.elementwise_add(x=short, y=conv2, act='relu') + return y + + +@moduleinfo( + name="resnext101_vd_64x4d_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnext101_vd_64x4d_imagenet is a classification model, " + "this module is trained with Baidu open sourced dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNeXt101_vd(nn.Layer): + """ResNeXt101_vd model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNeXt101_vd, self).__init__() + + self.layers = 101 + self.cardinality = 64 + depth = [3, 4, 23, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [256, 512, 1024, 2048] + + self.conv1_1 = ConvBNLayer(num_channels=3, 
num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") + self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") + self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") + + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block] * int(64 // self.cardinality), + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=self.cardinality, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnext101_vd_64x4d_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext101_vd_64x4d_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/modules/image/classification/resnext152_32x4d_imagenet/module.py b/modules/image/classification/resnext152_32x4d_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..13180c8040bc1bb402a13f6605a281b3a794df36 --- /dev/null +++ b/modules/image/classification/resnext152_32x4d_imagenet/module.py @@ -0,0 +1,200 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
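The "32x4d"/"64x4d" names encode the grouped 3x3 convolution in `conv1`: at the first stage of the 32x4d variants, `cardinality=32` groups over 128 bottleneck channels gives 32 parallel paths of width 4. A sketch of what `groups=` does to the weight tensor, using the same `Conv2d` API as above:

```python
import paddle.nn as nn

dense = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1)
grouped = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1, groups=32)

# groups=32 splits the 128 input channels into 32 paths of width 4 ("32x4d"),
# shrinking the 3x3 weight tensor by the same factor of 32.
print(dense.weight.shape)    # [128, 128, 3, 3] -> 147456 weights
print(grouped.weight.shape)  # [128, 4, 3, 3]   ->   4608 weights
```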
+import math +import os + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNeXt152.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + cardinality: int, + shortcut: bool = True, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + groups=cardinality, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.elementwise_add(x=short, y=conv2, act='relu') + return y + + +@moduleinfo( + name="resnext152_32x4d_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnext152_32x4d_imagenet is a classification model, " + "this module is trained with Baidu open sourced dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNeXt152_32x4d(nn.Layer): + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNeXt152_32x4d, self).__init__() + + self.layers = 152 + self.cardinality = 32 + depth = [3, 8, 36, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [128, 256, 512, 1024] + + self.conv = ConvBNLayer(num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu', name="res_conv1") + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + 
"a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block] * int(64 // self.cardinality), + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=self.cardinality, + shortcut=shortcut, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + self.pool2d_avg_channels = num_channels[-1] * 2 + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnext152_32x4d_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext152_32x4d_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv(inputs) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/modules/image/classification/resnext152_64x4d_imagenet/module.py b/modules/image/classification/resnext152_64x4d_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..f561bcf1a4180a4ae4c0eb823d78eca118ceefcd --- /dev/null +++ b/modules/image/classification/resnext152_64x4d_imagenet/module.py @@ -0,0 +1,200 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import math +import os + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNeXt152.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + cardinality: int, + shortcut: bool = True, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + groups=cardinality, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.elementwise_add(x=short, y=conv2, act='relu') + return y + + +@moduleinfo( + name="resnext152_64x4d_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnext152_64x4d_imagenet is a classification model, " + "this module is trained with Baidu open sourced dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNeXt152_64x4d(nn.Layer): + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNeXt152_64x4d, self).__init__() + + self.layers = 152 + self.cardinality = 64 + depth = [3, 8, 36, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [256, 512, 1024, 2048] + + self.conv = ConvBNLayer(num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu', name="res_conv1") + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + 
"a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block] * int(64 // self.cardinality), + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=self.cardinality, + shortcut=shortcut, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + self.pool2d_avg_channels = num_channels[-1] * 2 + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnext152_64x4d_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext152_64x4d_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv(inputs) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/modules/image/classification/resnext152_vd_32x4d_imagenet/module.py b/modules/image/classification/resnext152_vd_32x4d_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..311a4aac0a04db49e157058e313a8ecfc99d63ee --- /dev/null +++ b/modules/image/classification/resnext152_vd_32x4d_imagenet/module.py @@ -0,0 +1,220 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import math +import os + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__( + self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None, + ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNeXt152_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + cardinality: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + groups=cardinality, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.elementwise_add(x=short, y=conv2, act='relu') + return y + + +@moduleinfo( + name="resnext152_vd_32x4d_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnext152_vd_32x4d_imagenet is a classification model, " + "this module is trained with Baidu open sourced dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNeXt152_vd(nn.Layer): + """ResNeXt152_vd model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNeXt152_vd, self).__init__() + + self.layers = 152 + self.cardinality = 32 + depth = [3, 8, 36, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [128, 256, 512, 1024] + + self.conv1_1 = ConvBNLayer(num_channels=3, 
num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") + self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") + self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") + + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block] * int(64 // self.cardinality), + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=self.cardinality, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnext152_vd_32x4d_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext152_vd_32x4d_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/modules/image/classification/resnext152_vd_64x4d_imagenet/module.py b/modules/image/classification/resnext152_vd_64x4d_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..3065322ca18fffa4280574570f7e10e928c38bc1 --- /dev/null +++ b/modules/image/classification/resnext152_vd_64x4d_imagenet/module.py @@ -0,0 +1,220 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
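In the vd variants, `if_first=block == i == 0` exempts exactly one block from the average-pool shortcut: the first block of the first stage, whose input was already downsampled by the stem's max pool, so a plain stride-1 1x1 projection suffices there. A sketch of which projection each block ends up with, under that reading of the flag:

```python
# Projection shortcuts only exist where shortcut=False, i.e. the first block
# of each stage; all are vd-style (avg pool + 1x1) except the very first block.
depth = [3, 8, 36, 3]
for block in range(len(depth)):
    for i in range(depth[block]):
        if i != 0:
            continue  # identity shortcut, no projection layer
        if_first = block == i == 0
        kind = "plain 1x1" if if_first else "avg-pool + 1x1"
        print("res" + str(block + 2), "->", kind)
```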
+ +import math +import os + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__( + self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None, + ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNeXt152_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + cardinality: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + groups=cardinality, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.elementwise_add(x=short, y=conv2, act='relu') + return y + + +@moduleinfo( + name="resnext152_vd_64x4d_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnext152_vd_64x4d_imagenet is a classification model, " + "this module is trained with Baidu open sourced dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNeXt152_vd(nn.Layer): + """ResNeXt152_vd model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNeXt152_vd, self).__init__() + + self.layers = 152 + self.cardinality = 64 + depth = [3, 8, 36, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [256, 512, 1024, 2048] + + self.conv1_1 = ConvBNLayer(num_channels=3, 
num_filters=32, filter_size=3, stride=2, act='relu', name="conv1_1") + self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") + self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") + + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + if block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block] * int(64 // self.cardinality), + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=self.cardinality, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnext152_vd_64x4d_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext152_vd_64x4d_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/modules/image/classification/resnext50_32x4d_imagenet/module.py b/modules/image/classification/resnext50_32x4d_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..7d2bf40b08c33f793d566cbc3d1e74fb37d6ca16 --- /dev/null +++ b/modules/image/classification/resnext50_32x4d_imagenet/module.py @@ -0,0 +1,194 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
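Editor's note: every module in this diff shares the same checkpoint bootstrap: if `load_checkpoint` is given, load that state dict; otherwise download the published `.pdparams` file next to the module code and load it. A minimal sketch of the download step using the standard library instead of shelling out to `wget` (the function name is hypothetical; the URL prefix is the one used above):

```python
import os
import urllib.request

BASE_URL = "https://paddlehub.bj.bcebos.com/dygraph/image_classification/"

def ensure_checkpoint(directory: str, filename: str) -> str:
    """Download the .pdparams file once, then reuse the local copy."""
    path = os.path.join(directory, filename)
    if not os.path.exists(path):
        # Same effect as `wget <url> -O <path>`, without spawning a shell.
        urllib.request.urlretrieve(BASE_URL + filename, path)
    return path
```

Note that `paddle.load(checkpoint)[0]` indexes into the loaded object; this appears to assume the beta-era dygraph API that returned a (parameters, optimizer state) pair, so the `[0]` would need to be dropped against releases where `paddle.load` returns the state dict directly.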
+import math +import os + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNeXt50.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + cardinality: int, + shortcut: bool = True, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + groups=cardinality, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.elementwise_add(x=short, y=conv2, act='relu') + return y + + +@moduleinfo( + name="resnext50_32x4d_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnext50_32x4d_imagenet is a classification model, " + "this module is trained with Baidu open sourced dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNeXt50_32x4d(nn.Layer): + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNeXt50_32x4d, self).__init__() + + self.layers = 50 + self.cardinality = 32 + depth = [3, 4, 6, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [128, 256, 512, 1024] + + self.conv = ConvBNLayer(num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu', name="res_conv1") + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = 
self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block] * int(64 // self.cardinality), + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=self.cardinality, + shortcut=shortcut, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + self.pool2d_avg_channels = num_channels[-1] * 2 + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnext50_32x4d_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext50_32x4d_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv(inputs) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/modules/image/classification/resnext50_64x4d_imagenet/module.py b/modules/image/classification/resnext50_64x4d_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..96a1b970db20c97f7866a2c5160f05c03ca57436 --- /dev/null +++ b/modules/image/classification/resnext50_64x4d_imagenet/module.py @@ -0,0 +1,194 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
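Editor's note: the `groups=cardinality` argument on the middle 3x3 convolution is what makes these bottlenecks ResNeXt rather than ResNet: the convolution runs as 32 (or 64) parallel paths over channel slices. The channel bookkeeping in the stage loop above can be checked with plain arithmetic; a throwaway script with the 32x4d configuration copied from the constructor:

```python
# Per-block channel bookkeeping for ResNeXt50_32x4d, as configured above.
cardinality = 32
depth = [3, 4, 6, 3]
num_channels = [64, 256, 512, 1024]
num_filters = [128, 256, 512, 1024]

for block in range(len(depth)):
    for i in range(depth[block]):
        # The first block of a stage reads the stage input; later blocks read
        # the previous block's output, num_filters * (64 // cardinality).
        in_c = num_channels[block] if i == 0 else num_filters[block] * (64 // cardinality)
        out_c = num_filters[block] * 2  # conv2 doubles its width when cardinality == 32
        stride = 2 if i == 0 and block != 0 else 1
        print("res%d%s: %d -> %d, stride %d" % (block + 2, chr(97 + i), in_c, out_c, stride))
```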
+import math +import os + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNeXt50.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + cardinality: int, + shortcut: bool = True, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + groups=cardinality, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + stride=stride, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.elementwise_add(x=short, y=conv2, act='relu') + return y + + +@moduleinfo( + name="resnext50_64x4d_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnext50_64x4d_imagenet is a classification model, " + "this module is trained with Baidu open sourced dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNeXt50_64x4d(nn.Layer): + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNeXt50_64x4d, self).__init__() + + self.layers = 50 + self.cardinality = 64 + depth = [3, 4, 6, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [256, 512, 1024, 2048] + + self.conv = ConvBNLayer(num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu', name="res_conv1") + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = 
self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block] * int(64 // self.cardinality), + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=self.cardinality, + shortcut=shortcut, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + self.pool2d_avg_channels = num_channels[-1] * 2 + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnext50_64x4d_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext50_64x4d_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv(inputs) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/modules/image/classification/resnext50_vd_32x4d_imagenet/module.py b/modules/image/classification/resnext50_vd_32x4d_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..9810b6dc5f9690cfdf2024283aa7ef2918e9461c --- /dev/null +++ b/modules/image/classification/resnext50_vd_32x4d_imagenet/module.py @@ -0,0 +1,214 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
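Editor's note: the `_vd` variants that follow differ from the plain ResNeXt modules above in two ways: a 3x3-3x3-3x3 deep stem in place of the single 7x7 stem conv, and `is_vd_mode` on the shortcut branch, which downsamples with a 2x2 average pool followed by a stride-1 1x1 conv instead of a stride-2 1x1 conv. The point of the latter, in miniature (numpy only, purely illustrative):

```python
import numpy as np

x = np.arange(16.0).reshape(4, 4)

# A stride-2 1x1 conv reads one pixel per 2x2 window and discards the other three.
strided = x[::2, ::2]                             # [[ 0.  2.] [ 8. 10.]]

# A 2x2 average pool lets every input pixel influence the downsampled shortcut.
pooled = x.reshape(2, 2, 2, 2).mean(axis=(1, 3))  # [[ 2.5  4.5] [10.5 12.5]]
```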
+ +import math +import os + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__( + self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None, + ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNeXt50_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + cardinality: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + groups=cardinality, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.elementwise_add(x=short, y=conv2, act='relu') + return y + + +@moduleinfo( + name="resnext50_vd_32x4d_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnext50_vd_32x4d_imagenet is a classification model, " + "this module is trained with Baidu open sourced dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNeXt50_vd(nn.Layer): + """ResNeXt50_vd model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNeXt50_vd, self).__init__() + + self.layers = 50 + self.cardinality = 32 + depth = [3, 4, 6, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [128, 256, 512, 1024] + + self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, 
filter_size=3, stride=2, act='relu', name="conv1_1") + self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") + self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") + + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block] * int(64 // self.cardinality), + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=self.cardinality, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnext50_vd_32x4d_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext50_vd_32x4d_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/modules/image/classification/resnext50_vd_64x4d_imagenet/module.py b/modules/image/classification/resnext50_vd_64x4d_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..f06fac599443b502d097443cdb4f3c761cf080ac --- /dev/null +++ b/modules/image/classification/resnext50_vd_64x4d_imagenet/module.py @@ -0,0 +1,214 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
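Editor's note: all of these modules initialize the classifier head identically: adaptive average pooling down to `num_channels[-1] * 2` features, then a Linear layer whose weights are drawn from `Uniform(-stdv, stdv)` with `stdv = 1/sqrt(fan_in)`, the classic LeCun-style bound that keeps the logit variance roughly independent of the feature width. Spelled out for the configuration above:

```python
import math

pool2d_avg_channels = 1024 * 2  # num_channels[-1] * 2
stdv = 1.0 / math.sqrt(pool2d_avg_channels)
print(round(stdv, 4))  # 0.0221 -- each fc weight is sampled from U(-0.0221, 0.0221)
# Var(U(-a, a)) = a**2 / 3, so summing fan_in independent terms gives a logit
# variance of roughly Var(x) / 3, regardless of how wide the pooled vector is.
```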
+ +import math +import os + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__( + self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None, + ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class BottleneckBlock(nn.Layer): + """Bottleneck Block for ResNeXt50_vd.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + cardinality: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + groups=cardinality, + stride=stride, + act='relu', + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 2 if cardinality == 32 else num_filters, + filter_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor): + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.elementwise_add(x=short, y=conv2, act='relu') + return y + + +@moduleinfo( + name="resnext50_vd_64x4d_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="resnext50_vd_64x4d_imagenet is a classification model, " + "this module is trained with Baidu open sourced dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ResNeXt50_vd(nn.Layer): + """ResNeXt50_vd model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ResNeXt50_vd, self).__init__() + + self.layers = 50 + self.cardinality = 64 + depth = [3, 4, 6, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [256, 512, 1024, 2048] + + self.conv1_1 = ConvBNLayer(num_channels=3, num_filters=32, 
filter_size=3, stride=2, act='relu', name="conv1_1") + self.conv1_2 = ConvBNLayer(num_channels=32, num_filters=32, filter_size=3, stride=1, act='relu', name="conv1_2") + self.conv1_3 = ConvBNLayer(num_channels=32, num_filters=64, filter_size=3, stride=1, act='relu', name="conv1_3") + + self.pool2d_max = MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.block_list = [] + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels[block] + if i == 0 else num_filters[block] * int(64 // self.cardinality), + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=self.cardinality, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name)) + self.block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = AdaptiveAvgPool2d(1) + + self.pool2d_avg_channels = num_channels[-1] * 2 + + stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0) + + self.out = Linear( + self.pool2d_avg_channels, + class_dim, + weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv), name="fc_weights"), + bias_attr=ParamAttr(name="fc_offset")) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'resnext50_vd_64x4d_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/resnext50_vd_64x4d_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + for block in self.block_list: + y = block(y) + y = self.pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self.pool2d_avg_channels]) + y = self.out(y) + return y diff --git a/hub_module/modules/image/classification/se_resnet18_vd_imagenet/README.md b/modules/image/classification/se_resnet18_vd_imagenet/README.md similarity index 100% rename from hub_module/modules/image/classification/se_resnet18_vd_imagenet/README.md rename to modules/image/classification/se_resnet18_vd_imagenet/README.md diff --git a/hub_module/modules/image/classification/resnet50_vd_wildanimals/__init__.py b/modules/image/classification/se_resnet18_vd_imagenet/__init__.py similarity index 100% rename from hub_module/modules/image/classification/resnet50_vd_wildanimals/__init__.py rename to modules/image/classification/se_resnet18_vd_imagenet/__init__.py diff --git a/modules/image/classification/se_resnet18_vd_imagenet/data_feed.py b/modules/image/classification/se_resnet18_vd_imagenet/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..99a0855fd6a93dbecd081cef312a04a350cfcc50 --- /dev/null +++ b/modules/image/classification/se_resnet18_vd_imagenet/data_feed.py @@ -0,0 +1,82 @@ +# coding=utf-8 +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader'] + +DATA_DIM = 224 +img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + +def resize_short(img, target_size): + percent = float(target_size) / min(img.size[0], 
img.size[1]) + resized_width = int(round(img.size[0] * percent)) + resized_height = int(round(img.size[1] * percent)) + img = img.resize((resized_width, resized_height), Image.LANCZOS) + return img + + +def crop_image(img, target_size, center): + width, height = img.size + size = target_size + if center: + w_start = (width - size) // 2 + h_start = (height - size) // 2 + else: + w_start = np.random.randint(0, width - size + 1) + h_start = np.random.randint(0, height - size + 1) + w_end = w_start + size + h_end = h_start + size + img = img.crop((w_start, h_start, w_end, h_end)) + return img + + +def process_image(img): + img = resize_short(img, target_size=256) + img = crop_image(img, target_size=DATA_DIM, center=True) + if img.mode != 'RGB': + img = img.convert('RGB') + img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list[numpy.ndarray]): images data, shape of each is [H, W, C]. + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + each['org_im_path'] = im_path + each['org_im'] = Image.open(im_path) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + if images is not None: + assert isinstance(images, list), "images should be a list of numpy.ndarray." + for im in images: + each = OrderedDict() + each['org_im'] = Image.fromarray(im[:, :, ::-1]) + each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + + for element in component: + element['image'] = process_image(element['org_im']) + yield element diff --git a/hub_module/modules/image/classification/se_resnet18_vd_imagenet/label_list.txt b/modules/image/classification/se_resnet18_vd_imagenet/label_list.txt similarity index 100% rename from hub_module/modules/image/classification/se_resnet18_vd_imagenet/label_list.txt rename to modules/image/classification/se_resnet18_vd_imagenet/label_list.txt diff --git a/modules/image/classification/se_resnet18_vd_imagenet/module.py b/modules/image/classification/se_resnet18_vd_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..6ecf730666a2d290c2dd269cd5066250486ceb04 --- /dev/null +++ b/modules/image/classification/se_resnet18_vd_imagenet/module.py @@ -0,0 +1,231 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division + +import ast +import argparse +import os + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving +from paddlehub.common.paddle_helper import add_vars_prefix + +from se_resnet18_vd_imagenet.processor import postprocess, base64_to_cv2 +from se_resnet18_vd_imagenet.data_feed import reader +from se_resnet18_vd_imagenet.se_resnet import SE_ResNet18_vd + + +@moduleinfo( + name="se_resnet18_vd_imagenet", + type="CV/image_classification", + author="paddlepaddle", + author_email="paddle-dev@baidu.com", + summary="SE_ResNet18_vd is an image classification model; this module is trained with the ImageNet dataset.", + version="1.0.0") +class 
SEResNet18vdImageNet(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, "se_resnet18_vd_imagenet_model") + label_file = os.path.join(self.directory, "label_list.txt") + with open(label_file, 'r', encoding='utf-8') as file: + self.label_list = file.read().split("\n")[:-1] + self.predictor_set = False + + def get_expected_image_width(self): + return 224 + + def get_expected_image_height(self): + return 224 + + def get_pretrained_images_mean(self): + im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3) + return im_mean + + def get_pretrained_images_std(self): + im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) + return im_std + + def _set_config(self): + """ + predictor config setting + """ + cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except Exception: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def context(self, trainable=True, pretrained=True): + """context for transfer learning. + + Args: + trainable (bool): Set parameters in program to be trainable. + pretrained (bool): Whether to load pretrained model. + + Returns: + inputs (dict): key is 'image', corresponding value is the image tensor. + outputs (dict): key is : + 'classification', corresponding value is the result of classification. + 'feature_map', corresponding value is the result of the layer before the fully connected layer. + context_prog (fluid.Program): program for transfer learning. + """ + context_prog = fluid.Program() + startup_prog = fluid.Program() + with fluid.program_guard(context_prog, startup_prog): + with fluid.unique_name.guard(): + image = fluid.layers.data(name="image", shape=[3, 224, 224], dtype="float32") + resnet_vd = SE_ResNet18_vd() + output, feature_map = resnet_vd.net(input=image, class_dim=len(self.label_list)) + + name_prefix = '@HUB_{}@'.format(self.name) + inputs = {'image': name_prefix + image.name} + outputs = {'classification': name_prefix + output.name, 'feature_map': name_prefix + feature_map.name} + add_vars_prefix(context_prog, name_prefix) + add_vars_prefix(startup_prog, name_prefix) + global_vars = context_prog.global_block().vars + inputs = {key: global_vars[value] for key, value in inputs.items()} + outputs = {key: global_vars[value] for key, value in outputs.items()} + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + # pretrained + if pretrained: + + def _if_exist(var): + b = os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + return b + + fluid.io.load_vars(exe, self.default_pretrained_model_path, context_prog, predicate=_if_exist) + else: + exe.run(startup_prog) + # trainable + for param in context_prog.global_block().iter_parameters(): + param.trainable = trainable + return inputs, outputs, context_prog + + def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1): + """ + API for image classification. + + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. + + Returns: + res (list[dict]): The classification results. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except Exception: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as cuda_device_id." + ) + + if not self.predictor_set: + self._set_config() + self.predictor_set = True + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except IndexError: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + batch_image = PaddleTensor(batch_image.copy()) + predictor_output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run( + [batch_image]) + out = postprocess(data_out=predictor_output[0].as_ndarray(), label_list=self.label_list, top_k=top_k) + res += out + return res + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.classification(images=images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.classification(paths=[args.input_path], batch_size=args.batch_size, use_gpu=args.use_gpu) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether to use GPU or not.") + self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + self.arg_config_group.add_argument('--top_k', type=ast.literal_eval, default=1, help="Return top k results.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") diff --git a/hub_module/modules/image/classification/se_resnet18_vd_imagenet/processor.py b/modules/image/classification/se_resnet18_vd_imagenet/processor.py similarity index 100% rename from hub_module/modules/image/classification/se_resnet18_vd_imagenet/processor.py rename to modules/image/classification/se_resnet18_vd_imagenet/processor.py diff --git a/modules/image/classification/se_resnet18_vd_imagenet/resnet_vd.py b/modules/image/classification/se_resnet18_vd_imagenet/resnet_vd.py new file mode 100755 index 0000000000000000000000000000000000000000..bb86ffec4bb3cd79d554c1752a931a3cbb1a12df --- /dev/null +++ b/modules/image/classification/se_resnet18_vd_imagenet/resnet_vd.py @@ -0,0 +1,206 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr + +__all__ = ["ResNet", "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd", "ResNet200_vd"] + + +class ResNet(): + def __init__(self, layers=50, is_3x3=False): + self.layers = layers + self.is_3x3 = is_3x3 + + def net(self, input, class_dim=1000): + is_3x3 = self.is_3x3 + layers = self.layers + supported_layers = [18, 34, 50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format(supported_layers, layers) + + if layers == 18: + depth = [2, 2, 2, 2] + elif layers == 34 or layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_filters = [64, 128, 256, 512] + if is_3x3 == False: + conv = self.conv_bn_layer(input=input, num_filters=64, filter_size=7, stride=2, act='relu') + else: + conv = self.conv_bn_layer(input=input, num_filters=32, filter_size=3, stride=2, act='relu', name='conv1_1') + conv = self.conv_bn_layer(input=conv, num_filters=32, filter_size=3, stride=1, act='relu', name='conv1_2') + conv = self.conv_bn_layer(input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_3') + + conv = fluid.layers.pool2d(input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + + if layers >= 50: + for block in range(len(depth)): + for i in range(depth[block]): + if layers in [101, 152, 200] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + conv = self.bottleneck_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + if_first=block == i == 0, + name=conv_name) + else: + for block in range(len(depth)): + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + 
i) + conv = self.basic_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + if_first=block == i == 0, + name=conv_name) + + pool = fluid.layers.pool2d(input=conv, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv))) + + return out, pool + + def conv_bn_layer(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def conv_bn_layer_new(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None): + pool = fluid.layers.pool2d( + input=input, pool_size=2, pool_stride=2, pool_padding=0, pool_type='avg', ceil_mode=True) + + conv = fluid.layers.conv2d( + input=pool, + num_filters=num_filters, + filter_size=filter_size, + stride=1, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def shortcut(self, input, ch_out, stride, name, if_first=False): + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1: + if if_first: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name) + elif if_first: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck_block(self, input, num_filters, stride, name, if_first): + conv0 = self.conv_bn_layer( + input=input, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + conv1 = self.conv_bn_layer( + input=conv0, num_filters=num_filters, filter_size=3, stride=stride, act='relu', name=name + "_branch2b") + conv2 = self.conv_bn_layer( + input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") + + short = self.shortcut(input, num_filters * 4, stride, if_first=if_first, name=name + "_branch1") + + return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') + + def basic_block(self, input, num_filters, stride, name, if_first): + conv0 = self.conv_bn_layer( + input=input, num_filters=num_filters, filter_size=3, act='relu', stride=stride, name=name + "_branch2a") + conv1 = self.conv_bn_layer( + input=conv0, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") + short = self.shortcut(input, num_filters, stride, if_first=if_first, name=name + "_branch1") + return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') + + +def ResNet18_vd(): + model = ResNet(layers=18, is_3x3=True) + return model + + +def ResNet34_vd(): + model = 
ResNet(layers=34, is_3x3=True) + return model + + +def ResNet50_vd(): + model = ResNet(layers=50, is_3x3=True) + return model + + +def ResNet101_vd(): + model = ResNet(layers=101, is_3x3=True) + return model + + +def ResNet152_vd(): + model = ResNet(layers=152, is_3x3=True) + return model + + +def ResNet200_vd(): + model = ResNet(layers=200, is_3x3=True) + return model diff --git a/modules/image/classification/se_resnet18_vd_imagenet/se_resnet.py b/modules/image/classification/se_resnet18_vd_imagenet/se_resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..894492e7b59436786e778b835a81d5a9cd112e50 --- /dev/null +++ b/modules/image/classification/se_resnet18_vd_imagenet/se_resnet.py @@ -0,0 +1,238 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +import math + +__all__ = [ + "SE_ResNet_vd", "SE_ResNet18_vd", "SE_ResNet34_vd", "SE_ResNet50_vd", "SE_ResNet101_vd", "SE_ResNet152_vd", + "SE_ResNet200_vd" +] + + +class SE_ResNet_vd(): + def __init__(self, layers=50, is_3x3=False): + self.layers = layers + self.is_3x3 = is_3x3 + + def net(self, input, class_dim=1000): + is_3x3 = self.is_3x3 + layers = self.layers + supported_layers = [18, 34, 50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format(supported_layers, layers) + + if layers == 18: + depth = [2, 2, 2, 2] + elif layers == 34 or layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_filters = [64, 128, 256, 512] + reduction_ratio = 16 + if is_3x3 == False: + conv = self.conv_bn_layer(input=input, num_filters=64, filter_size=7, stride=2, act='relu') + else: + conv = self.conv_bn_layer(input=input, num_filters=32, filter_size=3, stride=2, act='relu', name='conv1_1') + conv = self.conv_bn_layer(input=conv, num_filters=32, filter_size=3, stride=1, act='relu', name='conv1_2') + conv = self.conv_bn_layer(input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_3') + + conv = fluid.layers.pool2d(input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + if layers >= 50: + for block in range(len(depth)): + for i in range(depth[block]): + if layers in [101, 152, 200] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + conv = self.bottleneck_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + if_first=block == i == 0, + reduction_ratio=reduction_ratio, + name=conv_name) + + else: + for block in range(len(depth)): + for i in range(depth[block]): + conv_name = 
"res" + str(block + 2) + chr(97 + i) + conv = self.basic_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + if_first=block == i == 0, + reduction_ratio=reduction_ratio, + name=conv_name) + + pool = fluid.layers.pool2d(input=conv, pool_size=7, pool_type='avg', global_pooling=True) + + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), name='fc6_weights'), + bias_attr=ParamAttr(name='fc6_offset')) + + return out, pool + + def conv_bn_layer(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def conv_bn_layer_new(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None): + pool = fluid.layers.pool2d( + input=input, pool_size=2, pool_stride=2, pool_padding=0, pool_type='avg', ceil_mode=True) + + conv = fluid.layers.conv2d( + input=pool, + num_filters=num_filters, + filter_size=filter_size, + stride=1, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def shortcut(self, input, ch_out, stride, name, if_first=False): + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1: + if if_first: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name) + elif if_first: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck_block(self, input, num_filters, stride, name, if_first, reduction_ratio): + conv0 = self.conv_bn_layer( + input=input, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") + conv1 = self.conv_bn_layer( + input=conv0, num_filters=num_filters, filter_size=3, stride=stride, act='relu', name=name + "_branch2b") + conv2 = self.conv_bn_layer( + input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") + scale = self.squeeze_excitation( + input=conv2, num_channels=num_filters * 4, reduction_ratio=reduction_ratio, name='fc_' + name) + + short = self.shortcut(input, num_filters * 4, stride, if_first=if_first, name=name + "_branch1") + + return fluid.layers.elementwise_add(x=short, y=scale, act='relu') + + def basic_block(self, input, num_filters, stride, name, if_first, reduction_ratio): + conv0 = self.conv_bn_layer( + input=input, num_filters=num_filters, filter_size=3, act='relu', stride=stride, name=name + "_branch2a") + conv1 = self.conv_bn_layer( + input=conv0, num_filters=num_filters, filter_size=3, 
act=None, name=name + "_branch2b") + scale = self.squeeze_excitation( + input=conv1, num_channels=num_filters, reduction_ratio=reduction_ratio, name='fc_' + name) + short = self.shortcut(input, num_filters, stride, if_first=if_first, name=name + "_branch1") + return fluid.layers.elementwise_add(x=short, y=scale, act='relu') + + def squeeze_excitation(self, input, num_channels, reduction_ratio, name=None): + pool = fluid.layers.pool2d(input=input, pool_size=0, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + squeeze = fluid.layers.fc( + input=pool, + size=num_channels // reduction_ratio, + act='relu', + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), name=name + '_sqz_weights'), + bias_attr=ParamAttr(name=name + '_sqz_offset')) + stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0) + excitation = fluid.layers.fc( + input=squeeze, + size=num_channels, + act='sigmoid', + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), name=name + '_exc_weights'), + bias_attr=ParamAttr(name=name + '_exc_offset')) + scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0) + return scale + + +def SE_ResNet18_vd(): + model = SE_ResNet_vd(layers=18, is_3x3=True) + return model + + +def SE_ResNet34_vd(): + model = SE_ResNet_vd(layers=34, is_3x3=True) + return model + + +def SE_ResNet50_vd(): + model = SE_ResNet_vd(layers=50, is_3x3=True) + return model + + +def SE_ResNet101_vd(): + model = SE_ResNet_vd(layers=101, is_3x3=True) + return model + + +def SE_ResNet152_vd(): + model = SE_ResNet_vd(layers=152, is_3x3=True) + return model + + +def SE_ResNet200_vd(): + model = SE_ResNet_vd(layers=200, is_3x3=True) + return model diff --git a/modules/image/classification/shufflenet_v2_imagenet/module.py b/modules/image/classification/shufflenet_v2_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..ce0f669fddc5c2c19b55e40f6381ba80e678f59a --- /dev/null +++ b/modules/image/classification/shufflenet_v2_imagenet/module.py @@ -0,0 +1,313 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
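Editor's note: the `channel_shuffle` helper defined just below is the heart of ShuffleNet: after a grouped convolution, it interleaves channels across groups via reshape, transpose, reshape, so the next grouped convolution sees information from every group. Traced through in numpy (illustrative, not part of the module):

```python
import numpy as np

n, c, h, w, groups = 1, 6, 1, 1, 2
x = np.arange(n * c * h * w).reshape(n, c, h, w)

y = (x.reshape(n, groups, c // groups, h, w)  # split 6 channels into 2 groups of 3
     .transpose(0, 2, 1, 3, 4)                # swap the group and per-group axes
     .reshape(n, c, h, w))                    # flatten back to NCHW

print(y[0, :, 0, 0])  # [0 3 1 4 2 5] -- channels interleaved across the two groups
```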
+ +import os + +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import MSRA +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +def channel_shuffle(x: paddle.Tensor, groups: int): + """Shuffle input channels.""" + batchsize, num_channels, height, width = x.shape[0], x.shape[1], x.shape[2], x.shape[3] + channels_per_group = num_channels // groups + + # reshape + x = paddle.reshape(x=x, shape=[batchsize, groups, channels_per_group, height, width]) + + x = paddle.transpose(x=x, perm=[0, 2, 1, 3, 4]) + # flatten + x = paddle.reshape(x=x, shape=[batchsize, num_channels, height, width]) + return x + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, + num_channels: int, + filter_size: int, + num_filters: int, + stride: int, + padding: int, + channels: int = None, + num_groups: int = 1, + if_act: bool = True, + act: str = 'relu', + name: str = None): + super(ConvBNLayer, self).__init__() + self._if_act = if_act + assert act in ['relu', 'swish'], \ + "supported act are {} but your act is {}".format( + ['relu', 'swish'], act) + self._act = act + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + weight_attr=ParamAttr(initializer=MSRA(), name=name + "_weights"), + bias_attr=False) + + self._batch_norm = BatchNorm( + num_filters, + param_attr=ParamAttr(name=name + "_bn_scale"), + bias_attr=ParamAttr(name=name + "_bn_offset"), + moving_mean_name=name + "_bn_mean", + moving_variance_name=name + "_bn_variance") + + def forward(self, inputs: paddle.Tensor, if_act: bool = True): + y = self._conv(inputs) + y = self._batch_norm(y) + if self._if_act: + y = F.relu(y) if self._act == 'relu' else F.swish(y) + return y + + +class InvertedResidualUnit(nn.Layer): + """Inverted Residual unit.""" + + def __init__(self, + num_channels: int, + num_filters: int, + stride: int, + benchmodel: int, + act: str = 'relu', + name: str = None): + super(InvertedResidualUnit, self).__init__() + assert stride in [1, 2], \ + "supported stride are {} but your stride is {}".format([1, 2], stride) + self.benchmodel = benchmodel + oup_inc = num_filters // 2 + inp = num_channels + if benchmodel == 1: + self._conv_pw = ConvBNLayer( + num_channels=num_channels // 2, + num_filters=oup_inc, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=True, + act=act, + name='stage_' + name + '_conv1') + self._conv_dw = ConvBNLayer( + num_channels=oup_inc, + num_filters=oup_inc, + filter_size=3, + stride=stride, + padding=1, + num_groups=oup_inc, + if_act=False, + act=act, + name='stage_' + name + '_conv2') + self._conv_linear = ConvBNLayer( + num_channels=oup_inc, + num_filters=oup_inc, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=True, + act=act, + name='stage_' + name + '_conv3') + else: + # branch1 + self._conv_dw_1 = ConvBNLayer( + num_channels=num_channels, + num_filters=inp, + filter_size=3, + stride=stride, + padding=1, + num_groups=inp, + if_act=False, + act=act, + name='stage_' + name + '_conv4') + self._conv_linear_1 = ConvBNLayer( + num_channels=inp, + num_filters=oup_inc, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=True, + act=act, + name='stage_' + name + '_conv5') + # 
branch2 + self._conv_pw_2 = ConvBNLayer( + num_channels=num_channels, + num_filters=oup_inc, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=True, + act=act, + name='stage_' + name + '_conv1') + self._conv_dw_2 = ConvBNLayer( + num_channels=oup_inc, + num_filters=oup_inc, + filter_size=3, + stride=stride, + padding=1, + num_groups=oup_inc, + if_act=False, + act=act, + name='stage_' + name + '_conv2') + self._conv_linear_2 = ConvBNLayer( + num_channels=oup_inc, + num_filters=oup_inc, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=True, + act=act, + name='stage_' + name + '_conv3') + + def forward(self, inputs: paddle.Tensor): + if self.benchmodel == 1: + x1, x2 = paddle.split(inputs, num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2], axis=1) + x2 = self._conv_pw(x2) + x2 = self._conv_dw(x2) + x2 = self._conv_linear(x2) + out = paddle.concat([x1, x2], axis=1) + else: + x1 = self._conv_dw_1(inputs) + x1 = self._conv_linear_1(x1) + + x2 = self._conv_pw_2(inputs) + x2 = self._conv_dw_2(x2) + x2 = self._conv_linear_2(x2) + out = paddle.concat([x1, x2], axis=1) + + return channel_shuffle(out, 2) + + +@moduleinfo( + name="shufflenet_v2_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="shufflenet_v2_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class ShuffleNet(nn.Layer): + """ShuffleNet model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(ShuffleNet, self).__init__() + self.scale = 1 + self.class_dim = class_dim + stage_repeats = [4, 8, 4] + stage_out_channels = [-1, 24, 116, 232, 464, 1024] + + # 1. conv1 + self._conv1 = ConvBNLayer( + num_channels=3, + num_filters=stage_out_channels[1], + filter_size=3, + stride=2, + padding=1, + if_act=True, + act='relu', + name='stage1_conv') + self._max_pool = MaxPool2d(kernel_size=3, stride=2, padding=1) + + # 2. bottleneck sequences + self._block_list = [] + i = 1 + in_c = int(32) + for idxstage in range(len(stage_repeats)): + numrepeat = stage_repeats[idxstage] + output_channel = stage_out_channels[idxstage + 2] + for i in range(numrepeat): + if i == 0: + block = self.add_sublayer( + str(idxstage + 2) + '_' + str(i + 1), + InvertedResidualUnit( + num_channels=stage_out_channels[idxstage + 1], + num_filters=output_channel, + stride=2, + benchmodel=2, + act='relu', + name=str(idxstage + 2) + '_' + str(i + 1))) + self._block_list.append(block) + else: + block = self.add_sublayer( + str(idxstage + 2) + '_' + str(i + 1), + InvertedResidualUnit( + num_channels=output_channel, + num_filters=output_channel, + stride=1, + benchmodel=1, + act='relu', + name=str(idxstage + 2) + '_' + str(i + 1))) + self._block_list.append(block) + + # 3. last_conv + self._last_conv = ConvBNLayer( + num_channels=stage_out_channels[-2], + num_filters=stage_out_channels[-1], + filter_size=1, + stride=1, + padding=0, + if_act=True, + act='relu', + name='conv5') + + # 4. pool + self._pool2d_avg = AdaptiveAvgPool2d(1) + self._out_c = stage_out_channels[-1] + # 5. 
fc + self._fc = Linear( + stage_out_channels[-1], + class_dim, + weight_attr=ParamAttr(name='fc6_weights'), + bias_attr=ParamAttr(name='fc6_offset')) + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'shufflenet_v2_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/shufflenet_v2_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + y = self._conv1(inputs) + y = self._max_pool(y) + for inv in self._block_list: + y = inv(y) + y = self._last_conv(y) + y = self._pool2d_avg(y) + y = paddle.reshape(y, shape=[-1, self._out_c]) + y = self._fc(y) + return y diff --git a/hub_module/modules/image/classification/se_resnet18_vd_imagenet/__init__.py b/modules/image/classification/vgg16_imagenet/__init__.py similarity index 100% rename from hub_module/modules/image/classification/se_resnet18_vd_imagenet/__init__.py rename to modules/image/classification/vgg16_imagenet/__init__.py diff --git a/modules/image/classification/vgg16_imagenet/data_feed.py b/modules/image/classification/vgg16_imagenet/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..1a9d790e93333f3742be9ee58215da87eb2ef819 --- /dev/null +++ b/modules/image/classification/vgg16_imagenet/data_feed.py @@ -0,0 +1,73 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import os +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image, ImageEnhance +from paddle import fluid + +DATA_DIM = 224 +img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + +def resize_short(img, target_size): + percent = float(target_size) / min(img.size[0], img.size[1]) + resized_width = int(round(img.size[0] * percent)) + resized_height = int(round(img.size[1] * percent)) + img = img.resize((resized_width, resized_height), Image.LANCZOS) + return img + + +def crop_image(img, target_size, center): + width, height = img.size + size = target_size + if center == True: + w_start = (width - size) / 2 + h_start = (height - size) / 2 + else: + w_start = np.random.randint(0, width - size + 1) + h_start = np.random.randint(0, height - size + 1) + w_end = w_start + size + h_end = h_start + size + img = img.crop((w_start, h_start, w_end, h_end)) + return img + + +def process_image(img): + img = resize_short(img, target_size=256) + img = crop_image(img, target_size=DATA_DIM, center=True) + if img.mode != 'RGB': + img = img.convert('RGB') + #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def test_reader(paths=None, images=None): + """data generator + :param paths: path to images. 
+ :type paths: list, each element is a str + :param images: data of images, [N, H, W, C] + :type images: numpy.ndarray + """ + img_list = [] + if paths: + for img_path in paths: + assert os.path.isfile(img_path), "The {} isn't a valid file path.".format(img_path) + img = Image.open(img_path) + #img = cv2.imread(img_path) + img_list.append(img) + if images is not None: + for img in images: + img_list.append(Image.fromarray(np.uint8(img))) + for im in img_list: + im = process_image(im) + yield im diff --git a/hub_module/modules/image/classification/vgg16_imagenet/label_file.txt b/modules/image/classification/vgg16_imagenet/label_file.txt similarity index 100% rename from hub_module/modules/image/classification/vgg16_imagenet/label_file.txt rename to modules/image/classification/vgg16_imagenet/label_file.txt diff --git a/modules/image/classification/vgg16_imagenet/module.py b/modules/image/classification/vgg16_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..c91018d6870063303bb2d629e61d075cb561b459 --- /dev/null +++ b/modules/image/classification/vgg16_imagenet/module.py @@ -0,0 +1,232 @@ +# coding=utf-8 +import os +import ast +import argparse + +import numpy as np +import paddlehub as hub +import paddle.fluid as fluid +from paddlehub.module.module import moduleinfo, runnable +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.io.parser import txt_parser + +from vgg16_imagenet.vgg import VGG +from vgg16_imagenet.processor import load_label_info +from vgg16_imagenet.data_feed import test_reader + + +@moduleinfo( + name="vgg16_imagenet", + version="1.1.0", + type="cv/classification", + summary="VGG16 is a image classfication model trained with ImageNet-2012 dataset.", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class VGG16(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, "vgg16_model") + self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) + self.infer_prog = None + self.pred_out = None + self._set_config() + + def get_expected_image_width(self): + return 224 + + def get_expected_image_height(self): + return 224 + + def get_pretrained_images_mean(self): + im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3) + return im_mean + + def get_pretrained_images_std(self): + im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) + return im_std + + def _set_config(self): + """ + predictor config setting + """ + cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + cpu_config.switch_ir_optim(False) + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def context(self, + input_image=None, + trainable=True, + pretrained=True, + param_prefix='', + get_prediction=False, + extra_block_filters=((256, 512, 1, 2, 3), (128, 256, 1, 2, 3), (128, 256, 0, 1, 3), (128, 256, 0, 1, + 3)), + normalizations=(20., -1, -1, -1, -1, -1)): + """Distill the Head Features, so as to perform transfer learning. + + :param input_image: image tensor. 
+ :type input_image: + :param trainable: whether to set parameters trainable. + :type trainable: bool + :param pretrained: whether to load default pretrained model. + :type pretrained: bool + :param param_prefix: the prefix of parameters. + :type param_prefix: str + :param get_prediction: whether to get prediction. + :type get_prediction: bool + :param extra_block_filters: in each extra block, params: + [in_channel, out_channel, padding_size, stride_size, filter_size] + :type extra_block_filters: list + :param normalizations: params list of init scale in l2 norm, skip init + scale if param is -1. + :type normalizations: list + """ + context_prog = input_image.block.program if input_image else fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(context_prog, startup_program): + image = input_image if input_image else fluid.data( + name='image', shape=[-1, 3, 224, 224], dtype='float32', lod_level=0) + + backbone = VGG( + depth=16, + with_extra_blocks=not get_prediction, + normalizations=normalizations, + extra_block_filters=extra_block_filters) + + out = backbone(image) + inputs = {'image': image} + if get_prediction: + outputs = {'pred_out': out} + else: + outputs = {'body_feats': out} + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + if pretrained: + + def _if_exist(var): + return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + + if not param_prefix: + fluid.io.load_vars( + exe, self.default_pretrained_model_path, main_program=context_prog, predicate=_if_exist) + else: + exe.run(startup_program) + return inputs, outputs, context_prog + + def classification(self, paths=None, images=None, use_gpu=False, batch_size=1, top_k=1): + """API of Classification. + :param paths: the path of images. + :type paths: list, each element corresponds to the path of an image. + :param images: data of images, [N, H, W, C] + :type images: numpy.ndarray + :param use_gpu: whether to use gpu or not. + :type use_gpu: bool + :param batch_size: batch size. 
+ :type batch_size: int + :param top_k: number of top results to return. + :type top_k: int + """ + if self.infer_prog is None: + inputs, outputs, self.infer_prog = self.context(trainable=False, pretrained=True, get_prediction=True) + self.infer_prog = self.infer_prog.clone(for_test=True) + self.pred_out = outputs['pred_out'] + place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() + exe = fluid.Executor(place) + all_images = [] + paths = paths if paths else [] + for yield_data in test_reader(paths, images): + all_images.append(yield_data) + + images_num = len(all_images) + loop_num = int(np.ceil(images_num / batch_size)) + res_list = [] + top_k = max(min(top_k, 1000), 1) + for iter_id in range(loop_num): + batch_data = [] + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_images[handle_id + image_id]) + except IndexError: + pass + batch_data = np.array(batch_data).astype('float32') + data_tensor = PaddleTensor(batch_data.copy()) + if use_gpu: + result = self.gpu_predictor.run([data_tensor]) + else: + result = self.cpu_predictor.run([data_tensor]) + for i, res in enumerate(result[0].as_ndarray()): + res_dict = {} + pred_label = np.argsort(res)[::-1][:top_k] + for k in pred_label: + class_name = self.label_names[int(k)].split(',')[0] + max_prob = res[k] + res_dict[class_name] = max_prob + res_list.append(res_dict) + return res_list + + def add_module_config_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether to use GPU or not") + + self.arg_config_group.add_argument('--batch_size', type=int, default=1, help="batch size for prediction") + + def add_module_input_arg(self): + """ + Add the command input options + """ + self.arg_input_group.add_argument('--input_path', type=str, default=None, help="input data") + + self.arg_input_group.add_argument('--input_file', type=str, default=None, help="file containing input data") + + def check_input_data(self, args): + input_data = [] + if args.input_path: + input_data = [args.input_path] + elif args.input_file: + if not os.path.exists(args.input_file): + raise RuntimeError("File %s does not exist." % args.input_file) + else: + input_data = txt_parser.parse(args.input_file, use_strip=True) + return input_data + + @runnable + def run_cmd(self, argvs): + self.parser = argparse.ArgumentParser( + description="Run the {}".format(self.name), + prog="hub run {}".format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + input_data = self.check_input_data(args) + if len(input_data) == 0: + self.parser.print_help() + exit(1) + else: + for image_path in input_data: + if not os.path.exists(image_path): + raise RuntimeError("File %s does not exist." 
% image_path) + return self.classification(paths=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size) diff --git a/hub_module/modules/image/classification/vgg16_imagenet/processor.py b/modules/image/classification/vgg16_imagenet/processor.py similarity index 100% rename from hub_module/modules/image/classification/vgg16_imagenet/processor.py rename to modules/image/classification/vgg16_imagenet/processor.py diff --git a/modules/image/classification/vgg16_imagenet/vgg.py b/modules/image/classification/vgg16_imagenet/vgg.py new file mode 100644 index 0000000000000000000000000000000000000000..d950c6b553d9af29086ba6f942d005920e74c296 --- /dev/null +++ b/modules/image/classification/vgg16_imagenet/vgg.py @@ -0,0 +1,184 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr + +__all__ = ['VGG'] + + +class VGG(object): + """ + VGG, see https://arxiv.org/abs/1409.1556 + + Args: + depth (int): the VGG net depth (16 or 19) + normalizations (list): params list of init scale in l2 norm, skip init + scale if param is -1. + with_extra_blocks (bool): whether or not extra blocks should be added + extra_block_filters (list): in each extra block, params: + [in_channel, out_channel, padding_size, stride_size, filter_size] + class_dim (int): number of class while classification + """ + + def __init__(self, + depth=16, + with_extra_blocks=False, + normalizations=[20., -1, -1, -1, -1, -1], + extra_block_filters=[[256, 512, 1, 2, 3], [128, 256, 1, 2, 3], [128, 256, 0, 1, 3], + [128, 256, 0, 1, 3]], + class_dim=1000): + assert depth in [16, 19], "depth {} not in [16, 19]" + self.depth = depth + self.depth_cfg = {16: [2, 2, 3, 3, 3], 19: [2, 2, 4, 4, 4]} + self.with_extra_blocks = with_extra_blocks + self.normalizations = normalizations + self.extra_block_filters = extra_block_filters + self.class_dim = class_dim + + def __call__(self, input): + layers = [] + layers += self._vgg_block(input) + + if not self.with_extra_blocks: + return layers[-1] + + layers += self._add_extras_block(layers[-1]) + norm_cfg = self.normalizations + for k, v in enumerate(layers): + if not norm_cfg[k] == -1: + layers[k] = self._l2_norm_scale(v, init_scale=norm_cfg[k]) + + return layers + + def _vgg_block(self, input): + nums = self.depth_cfg[self.depth] + vgg_base = [64, 128, 256, 512, 512] + conv = input + res_layer = [] + layers = [] + for k, v in enumerate(vgg_base): + conv = self._conv_block(conv, v, nums[k], name="conv{}_".format(k + 1)) + layers.append(conv) + if self.with_extra_blocks: + if k == 4: + conv = self._pooling_block(conv, 3, 1, pool_padding=1) + else: + conv = self._pooling_block(conv, 2, 2) + else: + conv = self._pooling_block(conv, 2, 2) + if not self.with_extra_blocks: + fc_dim = 4096 + fc_name = ["fc6", "fc7", "fc8"] + fc1 = fluid.layers.fc( + input=conv, + size=fc_dim, + act='relu', + param_attr=fluid.param_attr.ParamAttr(name=fc_name[0] + "_weights"), + bias_attr=fluid.param_attr.ParamAttr(name=fc_name[0] + "_offset")) + fc2 = fluid.layers.fc( + input=fc1, + size=fc_dim, + act='relu', + param_attr=fluid.param_attr.ParamAttr(name=fc_name[1] + "_weights"), + bias_attr=fluid.param_attr.ParamAttr(name=fc_name[1] + "_offset")) + out = fluid.layers.fc( + input=fc2, + size=self.class_dim, + param_attr=fluid.param_attr.ParamAttr(name=fc_name[2] + "_weights"), + bias_attr=fluid.param_attr.ParamAttr(name=fc_name[2] + "_offset")) + out = fluid.layers.softmax(out) + 
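# Note on the detection branch below: when with_extra_blocks is True, fc6/fc7
# are re-cast as convolutions in the SSD style -- fc6 becomes a 3x3 conv with
# dilation=6 and padding=6. With stride 1 that choice is size-preserving:
#   out = (H + 2*6 - 6*(3 - 1) - 1) / 1 + 1 = H
# so the 1024-channel "fc" feature maps keep conv5's spatial resolution, and
# [conv4_3, fc7] can be returned together as multi-scale body features.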
res_layer.append(out) + return [out] + else: + fc6 = self._conv_layer(conv, 1024, 3, 1, 6, dilation=6, name="fc6") + fc7 = self._conv_layer(fc6, 1024, 1, 1, 0, name="fc7") + return [layers[3], fc7] + + def _add_extras_block(self, input): + cfg = self.extra_block_filters + conv = input + layers = [] + for k, v in enumerate(cfg): + assert len(v) == 5, "extra_block_filters size not fix" + conv = self._extra_block(conv, v[0], v[1], v[2], v[3], v[4], name="conv{}_".format(6 + k)) + layers.append(conv) + + return layers + + def _conv_block(self, input, num_filter, groups, name=None): + conv = input + for i in range(groups): + conv = self._conv_layer( + input=conv, + num_filters=num_filter, + filter_size=3, + stride=1, + padding=1, + act='relu', + name=name + str(i + 1)) + return conv + + def _extra_block(self, input, num_filters1, num_filters2, padding_size, stride_size, filter_size, name=None): + # 1x1 conv + conv_1 = self._conv_layer( + input=input, num_filters=int(num_filters1), filter_size=1, stride=1, act='relu', padding=0, name=name + "1") + + # 3x3 conv + conv_2 = self._conv_layer( + input=conv_1, + num_filters=int(num_filters2), + filter_size=filter_size, + stride=stride_size, + act='relu', + padding=padding_size, + name=name + "2") + return conv_2 + + def _conv_layer(self, + input, + num_filters, + filter_size, + stride, + padding, + dilation=1, + act='relu', + use_cudnn=True, + name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=padding, + dilation=dilation, + act=act, + use_cudnn=use_cudnn, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=ParamAttr(name=name + "_biases") if self.with_extra_blocks else False, + name=name + '.conv2d.output.1') + return conv + + def _pooling_block(self, conv, pool_size, pool_stride, pool_padding=0, ceil_mode=True): + pool = fluid.layers.pool2d( + input=conv, + pool_size=pool_size, + pool_type='max', + pool_stride=pool_stride, + pool_padding=pool_padding, + ceil_mode=ceil_mode) + return pool + + def _l2_norm_scale(self, input, init_scale=1.0, channel_shared=False): + from paddle.fluid.layer_helper import LayerHelper + from paddle.fluid.initializer import Constant + helper = LayerHelper("Scale") + l2_norm = fluid.layers.l2_normalize(input, axis=1) # l2 norm along channel + shape = [1] if channel_shared else [input.shape[1]] + scale = helper.create_parameter( + attr=helper.param_attr, shape=shape, dtype=input.dtype, default_initializer=Constant(init_scale)) + out = fluid.layers.elementwise_mul( + x=l2_norm, y=scale, axis=-1 if channel_shared else 1, name="conv4_3_norm_scale") + return out diff --git a/modules/image/classification/xception41_imagenet/module.py b/modules/image/classification/xception41_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..e8d7e0782c3b12d58d1f9bb5f50fa7057616b1ae --- /dev/null +++ b/modules/image/classification/xception41_imagenet/module.py @@ -0,0 +1,323 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import math + +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + bn_name = "bn_" + name + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(name=bn_name + "_offset"), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class SeparableConv(nn.Layer): + """Basic separable conv layer, it contains pointwise conv and depthwise conv.""" + + def __init__(self, input_channels: int, output_channels: int, stride: int = 1, name: str = None): + super(SeparableConv, self).__init__() + + self._pointwise_conv = ConvBNLayer(input_channels, output_channels, 1, name=name + "_sep") + self._depthwise_conv = ConvBNLayer( + output_channels, output_channels, 3, stride=stride, groups=output_channels, name=name + "_dw") + + def forward(self, inputs: paddle.Tensor): + x = self._pointwise_conv(inputs) + x = self._depthwise_conv(x) + return x + + +class EntryFlowBottleneckBlock(nn.Layer): + """Basic entry flow bottleneck block for Xception.""" + + def __init__(self, + input_channels: int, + output_channels: int, + stride: int = 2, + name: str = None, + relu_first: bool = False): + super(EntryFlowBottleneckBlock, self).__init__() + self.relu_first = relu_first + + self._short = Conv2d( + in_channels=input_channels, + out_channels=output_channels, + kernel_size=1, + stride=stride, + padding=0, + weight_attr=ParamAttr(name + "_branch1_weights"), + bias_attr=False) + self._conv1 = SeparableConv(input_channels, output_channels, stride=1, name=name + "_branch2a_weights") + self._conv2 = SeparableConv(output_channels, output_channels, stride=1, name=name + "_branch2b_weights") + self._pool = MaxPool2d(kernel_size=3, stride=stride, padding=1) + + def forward(self, inputs: paddle.Tensor): + conv0 = inputs + short = self._short(inputs) + if self.relu_first: + conv0 = F.relu(conv0) + conv1 = self._conv1(conv0) + conv2 = F.relu(conv1) + conv2 = self._conv2(conv2) + pool = self._pool(conv2) + return paddle.elementwise_add(x=short, y=pool) + + +class EntryFlow(nn.Layer): + """Entry flow for Xception.""" + + def __init__(self, block_num: int = 3): + super(EntryFlow, self).__init__() + + name = "entry_flow" + self.block_num = block_num + self._conv1 = ConvBNLayer(3, 32, 3, stride=2, act="relu", name=name + "_conv1") + self._conv2 = ConvBNLayer(32, 64, 3, act="relu", name=name + "_conv2") + if block_num == 3: + self._conv_0 = EntryFlowBottleneckBlock(64, 128, stride=2, 
name=name + "_0", relu_first=False) + self._conv_1 = EntryFlowBottleneckBlock(128, 256, stride=2, name=name + "_1", relu_first=True) + self._conv_2 = EntryFlowBottleneckBlock(256, 728, stride=2, name=name + "_2", relu_first=True) + elif block_num == 5: + self._conv_0 = EntryFlowBottleneckBlock(64, 128, stride=2, name=name + "_0", relu_first=False) + self._conv_1 = EntryFlowBottleneckBlock(128, 256, stride=1, name=name + "_1", relu_first=True) + self._conv_2 = EntryFlowBottleneckBlock(256, 256, stride=2, name=name + "_2", relu_first=True) + self._conv_3 = EntryFlowBottleneckBlock(256, 728, stride=1, name=name + "_3", relu_first=True) + self._conv_4 = EntryFlowBottleneckBlock(728, 728, stride=2, name=name + "_4", relu_first=True) + else: + sys.exit(-1) + + def forward(self, inputs: paddle.Tensor): + x = self._conv1(inputs) + x = self._conv2(x) + + if self.block_num == 3: + x = self._conv_0(x) + x = self._conv_1(x) + x = self._conv_2(x) + elif self.block_num == 5: + x = self._conv_0(x) + x = self._conv_1(x) + x = self._conv_2(x) + x = self._conv_3(x) + x = self._conv_4(x) + return x + + +class MiddleFlowBottleneckBlock(nn.Layer): + """Basic middle flow bottleneck block for Xception.""" + + def __init__(self, input_channels: int, output_channels: int, name: str): + super(MiddleFlowBottleneckBlock, self).__init__() + + self._conv_0 = SeparableConv(input_channels, output_channels, stride=1, name=name + "_branch2a_weights") + self._conv_1 = SeparableConv(output_channels, output_channels, stride=1, name=name + "_branch2b_weights") + self._conv_2 = SeparableConv(output_channels, output_channels, stride=1, name=name + "_branch2c_weights") + + def forward(self, inputs: paddle.Tensor): + conv0 = F.relu(inputs) + conv0 = self._conv_0(conv0) + conv1 = F.relu(conv0) + conv1 = self._conv_1(conv1) + conv2 = F.relu(conv1) + conv2 = self._conv_2(conv2) + return paddle.elementwise_add(x=inputs, y=conv2) + + +class MiddleFlow(nn.Layer): + """Middle flow for Xception.""" + + def __init__(self, block_num: int = 8): + super(MiddleFlow, self).__init__() + + self.block_num = block_num + self._conv_0 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_0") + self._conv_1 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_1") + self._conv_2 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_2") + self._conv_3 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_3") + self._conv_4 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_4") + self._conv_5 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_5") + self._conv_6 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_6") + self._conv_7 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_7") + if block_num == 16: + self._conv_8 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_8") + self._conv_9 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_9") + self._conv_10 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_10") + self._conv_11 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_11") + self._conv_12 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_12") + self._conv_13 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_13") + self._conv_14 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_14") + self._conv_15 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_15") + + def forward(self, inputs: paddle.Tensor): + x = self._conv_0(inputs) + x = self._conv_1(x) + x = self._conv_2(x) + x = self._conv_3(x) + x = self._conv_4(x) + x = self._conv_5(x) + x = self._conv_6(x) + x = 
self._conv_7(x) + if self.block_num == 16: + x = self._conv_8(x) + x = self._conv_9(x) + x = self._conv_10(x) + x = self._conv_11(x) + x = self._conv_12(x) + x = self._conv_13(x) + x = self._conv_14(x) + x = self._conv_15(x) + return x + + +class ExitFlowBottleneckBlock(nn.Layer): + """Basic exit flow bottleneck block for Xception.""" + + def __init__(self, input_channels: int, output_channels1: int, output_channels2: int, name: str): + super(ExitFlowBottleneckBlock, self).__init__() + + self._short = Conv2d( + in_channels=input_channels, + out_channels=output_channels2, + kernel_size=1, + stride=2, + padding=0, + weight_attr=ParamAttr(name + "_branch1_weights"), + bias_attr=False) + self._conv_1 = SeparableConv(input_channels, output_channels1, stride=1, name=name + "_branch2a_weights") + self._conv_2 = SeparableConv(output_channels1, output_channels2, stride=1, name=name + "_branch2b_weights") + self._pool = MaxPool2d(kernel_size=3, stride=2, padding=1) + + def forward(self, inputs: paddle.Tensor): + short = self._short(inputs) + conv0 = F.relu(inputs) + conv1 = self._conv_1(conv0) + conv2 = F.relu(conv1) + conv2 = self._conv_2(conv2) + pool = self._pool(conv2) + return paddle.elementwise_add(x=short, y=pool) + + +class ExitFlow(nn.Layer): + """Exit flow for Xception.""" + + def __init__(self, class_dim: int): + super(ExitFlow, self).__init__() + + name = "exit_flow" + + self._conv_0 = ExitFlowBottleneckBlock(728, 728, 1024, name=name + "_1") + self._conv_1 = SeparableConv(1024, 1536, stride=1, name=name + "_2") + self._conv_2 = SeparableConv(1536, 2048, stride=1, name=name + "_3") + self._pool = AdaptiveAvgPool2d(1) + stdv = 1.0 / math.sqrt(2048 * 1.0) + self._out = Linear( + 2048, + class_dim, + weight_attr=ParamAttr(name="fc_weights", initializer=Uniform(-stdv, stdv)), + bias_attr=ParamAttr(name="fc_offset")) + + def forward(self, inputs: paddle.Tensor): + conv0 = self._conv_0(inputs) + conv1 = self._conv_1(conv0) + conv1 = F.relu(conv1) + conv2 = self._conv_2(conv1) + conv2 = F.relu(conv2) + pool = self._pool(conv2) + pool = paddle.reshape(pool, [0, -1]) + out = self._out(pool) + return out + + +@moduleinfo( + name="xception41_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="Xception41_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class Xception41(nn.Layer): + """Xception41 model.""" + + def __init__(self, class_dim: int = 1000, load_checkpoint: str = None): + super(Xception41, self).__init__() + self.entry_flow_block_num = 3 + self.middle_flow_block_num = 8 + self._entry_flow = EntryFlow(self.entry_flow_block_num) + self._middle_flow = MiddleFlow(self.middle_flow_block_num) + self._exit_flow = ExitFlow(class_dim) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'xception41_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/xception41_imagenet.pdparams -O' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs: paddle.Tensor): + x = self._entry_flow(inputs) + x = self._middle_flow(x) + x = self._exit_flow(x) + return x diff --git a/modules/image/classification/xception65_imagenet/module.py 
b/modules/image/classification/xception65_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..127279f10d12d80c486f1841246274d51210a954 --- /dev/null +++ b/modules/image/classification/xception65_imagenet/module.py @@ -0,0 +1,321 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import math + +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + bn_name = "bn_" + name + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(name=bn_name + "_offset"), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class SeparableConv(nn.Layer): + """Basic separable conv layer, it contains pointwise conv and depthwise conv.""" + + def __init__(self, input_channels: int, output_channels: int, stride: int = 1, name: str = None): + super(SeparableConv, self).__init__() + + self._pointwise_conv = ConvBNLayer(input_channels, output_channels, 1, name=name + "_sep") + self._depthwise_conv = ConvBNLayer( + output_channels, output_channels, 3, stride=stride, groups=output_channels, name=name + "_dw") + + def forward(self, inputs: paddle.Tensor): + x = self._pointwise_conv(inputs) + x = self._depthwise_conv(x) + return x + + +class EntryFlowBottleneckBlock(nn.Layer): + """Basic entry flow bottleneck block for Xception.""" + + def __init__(self, + input_channels: int, + output_channels: int, + stride: int = 2, + name: str = None, + relu_first: bool = False): + super(EntryFlowBottleneckBlock, self).__init__() + self.relu_first = relu_first + + self._short = Conv2d( + in_channels=input_channels, + out_channels=output_channels, + kernel_size=1, + stride=stride, + padding=0, + weight_attr=ParamAttr(name + "_branch1_weights"), + bias_attr=False) + self._conv1 = SeparableConv(input_channels, output_channels, stride=1, name=name + "_branch2a_weights") + self._conv2 = SeparableConv(output_channels, output_channels, stride=1, name=name + "_branch2b_weights") + self._pool = 
MaxPool2d(kernel_size=3, stride=stride, padding=1) + + def forward(self, inputs: paddle.Tensor): + conv0 = inputs + short = self._short(inputs) + if self.relu_first: + conv0 = F.relu(conv0) + conv1 = self._conv1(conv0) + conv2 = F.relu(conv1) + conv2 = self._conv2(conv2) + pool = self._pool(conv2) + return paddle.elementwise_add(x=short, y=pool) + + +class EntryFlow(nn.Layer): + """Entry flow for Xception.""" + + def __init__(self, block_num: int = 3): + super(EntryFlow, self).__init__() + + name = "entry_flow" + self.block_num = block_num + self._conv1 = ConvBNLayer(3, 32, 3, stride=2, act="relu", name=name + "_conv1") + self._conv2 = ConvBNLayer(32, 64, 3, act="relu", name=name + "_conv2") + if block_num == 3: + self._conv_0 = EntryFlowBottleneckBlock(64, 128, stride=2, name=name + "_0", relu_first=False) + self._conv_1 = EntryFlowBottleneckBlock(128, 256, stride=2, name=name + "_1", relu_first=True) + self._conv_2 = EntryFlowBottleneckBlock(256, 728, stride=2, name=name + "_2", relu_first=True) + elif block_num == 5: + self._conv_0 = EntryFlowBottleneckBlock(64, 128, stride=2, name=name + "_0", relu_first=False) + self._conv_1 = EntryFlowBottleneckBlock(128, 256, stride=1, name=name + "_1", relu_first=True) + self._conv_2 = EntryFlowBottleneckBlock(256, 256, stride=2, name=name + "_2", relu_first=True) + self._conv_3 = EntryFlowBottleneckBlock(256, 728, stride=1, name=name + "_3", relu_first=True) + self._conv_4 = EntryFlowBottleneckBlock(728, 728, stride=2, name=name + "_4", relu_first=True) + else: + sys.exit(-1) + + def forward(self, inputs: paddle.Tensor): + x = self._conv1(inputs) + x = self._conv2(x) + + if self.block_num == 3: + x = self._conv_0(x) + x = self._conv_1(x) + x = self._conv_2(x) + elif self.block_num == 5: + x = self._conv_0(x) + x = self._conv_1(x) + x = self._conv_2(x) + x = self._conv_3(x) + x = self._conv_4(x) + return x + + +class MiddleFlowBottleneckBlock(nn.Layer): + """Basic middle flow bottleneck block for Xception.""" + + def __init__(self, input_channels: int, output_channels: int, name: str): + super(MiddleFlowBottleneckBlock, self).__init__() + + self._conv_0 = SeparableConv(input_channels, output_channels, stride=1, name=name + "_branch2a_weights") + self._conv_1 = SeparableConv(output_channels, output_channels, stride=1, name=name + "_branch2b_weights") + self._conv_2 = SeparableConv(output_channels, output_channels, stride=1, name=name + "_branch2c_weights") + + def forward(self, inputs: paddle.Tensor): + conv0 = F.relu(inputs) + conv0 = self._conv_0(conv0) + conv1 = F.relu(conv0) + conv1 = self._conv_1(conv1) + conv2 = F.relu(conv1) + conv2 = self._conv_2(conv2) + return paddle.elementwise_add(x=inputs, y=conv2) + + +class MiddleFlow(nn.Layer): + """Middle flow for Xception.""" + + def __init__(self, block_num: int = 8): + super(MiddleFlow, self).__init__() + + self.block_num = block_num + self._conv_0 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_0") + self._conv_1 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_1") + self._conv_2 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_2") + self._conv_3 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_3") + self._conv_4 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_4") + self._conv_5 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_5") + self._conv_6 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_6") + self._conv_7 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_7") + if block_num == 16: + self._conv_8 = MiddleFlowBottleneckBlock(728, 728, 
name="middle_flow_8") + self._conv_9 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_9") + self._conv_10 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_10") + self._conv_11 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_11") + self._conv_12 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_12") + self._conv_13 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_13") + self._conv_14 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_14") + self._conv_15 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_15") + + def forward(self, inputs: paddle.Tensor): + x = self._conv_0(inputs) + x = self._conv_1(x) + x = self._conv_2(x) + x = self._conv_3(x) + x = self._conv_4(x) + x = self._conv_5(x) + x = self._conv_6(x) + x = self._conv_7(x) + if self.block_num == 16: + x = self._conv_8(x) + x = self._conv_9(x) + x = self._conv_10(x) + x = self._conv_11(x) + x = self._conv_12(x) + x = self._conv_13(x) + x = self._conv_14(x) + x = self._conv_15(x) + return x + + +class ExitFlowBottleneckBlock(nn.Layer): + """Basic exit flow bottleneck block for Xception.""" + + def __init__(self, input_channels, output_channels1, output_channels2, name): + super(ExitFlowBottleneckBlock, self).__init__() + + self._short = Conv2d( + in_channels=input_channels, + out_channels=output_channels2, + kernel_size=1, + stride=2, + padding=0, + weight_attr=ParamAttr(name + "_branch1_weights"), + bias_attr=False) + self._conv_1 = SeparableConv(input_channels, output_channels1, stride=1, name=name + "_branch2a_weights") + self._conv_2 = SeparableConv(output_channels1, output_channels2, stride=1, name=name + "_branch2b_weights") + self._pool = MaxPool2d(kernel_size=3, stride=2, padding=1) + + def forward(self, inputs: paddle.Tensor): + short = self._short(inputs) + conv0 = F.relu(inputs) + conv1 = self._conv_1(conv0) + conv2 = F.relu(conv1) + conv2 = self._conv_2(conv2) + pool = self._pool(conv2) + return paddle.elementwise_add(x=short, y=pool) + + +class ExitFlow(nn.Layer): + """Exit flow for Xception.""" + + def __init__(self, class_dim): + super(ExitFlow, self).__init__() + + name = "exit_flow" + + self._conv_0 = ExitFlowBottleneckBlock(728, 728, 1024, name=name + "_1") + self._conv_1 = SeparableConv(1024, 1536, stride=1, name=name + "_2") + self._conv_2 = SeparableConv(1536, 2048, stride=1, name=name + "_3") + self._pool = AdaptiveAvgPool2d(1) + stdv = 1.0 / math.sqrt(2048 * 1.0) + self._out = Linear( + 2048, + class_dim, + weight_attr=ParamAttr(name="fc_weights", initializer=Uniform(-stdv, stdv)), + bias_attr=ParamAttr(name="fc_offset")) + + def forward(self, inputs: paddle.Tensor): + conv0 = self._conv_0(inputs) + conv1 = self._conv_1(conv0) + conv1 = F.relu(conv1) + conv2 = self._conv_2(conv1) + conv2 = F.relu(conv2) + pool = self._pool(conv2) + pool = paddle.reshape(pool, [0, -1]) + out = self._out(pool) + return out + + +@moduleinfo( + name="xception65_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="Xception65_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class Xception65(nn.Layer): + def __init__(self, class_dim=1000, load_checkpoint: str = None): + super(Xception65, self).__init__() + self.entry_flow_block_num = 3 + self.middle_flow_block_num = 16 + self._entry_flow = EntryFlow(self.entry_flow_block_num) + self._middle_flow = MiddleFlow(self.middle_flow_block_num) + self._exit_flow = ExitFlow(class_dim) + + if load_checkpoint is not None: + 
model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'xception65_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/xception65_imagenet.pdparams -O' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs): + x = self._entry_flow(inputs) + x = self._middle_flow(x) + x = self._exit_flow(x) + return x diff --git a/modules/image/classification/xception71_imagenet/module.py b/modules/image/classification/xception71_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..43f86f5605dcb3f0382a0f2631664638a179b3cc --- /dev/null +++ b/modules/image/classification/xception71_imagenet/module.py @@ -0,0 +1,319 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import math + +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2d, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d +from paddle.nn.initializer import Uniform +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +class ConvBNLayer(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, + num_channels: int, + num_filters: int, + filter_size: int, + stride: int = 1, + groups: int = 1, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + bn_name = "bn_" + name + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(name=bn_name + "_offset"), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def forward(self, inputs: paddle.Tensor): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +class SeparableConv(nn.Layer): + """Basic separable conv layer, it contains pointwise conv and depthwise conv.""" + + def __init__(self, input_channels: int, output_channels: int, stride: int = 1, name: str = None): + super(SeparableConv, self).__init__() + + self._pointwise_conv = ConvBNLayer(input_channels, output_channels, 1, name=name + "_sep") + self._depthwise_conv = ConvBNLayer( + output_channels, output_channels, 3, stride=stride, groups=output_channels, name=name + "_dw") + + def forward(self, inputs: paddle.Tensor): + x = self._pointwise_conv(inputs) + x = self._depthwise_conv(x) + return x + + +class EntryFlowBottleneckBlock(nn.Layer): + """Basic entry flow 
bottleneck block for Xception.""" + + def __init__(self, + input_channels: int, + output_channels: int, + stride: int = 2, + name: str = None, + relu_first: bool = False): + super(EntryFlowBottleneckBlock, self).__init__() + self.relu_first = relu_first + + self._short = Conv2d( + in_channels=input_channels, + out_channels=output_channels, + kernel_size=1, + stride=stride, + padding=0, + weight_attr=ParamAttr(name + "_branch1_weights"), + bias_attr=False) + self._conv1 = SeparableConv(input_channels, output_channels, stride=1, name=name + "_branch2a_weights") + self._conv2 = SeparableConv(output_channels, output_channels, stride=1, name=name + "_branch2b_weights") + self._pool = MaxPool2d(kernel_size=3, stride=stride, padding=1) + + def forward(self, inputs: paddle.Tensor): + conv0 = inputs + short = self._short(inputs) + if self.relu_first: + conv0 = F.relu(conv0) + conv1 = self._conv1(conv0) + conv2 = F.relu(conv1) + conv2 = self._conv2(conv2) + pool = self._pool(conv2) + return paddle.elementwise_add(x=short, y=pool) + + +class EntryFlow(nn.Layer): + """Entry flow for Xception.""" + + def __init__(self, block_num: int = 3): + super(EntryFlow, self).__init__() + + name = "entry_flow" + self.block_num = block_num + self._conv1 = ConvBNLayer(3, 32, 3, stride=2, act="relu", name=name + "_conv1") + self._conv2 = ConvBNLayer(32, 64, 3, act="relu", name=name + "_conv2") + if block_num == 3: + self._conv_0 = EntryFlowBottleneckBlock(64, 128, stride=2, name=name + "_0", relu_first=False) + self._conv_1 = EntryFlowBottleneckBlock(128, 256, stride=2, name=name + "_1", relu_first=True) + self._conv_2 = EntryFlowBottleneckBlock(256, 728, stride=2, name=name + "_2", relu_first=True) + elif block_num == 5: + self._conv_0 = EntryFlowBottleneckBlock(64, 128, stride=2, name=name + "_0", relu_first=False) + self._conv_1 = EntryFlowBottleneckBlock(128, 256, stride=1, name=name + "_1", relu_first=True) + self._conv_2 = EntryFlowBottleneckBlock(256, 256, stride=2, name=name + "_2", relu_first=True) + self._conv_3 = EntryFlowBottleneckBlock(256, 728, stride=1, name=name + "_3", relu_first=True) + self._conv_4 = EntryFlowBottleneckBlock(728, 728, stride=2, name=name + "_4", relu_first=True) + else: + sys.exit(-1) + + def forward(self, inputs: paddle.Tensor): + x = self._conv1(inputs) + x = self._conv2(x) + + if self.block_num == 3: + x = self._conv_0(x) + x = self._conv_1(x) + x = self._conv_2(x) + elif self.block_num == 5: + x = self._conv_0(x) + x = self._conv_1(x) + x = self._conv_2(x) + x = self._conv_3(x) + x = self._conv_4(x) + return x + + +class MiddleFlowBottleneckBlock(nn.Layer): + """Basic middle flow bottleneck block for Xception.""" + + def __init__(self, input_channels: int, output_channels: int, name: str): + super(MiddleFlowBottleneckBlock, self).__init__() + + self._conv_0 = SeparableConv(input_channels, output_channels, stride=1, name=name + "_branch2a_weights") + self._conv_1 = SeparableConv(output_channels, output_channels, stride=1, name=name + "_branch2b_weights") + self._conv_2 = SeparableConv(output_channels, output_channels, stride=1, name=name + "_branch2c_weights") + + def forward(self, inputs: paddle.Tensor): + conv0 = F.relu(inputs) + conv0 = self._conv_0(conv0) + conv1 = F.relu(conv0) + conv1 = self._conv_1(conv1) + conv2 = F.relu(conv1) + conv2 = self._conv_2(conv2) + return paddle.elementwise_add(x=inputs, y=conv2) + + +class MiddleFlow(nn.Layer): + """Middle flow for Xception.""" + + def __init__(self, block_num: int = 8): + super(MiddleFlow, self).__init__() + + self.block_num 
= block_num + self._conv_0 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_0") + self._conv_1 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_1") + self._conv_2 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_2") + self._conv_3 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_3") + self._conv_4 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_4") + self._conv_5 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_5") + self._conv_6 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_6") + self._conv_7 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_7") + if block_num == 16: + self._conv_8 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_8") + self._conv_9 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_9") + self._conv_10 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_10") + self._conv_11 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_11") + self._conv_12 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_12") + self._conv_13 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_13") + self._conv_14 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_14") + self._conv_15 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_15") + + def forward(self, inputs: paddle.Tensor): + x = self._conv_0(inputs) + x = self._conv_1(x) + x = self._conv_2(x) + x = self._conv_3(x) + x = self._conv_4(x) + x = self._conv_5(x) + x = self._conv_6(x) + x = self._conv_7(x) + if self.block_num == 16: + x = self._conv_8(x) + x = self._conv_9(x) + x = self._conv_10(x) + x = self._conv_11(x) + x = self._conv_12(x) + x = self._conv_13(x) + x = self._conv_14(x) + x = self._conv_15(x) + return x + + +class ExitFlowBottleneckBlock(nn.Layer): + """Basic exit flow bottleneck block for Xception.""" + + def __init__(self, input_channels: int, output_channels1: int, output_channels2: int, name: str): + super(ExitFlowBottleneckBlock, self).__init__() + + self._short = Conv2d( + in_channels=input_channels, + out_channels=output_channels2, + kernel_size=1, + stride=2, + padding=0, + weight_attr=ParamAttr(name + "_branch1_weights"), + bias_attr=False) + self._conv_1 = SeparableConv(input_channels, output_channels1, stride=1, name=name + "_branch2a_weights") + self._conv_2 = SeparableConv(output_channels1, output_channels2, stride=1, name=name + "_branch2b_weights") + self._pool = MaxPool2d(kernel_size=3, stride=2, padding=1) + + def forward(self, inputs: paddle.Tensor): + short = self._short(inputs) + conv0 = F.relu(inputs) + conv1 = self._conv_1(conv0) + conv2 = F.relu(conv1) + conv2 = self._conv_2(conv2) + pool = self._pool(conv2) + return paddle.elementwise_add(x=short, y=pool) + + +class ExitFlow(nn.Layer): + def __init__(self, class_dim: int): + super(ExitFlow, self).__init__() + + name = "exit_flow" + + self._conv_0 = ExitFlowBottleneckBlock(728, 728, 1024, name=name + "_1") + self._conv_1 = SeparableConv(1024, 1536, stride=1, name=name + "_2") + self._conv_2 = SeparableConv(1536, 2048, stride=1, name=name + "_3") + self._pool = AdaptiveAvgPool2d(1) + stdv = 1.0 / math.sqrt(2048 * 1.0) + self._out = Linear( + 2048, + class_dim, + weight_attr=ParamAttr(name="fc_weights", initializer=Uniform(-stdv, stdv)), + bias_attr=ParamAttr(name="fc_offset")) + + def forward(self, inputs: paddle.Tensor): + conv0 = self._conv_0(inputs) + conv1 = self._conv_1(conv0) + conv1 = F.relu(conv1) + conv2 = self._conv_2(conv1) + conv2 = F.relu(conv2) + pool = self._pool(conv2) + pool = paddle.reshape(pool, [0, -1]) + out = self._out(pool) + return 
out + + +@moduleinfo( + name="xception71_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="Xception71_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.1.0", + meta=ImageClassifierModule) +class Xception71(nn.Layer): + def __init__(self, class_dim=1000, load_checkpoint: str = None): + super(Xception71, self).__init__() + self.entry_flow_block_num = 5 + self.middle_flow_block_num = 16 + self._entry_flow = EntryFlow(self.entry_flow_block_num) + self._middle_flow = MiddleFlow(self.middle_flow_block_num) + self._exit_flow = ExitFlow(class_dim) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'xception71_imagenet.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/xception71_imagenet.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def forward(self, inputs): + x = self._entry_flow(inputs) + x = self._middle_flow(x) + x = self._exit_flow(x) + return x diff --git a/modules/image/colorization/user_guided_colorization/data_feed.py b/modules/image/colorization/user_guided_colorization/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..94aada5b0ded46764b465ba8cbcfa55987b11913 --- /dev/null +++ b/modules/image/colorization/user_guided_colorization/data_feed.py @@ -0,0 +1,133 @@ +import paddle +import numpy as np + + +class ColorizeHint: + """Get hint and mask images for colorization. + + This class is prepared for user-guided colorization tasks. Taking the original RGB images as input, + it obtains the local hints and the corresponding mask used to guide the colorization process. + + Args: + percent(float): Probability for ignoring hint in an iteration. + num_points(int): Number of selected hints in an iteration. + samp(str): Sample method, default is normal. + use_avg(bool): Whether to use mean in selected hint area. 
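        Note: when num_points is None, the sampler keeps drawing patches while
        np.random.rand() > (1 - percent), so the number of hints per image is
        geometrically distributed with mean percent / (1 - percent)
        (e.g. percent = 0.9 yields about 9 hint patches on average).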
+ + Return: + hint(np.ndarray): hint images + mask(np.ndarray): mask images + """ + + def __init__(self, percent: float, num_points: int = None, samp: str = 'normal', use_avg: bool = True): + self.percent = percent + self.num_points = num_points + self.samp = samp + self.use_avg = use_avg + + def __call__(self, data: np.ndarray, hint: np.ndarray, mask: np.ndarray): + sample_Ps = [1, 2, 3, 4, 5, 6, 7, 8, 9] + self.data = data + self.hint = hint + self.mask = mask + N, C, H, W = data.shape + for nn in range(N): + pp = 0 + cont_cond = True + while cont_cond: + if self.num_points is None: # draw from geometric + cont_cond = np.random.rand() > (1 - self.percent) + else: # add certain number of points + cont_cond = pp < self.num_points + if not cont_cond: # skip out of loop if condition not met + continue + P = np.random.choice(sample_Ps) # patch size + # sample location + if self.samp == 'normal': # normal (Gaussian) distribution around the image center + h = int(np.clip(np.random.normal((H - P + 1) / 2., (H - P + 1) / 4.), 0, H - P)) + w = int(np.clip(np.random.normal((W - P + 1) / 2., (W - P + 1) / 4.), 0, W - P)) + else: # uniform distribution + h = np.random.randint(H - P + 1) + w = np.random.randint(W - P + 1) + # add color point + if self.use_avg: + hint[nn, :, h:h + P, w:w + P] = np.mean( + np.mean(data[nn, :, h:h + P, w:w + P], axis=2, keepdims=True), axis=1, keepdims=True).reshape( + 1, C, 1, 1) + else: + hint[nn, :, h:h + P, w:w + P] = data[nn, :, h:h + P, w:w + P] + mask[nn, :, h:h + P, w:w + P] = 1 + # increment counter + pp += 1 + + mask -= 0.5 + return hint, mask + + +class ColorizePreprocess: + """Prepare dataset for image Colorization. + + Args: + ab_thresh(float): Thresh value for setting mask value. + p(float): Probability for ignoring hint in an iteration. + num_points(int): Number of selected hints in an iteration. + samp(str): Sample method, default is normal. + use_avg(bool): Whether to use mean in selected hint area. + + Return: + data(dict): The preprocessed data for colorization. + + """ + + def __init__(self, + ab_thresh: float = 0., + p: float = 0., + num_points: int = None, + samp: str = 'normal', + use_avg: bool = True): + self.ab_thresh = ab_thresh + self.p = p + self.num_points = num_points + self.samp = samp + self.use_avg = use_avg + self.gethint = ColorizeHint(percent=self.p, num_points=self.num_points, samp=self.samp, use_avg=self.use_avg) + + def __call__(self, data_lab): + """ + This method separates the L and ab channels and obtains hint, mask and real_B_enc as the input for the colorization task. + + Args: + data_lab(np.ndarray|paddle.Tensor): LAB image. + + Returns: + data(dict): The preprocessed data for colorization. + """ + if type(data_lab) is not np.ndarray: + data_lab = data_lab.numpy() + data = {} + A = 2 * 110 / 10 + 1 + data['A'] = data_lab[:, [0], :, :] + data['B'] = data_lab[:, 1:, :, :] + if self.ab_thresh > 0: # mask out grayscale images + thresh = 1. * self.ab_thresh / 110 + mask = np.sum( + np.abs(np.max(np.max(data['B'], axis=3), axis=2) - np.min(np.min(data['B'], axis=3), axis=2)), axis=1) + mask = (mask >= thresh) + data['A'] = data['A'][mask, :, :, :] + data['B'] = data['B'][mask, :, :, :] + if np.sum(mask) == 0: + return None + data_ab_rs = np.round((data['B'][:, :, ::4, ::4] * 110. + 110.) / 10.) 
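# Binning arithmetic: data['B'] holds ab values scaled by 1/110 (the same
# convention the ab_thresh test above relies on), so B * 110 + 110 lies in
# [0, 220] and rounding after division by 10 gives integer bins 0..22, i.e.
# A = 2 * 110 / 10 + 1 = 23 bins per ab axis. The encoding below,
# a_bin * A + b_bin, therefore lands in [0, 528] -- one class per cell of the
# 23 x 23 ab grid, matching the 529-way classification branch of the network.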
# normalized bin number + data['real_B_enc'] = data_ab_rs[:, [0], :, :] * A + data_ab_rs[:, [1], :, :] + data['hint_B'] = np.zeros(shape=data['B'].shape) + data['mask_B'] = np.zeros(shape=data['A'].shape) + data['hint_B'], data['mask_B'] = self.gethint(data['B'], data['hint_B'], data['mask_B']) + data['A'] = paddle.to_tensor(data['A'].astype(np.float32)) + data['B'] = paddle.to_tensor(data['B'].astype(np.float32)) + data['real_B_enc'] = paddle.to_tensor(data['real_B_enc'].astype(np.int64)) + data['hint_B'] = paddle.to_tensor(data['hint_B'].astype(np.float32)) + data['mask_B'] = paddle.to_tensor(data['mask_B'].astype(np.float32)) + return data diff --git a/modules/image/colorization/user_guided_colorization/module.py b/modules/image/colorization/user_guided_colorization/module.py new file mode 100644 index 0000000000000000000000000000000000000000..e31017aa613b9ed4b7b9b56072e0afee088632bb --- /dev/null +++ b/modules/image/colorization/user_guided_colorization/module.py @@ -0,0 +1,247 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import paddle +import paddle.nn as nn +from paddle.nn import Conv2D, Conv2DTranspose +from paddlehub.module.module import moduleinfo +import paddlehub.process.transforms as T +from paddlehub.module.cv_module import ImageColorizeModule +from user_guided_colorization.data_feed import ColorizePreprocess + + +@moduleinfo( + name="user_guided_colorization", + type="CV/image_editing", + author="paddlepaddle", + author_email="", + summary="User_guided_colorization is a image colorization model, this module is trained with ILSVRC2012 dataset.", + version="1.0.0", + meta=ImageColorizeModule) +class UserGuidedColorization(nn.Layer): + """ + Userguidedcolorization, see https://github.com/haoyuying/colorization-pytorch + + Args: + use_tanh (bool): Whether to use tanh as final activation function. + classification (bool): Whether to switch classification branch for optimization. + load_checkpoint (str): Pretrained checkpoint path. 
+ + """ + + def __init__(self, use_tanh: bool = True, classification: bool = True, load_checkpoint: str = None): + super(UserGuidedColorization, self).__init__() + self.input_nc = 4 + self.output_nc = 2 + self.classification = classification + # Conv1 + model1 = ( + Conv2D(self.input_nc, 64, 3, 1, 1), + nn.ReLU(), + Conv2D(64, 64, 3, 1, 1), + nn.ReLU(), + nn.BatchNorm(64), + ) + + # Conv2 + model2 = ( + Conv2D(64, 128, 3, 1, 1), + nn.ReLU(), + Conv2D(128, 128, 3, 1, 1), + nn.ReLU(), + nn.BatchNorm(128), + ) + + # Conv3 + model3 = ( + Conv2D(128, 256, 3, 1, 1), + nn.ReLU(), + Conv2D(256, 256, 3, 1, 1), + nn.ReLU(), + Conv2D(256, 256, 3, 1, 1), + nn.ReLU(), + nn.BatchNorm(256), + ) + + # Conv4 + model4 = ( + Conv2D(256, 512, 3, 1, 1), + nn.ReLU(), + Conv2D(512, 512, 3, 1, 1), + nn.ReLU(), + Conv2D(512, 512, 3, 1, 1), + nn.ReLU(), + nn.BatchNorm(512), + ) + + # Conv5 + model5 = ( + Conv2D(512, 512, 3, 1, 2, 2), + nn.ReLU(), + Conv2D(512, 512, 3, 1, 2, 2), + nn.ReLU(), + Conv2D(512, 512, 3, 1, 2, 2), + nn.ReLU(), + nn.BatchNorm(512), + ) + + # Conv6 + model6 = ( + Conv2D(512, 512, 3, 1, 2, 2), + nn.ReLU(), + Conv2D(512, 512, 3, 1, 2, 2), + nn.ReLU(), + Conv2D(512, 512, 3, 1, 2, 2), + nn.ReLU(), + nn.BatchNorm(512), + ) + + # Conv7 + model7 = ( + Conv2D(512, 512, 3, 1, 1), + nn.ReLU(), + Conv2D(512, 512, 3, 1, 1), + nn.ReLU(), + Conv2D(512, 512, 3, 1, 1), + nn.ReLU(), + nn.BatchNorm(512), + ) + + # Conv8 + model8up = (Conv2DTranspose(512, 256, kernel_size=4, stride=2, padding=1), ) + model3short8 = (Conv2D(256, 256, 3, 1, 1), ) + model8 = ( + nn.ReLU(), + Conv2D(256, 256, 3, 1, 1), + nn.ReLU(), + Conv2D(256, 256, 3, 1, 1), + nn.ReLU(), + nn.BatchNorm(256), + ) + + # Conv9 + model9up = (Conv2DTranspose(256, 128, kernel_size=4, stride=2, padding=1), ) + model2short9 = (Conv2D( + 128, + 128, + 3, + 1, + 1, + ), ) + model9 = (nn.ReLU(), Conv2D(128, 128, 3, 1, 1), nn.ReLU(), nn.BatchNorm(128)) + + # Conv10 + model10up = (Conv2DTranspose(128, 128, kernel_size=4, stride=2, padding=1), ) + model1short10 = (Conv2D(64, 128, 3, 1, 1), ) + model10 = (nn.ReLU(), Conv2D(128, 128, 3, 1, 1), nn.LeakyReLU(negative_slope=0.2)) + model_class = (Conv2D(256, 529, 1), ) + + if use_tanh: + model_out = (Conv2D(128, 2, 1, 1, 0, 1), nn.Tanh()) + else: + model_out = (Conv2D(128, 2, 1, 1, 0, 1), ) + + self.model1 = nn.Sequential(*model1) + self.model2 = nn.Sequential(*model2) + self.model3 = nn.Sequential(*model3) + self.model4 = nn.Sequential(*model4) + self.model5 = nn.Sequential(*model5) + self.model6 = nn.Sequential(*model6) + self.model7 = nn.Sequential(*model7) + self.model8up = nn.Sequential(*model8up) + self.model8 = nn.Sequential(*model8) + self.model9up = nn.Sequential(*model9up) + self.model9 = nn.Sequential(*model9) + self.model10up = nn.Sequential(*model10up) + self.model10 = nn.Sequential(*model10) + self.model3short8 = nn.Sequential(*model3short8) + self.model2short9 = nn.Sequential(*model2short9) + self.model1short10 = nn.Sequential(*model1short10) + self.model_class = nn.Sequential(*model_class) + self.model_out = nn.Sequential(*model_out) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint) + self.set_dict(model_dict) + print("load custom checkpoint success") + else: + checkpoint = os.path.join(self.directory, 'user_guided.pdparams') + if not os.path.exists(checkpoint): + os.system('wget https://paddlehub.bj.bcebos.com/dygraph/image_colorization/user_guided.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint) + + self.set_dict(model_dict) + print("load 
pretrained checkpoint success") + + def transforms(self, images: str, is_train: bool = True) -> callable: + if is_train: + transform = T.Compose( + [T.Resize((256, 256), interp='NEAREST'), + T.RandomPaddingCrop(crop_size=176), + T.RGB2LAB()], + stay_rgb=True, + is_permute=False) + else: + transform = T.Compose([T.Resize( + (256, 256), interp='NEAREST'), T.RGB2LAB()], + stay_rgb=True, + is_permute=False) + return transform(images) + + def preprocess(self, inputs: paddle.Tensor, ab_thresh: float = 0., prob: float = 0.): + self.preprocess = ColorizePreprocess(ab_thresh=ab_thresh, p=prob) + return self.preprocess(inputs) + + def forward(self, + input_A: paddle.Tensor, + input_B: paddle.Tensor, + mask_B: paddle.Tensor, + real_b: paddle.Tensor = None, + real_B_enc: paddle.Tensor = None) -> paddle.Tensor: + conv1_2 = self.model1(paddle.concat([input_A, input_B, mask_B], axis=1)) + conv2_2 = self.model2(conv1_2[:, :, ::2, ::2]) + conv3_3 = self.model3(conv2_2[:, :, ::2, ::2]) + conv4_3 = self.model4(conv3_3[:, :, ::2, ::2]) + conv5_3 = self.model5(conv4_3) + conv6_3 = self.model6(conv5_3) + conv7_3 = self.model7(conv6_3) + conv8_up = self.model8up(conv7_3) + self.model3short8(conv3_3) + conv8_3 = self.model8(conv8_up) + + if self.classification: + out_class = self.model_class(conv8_3) + conv9_up = self.model9up(conv8_3.detach()) + self.model2short9(conv2_2.detach()) + conv9_3 = self.model9(conv9_up) + conv10_up = self.model10up(conv9_3) + self.model1short10(conv1_2.detach()) + conv10_2 = self.model10(conv10_up) + out_reg = self.model_out(conv10_2) + else: + out_class = self.model_class(conv8_3.detach()) + conv9_up = self.model9up(conv8_3) + self.model2short9(conv2_2) + conv9_3 = self.model9(conv9_up) + conv10_up = self.model10up(conv9_3) + self.model1short10(conv1_2) + conv10_2 = self.model10(conv10_up) + out_reg = self.model_out(conv10_2) + + return out_class, out_reg + + +if __name__ == "__main__": + place = paddle.CUDAPlace(0) + paddle.disable_static() + model = UserGuidedColorization() + model.eval() diff --git a/hub_module/modules/image/face_detection/README.md b/modules/image/face_detection/README.md similarity index 100% rename from hub_module/modules/image/face_detection/README.md rename to modules/image/face_detection/README.md diff --git a/hub_module/modules/image/face_detection/pyramidbox_face_detection/README.md b/modules/image/face_detection/pyramidbox_face_detection/README.md similarity index 100% rename from hub_module/modules/image/face_detection/pyramidbox_face_detection/README.md rename to modules/image/face_detection/pyramidbox_face_detection/README.md diff --git a/hub_module/modules/image/classification/vgg16_imagenet/__init__.py b/modules/image/face_detection/pyramidbox_face_detection/__init__.py similarity index 100% rename from hub_module/modules/image/classification/vgg16_imagenet/__init__.py rename to modules/image/face_detection/pyramidbox_face_detection/__init__.py diff --git a/modules/image/face_detection/pyramidbox_face_detection/data_feed.py b/modules/image/face_detection/pyramidbox_face_detection/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..65b914c25d6d45526adf09988b88974525828e77 --- /dev/null +++ b/modules/image/face_detection/pyramidbox_face_detection/data_feed.py @@ -0,0 +1,122 @@ +# coding=utf-8 +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader'] + + +def preprocess(image): + if image.mode == 'L': + image = image.convert('RGB') + shrink, 
max_shrink = get_shrink(image.size[1], image.size[0])
+    image_shape = [3, image.size[1], image.size[0]]
+    if shrink != 1:
+        h, w = int(image_shape[1] * shrink), int(image_shape[2] * shrink)
+        image = image.resize((w, h), Image.ANTIALIAS)
+        image_shape = [3, h, w]
+
+    img = np.array(image)
+    img = to_chw_bgr(img)
+    mean = [104., 117., 123.]
+    scale = 0.007843
+    img = img.astype('float32')
+    img -= np.array(mean)[:, np.newaxis, np.newaxis].astype('float32')
+    img = img * scale
+    img = np.array(img)
+    return img
+
+
+def to_chw_bgr(image):
+    """
+    Transpose image from HWC to CHW and from RGB to BGR.
+
+    Args:
+        image (np.array): an image with HWC and RGB layout.
+    """
+    # HWC to CHW
+    if len(image.shape) == 3:
+        image = np.swapaxes(image, 1, 2)
+        image = np.swapaxes(image, 1, 0)
+    # RGB to BGR
+    image = image[[2, 1, 0], :, :]
+    return image
+
+
+def get_shrink(height, width):
+    """
+    Calculate the shrink factor for the original image according to its
+    height and width, so that very large inputs are scaled down before detection.
+
+    Args:
+        height (int): image height.
+        width (int): image width.
+    """
+    max_shrink_v1 = (0x7fffffff / 577.0 / (height * width))**0.5
+    max_shrink_v2 = ((678 * 1024 * 2.0 * 2.0) / (height * width))**0.5
+
+    def get_round(x, loc):
+        str_x = str(x)
+        if '.' in str_x:
+            str_before, str_after = str_x.split('.')
+            len_after = len(str_after)
+            if len_after >= 3:
+                str_final = str_before + '.' + str_after[0:loc]
+                return float(str_final)
+            else:
+                return x
+
+    max_shrink = get_round(min(max_shrink_v1, max_shrink_v2), 2) - 0.3
+    if max_shrink >= 1.5 and max_shrink < 2:
+        max_shrink = max_shrink - 0.1
+    elif max_shrink >= 2 and max_shrink < 3:
+        max_shrink = max_shrink - 0.2
+    elif max_shrink >= 3 and max_shrink < 4:
+        max_shrink = max_shrink - 0.3
+    elif max_shrink >= 4 and max_shrink < 5:
+        max_shrink = max_shrink - 0.4
+    elif max_shrink >= 5:
+        max_shrink = max_shrink - 0.5
+    elif max_shrink <= 0.1:
+        max_shrink = 0.1
+    shrink = max_shrink if max_shrink < 1 else 1
+    return shrink, max_shrink
+
+
+def reader(images, paths):
+    """
+    Preprocess to yield image.
+
+    Args:
+        images (list(numpy.ndarray)): images data, shape of each is [H, W, C], color space is BGR.
+        paths (list[str]): paths to images.
+
+    Yield:
+        each (collections.OrderedDict): info of original image, preprocessed image.
+    """
+    component = list()
+    if paths is not None:
+        assert type(paths) is list, "paths should be a list."
+        for im_path in paths:
+            each = OrderedDict()
+            assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path)
+            each['org_im'] = Image.open(im_path)
+            each['org_im_width'], each['org_im_height'] = each['org_im'].size
+            each['org_im_path'] = im_path
+            component.append(each)
+    if images is not None:
+        assert type(images) is list, "images should be a list."
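+        # Raw ndarrays carry no file path, so a pseudo path derived from the
+        # current timestamp is recorded below; it keeps the 'org_im_path' key
+        # usable for naming saved visualizations. The arrays arrive in BGR
+        # (the OpenCV convention) and are converted to RGB PIL Images.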
+        for im in images:
+            each = OrderedDict()
+            each['org_im'] = Image.fromarray(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
+            each['org_im_width'], each['org_im_height'] = each['org_im'].size
+            each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6)
+            component.append(each)
+
+    for element in component:
+        element['image'] = preprocess(element['org_im'])
+        yield element
diff --git a/modules/image/face_detection/pyramidbox_face_detection/module.py b/modules/image/face_detection/pyramidbox_face_detection/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b44a11da263b1b4fe4e0f55ca26c9d7929f1241
--- /dev/null
+++ b/modules/image/face_detection/pyramidbox_face_detection/module.py
@@ -0,0 +1,182 @@
+# coding=utf-8
+from __future__ import absolute_import
+from __future__ import division
+
+import ast
+import argparse
+import os
+
+import numpy as np
+import paddle.fluid as fluid
+import paddlehub as hub
+from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+from paddlehub.module.module import moduleinfo, runnable, serving
+
+from pyramidbox_face_detection.data_feed import reader
+from pyramidbox_face_detection.processor import postprocess, base64_to_cv2
+
+
+@moduleinfo(
+    name="pyramidbox_face_detection",
+    type="CV/face_detection",
+    author="baidu-vis",
+    author_email="",
+    summary="Baidu's PyramidBox model for face detection.",
+    version="1.1.0")
+class PyramidBoxFaceDetection(hub.Module):
+    def _initialize(self):
+        self.default_pretrained_model_path = os.path.join(self.directory, "pyramidbox_face_detection_widerface")
+        self._set_config()
+
+    def _set_config(self):
+        """
+        predictor config setting
+        """
+        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
+        cpu_config.disable_glog_info()
+        cpu_config.disable_gpu()
+        self.cpu_predictor = create_paddle_predictor(cpu_config)
+
+        try:
+            _places = os.environ["CUDA_VISIBLE_DEVICES"]
+            int(_places[0])
+            use_gpu = True
+        except:
+            use_gpu = False
+        if use_gpu:
+            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
+            gpu_config.disable_glog_info()
+            gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0)
+            self.gpu_predictor = create_paddle_predictor(gpu_config)
+
+    def face_detection(self,
+                       images=None,
+                       paths=None,
+                       data=None,
+                       use_gpu=False,
+                       output_dir='detection_result',
+                       visualization=False,
+                       score_thresh=0.15):
+        """
+        API for face detection.
+
+        Args:
+            images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
+            paths (list[str]): The paths of images.
+            use_gpu (bool): Whether to use gpu.
+            output_dir (str): The path to store output images.
+            visualization (bool): Whether to save image or not.
+            score_thresh (float): score threshold to limit the detection result.
+
+        Returns:
+            res (list[dict]): The result of face detection, keys are 'data' and 'path', the corresponding values are:
+                data (list[dict]): 5 keys, where
+                    'left', 'top', 'right', 'bottom' are the coordinates of the detection bounding box,
+                    'confidence' is the confidence of this bbox.
+                path (str): The path of original image.
+        """
+        if use_gpu:
+            try:
+                _places = os.environ["CUDA_VISIBLE_DEVICES"]
+                int(_places[0])
+            except:
+                raise RuntimeError(
+                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
+ ) + + # compatibility with older versions + if data: + if 'image' in data: + if paths is None: + paths = list() + paths += data['image'] + + res = list() + # process one by one + for element in reader(images, paths): + image = np.expand_dims(element['image'], axis=0).astype('float32') + image_tensor = PaddleTensor(image.copy()) + data_out = self.gpu_predictor.run([image_tensor]) if use_gpu else self.cpu_predictor.run([image_tensor]) + # print(len(data_out)) # 1 + out = postprocess( + data_out=data_out[0].as_ndarray(), + org_im=element['org_im'], + org_im_path=element['org_im_path'], + org_im_width=element['org_im_width'], + org_im_height=element['org_im_height'], + output_dir=output_dir, + visualization=visualization, + score_thresh=score_thresh) + res.append(out) + return res + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.face_detection(images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.face_detection( + paths=[args.input_path], + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization, + score_thresh=args.score_thresh) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='detection_result', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument( + '--score_thresh', type=ast.literal_eval, default=0.15, help="score threshold of face detection.") diff --git a/modules/image/face_detection/pyramidbox_face_detection/processor.py b/modules/image/face_detection/pyramidbox_face_detection/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..0d27ee57c6785f867397405bcfdcb0cd3126ebd4 --- /dev/null +++ b/modules/image/face_detection/pyramidbox_face_detection/processor.py @@ -0,0 +1,126 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import time +from collections import OrderedDict + +import base64 +import cv2 +import numpy as np +from PIL import Image, ImageDraw + +__all__ = ['base64_to_cv2', 'postprocess'] + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def check_dir(dir_path): + """ + Create directory to save processed image. + + Args: + dir_path (str): directory path to save images. + + """ + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(img, org_im_path, output_dir): + """ + Get save image name. + """ + # name prefix of original image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + # extension + if img.mode == 'RGBA': + ext = '.png' + else: + ext = '.jpg' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join(output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + return save_im_path + + +def draw_bboxes(image, bboxes, org_im_path, output_dir): + """ + Draw bounding boxes on image. + + Args: + bboxes (np.array): bounding boxes. + """ + draw = ImageDraw.Draw(image) + for i in range(len(bboxes)): + xmin, ymin, xmax, ymax = bboxes[i] + (left, right, top, bottom) = (xmin, xmax, ymin, ymax) + draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=4, fill='red') + save_name = get_save_image_name(image, org_im_path, output_dir) + image.save(save_name) + + +def postprocess(data_out, org_im, org_im_path, org_im_width, org_im_height, output_dir, visualization, score_thresh): + """ + Postprocess output of network. one image at a time. + + Args: + data_out (numpy.ndarray): output of network. + org_im: (PIL.Image object): original image. + org_im_path (str): path of original image. + org_im_width (int): width of original image. + org_im_height (int): height of original image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + + Returns: + output (dict): keys are 'data' and 'path', the correspoding values are: + data (list[dict]): 5 keys, where + 'left', 'top', 'right', 'bottom' are the coordinate of detection bounding box, + 'confidence' is the confidence this bbox. + path (str): The path of original image. 
+ """ + output = dict() + output['data'] = list() + output['path'] = org_im_path + + if data_out.shape[0] == 0: + print("No face detected in {}".format(org_im_path)) + else: + det_conf = data_out[:, 1] + det_xmin = org_im_width * data_out[:, 2] + det_ymin = org_im_height * data_out[:, 3] + det_xmax = org_im_width * data_out[:, 4] + det_ymax = org_im_height * data_out[:, 5] + dets = np.column_stack((det_xmin, det_ymin, det_xmax, det_ymax, det_conf)) + keep_index = np.where(dets[:, 4] >= score_thresh)[0] + dets = dets[keep_index, :] + + if dets.shape[0] == 0: + print("No face detected in {}".format(org_im_path)) + else: + for detect_face in dets: + dt_i = dict() + dt_i['left'] = float(detect_face[0]) + dt_i['top'] = float(detect_face[1]) + dt_i['right'] = float(detect_face[2]) + dt_i['bottom'] = float(detect_face[3]) + dt_i['confidence'] = float(detect_face[4]) + output['data'].append(dt_i) + + if visualization: + check_dir(output_dir) + draw_bboxes(org_im, dets[:, 0:4], org_im_path, output_dir) + return output diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_mobile/README.md b/modules/image/face_detection/pyramidbox_lite_mobile/README.md similarity index 100% rename from hub_module/modules/image/face_detection/pyramidbox_lite_mobile/README.md rename to modules/image/face_detection/pyramidbox_lite_mobile/README.md diff --git a/hub_module/modules/image/face_detection/pyramidbox_face_detection/__init__.py b/modules/image/face_detection/pyramidbox_lite_mobile/__init__.py similarity index 100% rename from hub_module/modules/image/face_detection/pyramidbox_face_detection/__init__.py rename to modules/image/face_detection/pyramidbox_lite_mobile/__init__.py diff --git a/modules/image/face_detection/pyramidbox_lite_mobile/data_feed.py b/modules/image/face_detection/pyramidbox_lite_mobile/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..a2cb2883fa07fa76749f26bbdf63298520da4fef --- /dev/null +++ b/modules/image/face_detection/pyramidbox_lite_mobile/data_feed.py @@ -0,0 +1,63 @@ +# coding=utf-8 +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np + +__all__ = ['reader'] + + +def preprocess(org_im, shrink): + image = org_im.copy() + image_height, image_width, image_channel = image.shape + if shrink != 1: + image_height, image_width = int(image_height * shrink), int(image_width * shrink) + image = cv2.resize(image, (image_width, image_height), cv2.INTER_NEAREST) + # HWC to CHW + if len(image.shape) == 3: + image = np.swapaxes(image, 1, 2) + image = np.swapaxes(image, 1, 0) + # mean, std + mean = [104., 117., 123.] + scale = 0.007843 + image = image.astype('float32') + image -= np.array(mean)[:, np.newaxis, np.newaxis].astype('float32') + image = image * scale + return image, image_height, image_width + + +def reader(images, paths, shrink): + """ + Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C], color space is BGR. + paths (list[str]): paths to images. + shrink (float): parameter to control the resize scale in preprocess. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths is not None: + assert type(paths) is list, "paths should be a list." 
+ for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path) + each['org_im'] = im + each['org_im_path'] = im_path + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." + for im in images: + each = OrderedDict() + each['org_im'] = im + each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + component.append(each) + + for element in component: + element['image'], element['image_height'], element['image_width'] = preprocess(element['org_im'], shrink) + yield element diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_mobile/label_list.txt b/modules/image/face_detection/pyramidbox_lite_mobile/label_list.txt similarity index 100% rename from hub_module/modules/image/face_detection/pyramidbox_lite_mobile/label_list.txt rename to modules/image/face_detection/pyramidbox_lite_mobile/label_list.txt diff --git a/modules/image/face_detection/pyramidbox_lite_mobile/module.py b/modules/image/face_detection/pyramidbox_lite_mobile/module.py new file mode 100644 index 0000000000000000000000000000000000000000..468aff860de23e9466d751217ab777f1895e5b96 --- /dev/null +++ b/modules/image/face_detection/pyramidbox_lite_mobile/module.py @@ -0,0 +1,194 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division + +import ast +import argparse +import os + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving + +from pyramidbox_lite_mobile.data_feed import reader +from pyramidbox_lite_mobile.processor import postprocess, base64_to_cv2 + + +@moduleinfo( + name="pyramidbox_lite_mobile", + type="CV/face_detection", + author="baidu-vis", + author_email="", + summary="PyramidBox-Lite-Mobile is a high-performance face detection model.", + version="1.2.0") +class PyramidBoxLiteMobile(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, "pyramidbox_lite_mobile_face_detection") + self._set_config() + self.processor = self + + def _set_config(self): + """ + predictor config setting + """ + cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def face_detection(self, + images=None, + paths=None, + data=None, + use_gpu=False, + output_dir='detection_result', + visualization=False, + shrink=0.5, + confs_threshold=0.6): + """ + API for face detection. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + paths (list[str]): The paths of images. + use_gpu (bool): Whether to use gpu. + output_dir (str): The path to store output images. + visualization (bool): Whether to save image or not. + shrink (float): parameter to control the resize scale in preprocess. + confs_threshold (float): confidence threshold. 
+ + Returns: + res (list[dict]): The result of face detection and save path of images. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." + ) + + # compatibility with older versions + if data: + if 'image' in data: + if paths is None: + paths = list() + paths += data['image'] + elif 'data' in data: + if images is None: + images = list() + images += data['data'] + + res = list() + # process one by one + for element in reader(images, paths, shrink): + image = np.expand_dims(element['image'], axis=0).astype('float32') + image_tensor = PaddleTensor(image.copy()) + data_out = self.gpu_predictor.run([image_tensor]) if use_gpu else self.cpu_predictor.run([image_tensor]) + out = postprocess( + data_out=data_out[0].as_ndarray(), + org_im=element['org_im'], + org_im_path=element['org_im_path'], + image_width=element['image_width'], + image_height=element['image_height'], + output_dir=output_dir, + visualization=visualization, + shrink=shrink, + confs_threshold=confs_threshold) + res.append(out) + return res + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + var = program.global_block().vars['detection_output_0.tmp_1'] + var.desc.set_dtype(fluid.core.VarDesc.VarType.INT32) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.face_detection(images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.face_detection( + paths=[args.input_path], + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization, + shrink=args.shrink, + confs_threshold=args.confs_threshold) + return results + + def add_module_config_arg(self): + """ + Add the command config options. 
+ """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='detection_result', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument( + '--shrink', + type=ast.literal_eval, + default=0.5, + help="resize the image to shrink * original_shape before feeding into network.") + self.arg_input_group.add_argument( + '--confs_threshold', type=ast.literal_eval, default=0.6, help="confidence threshold.") diff --git a/modules/image/face_detection/pyramidbox_lite_mobile/processor.py b/modules/image/face_detection/pyramidbox_lite_mobile/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..5057ab5b1fc6c84fdd06a7fcc45336031fa2ba4d --- /dev/null +++ b/modules/image/face_detection/pyramidbox_lite_mobile/processor.py @@ -0,0 +1,117 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import time +from collections import OrderedDict + +import base64 +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['base64_to_cv2', 'postprocess'] + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def check_dir(dir_path): + """ + Create directory to save processed image. + + Args: + dir_path (str): directory path to save images. + + """ + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im, org_im_path, output_dir): + """ + Get save image name from source image path. + """ + # name prefix of original image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + # extension + img = Image.fromarray(org_im[:, :, ::-1]) + if img.mode == 'RGBA': + ext = '.png' + elif img.mode == 'RGB': + ext = '.jpg' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join(output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + + return save_im_path + + +def clip_bbox(bbox, img_height, img_width): + bbox['left'] = int(max(min(bbox['left'], img_width), 0.)) + bbox['top'] = int(max(min(bbox['top'], img_height), 0.)) + bbox['right'] = int(max(min(bbox['right'], img_width), 0.)) + bbox['bottom'] = int(max(min(bbox['bottom'], img_height), 0.)) + return bbox + + +def postprocess(data_out, org_im, org_im_path, image_height, image_width, output_dir, visualization, shrink, + confs_threshold): + """ + Postprocess output of network. one image at a time. + + Args: + data_out (numpy.ndarray): output of network. + org_im (numpy.ndarray): original image. + org_im_path (list): path of riginal image. + image_height (int): height of preprocessed image. + image_width (int): width of preprocessed image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + shrink (float): parameter to control the resize scale in preprocess. 
+ confs_threshold (float): confidence threshold. + + Returns: + output (dict): keys are 'data' and 'path', the correspoding values are: + data (list[dict]): 5 keys, where + 'left', 'top', 'right', 'bottom' are the coordinate of detection bounding box, + 'confidence' is the confidence this bbox. + path (str): The path of original image. + """ + output = dict() + output['data'] = list() + output['path'] = org_im_path + + for each_data in data_out: + # each_data is a list: [label, confidence, left, top, right, bottom] + if each_data[1] > confs_threshold: + dt_bbox = dict() + dt_bbox['confidence'] = float(each_data[1]) + dt_bbox['left'] = image_width * each_data[2] / shrink + dt_bbox['top'] = image_height * each_data[3] / shrink + dt_bbox['right'] = image_width * each_data[4] / shrink + dt_bbox['bottom'] = image_height * each_data[5] / shrink + dt_bbox = clip_bbox(dt_bbox, org_im.shape[0], org_im.shape[1]) + output['data'].append(dt_bbox) + + if visualization: + check_dir(output_dir) + save_im_path = get_save_image_name(org_im, org_im_path, output_dir) + im_out = org_im.copy() + if len(output['data']) > 0: + for bbox in output['data']: + cv2.rectangle(im_out, (bbox['left'], bbox['top']), (bbox['right'], bbox['bottom']), (255, 255, 0), 2) + cv2.imwrite(save_im_path, im_out) + + return output diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_mobile_mask/README.md b/modules/image/face_detection/pyramidbox_lite_mobile_mask/README.md similarity index 100% rename from hub_module/modules/image/face_detection/pyramidbox_lite_mobile_mask/README.md rename to modules/image/face_detection/pyramidbox_lite_mobile_mask/README.md diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_mobile/__init__.py b/modules/image/face_detection/pyramidbox_lite_mobile_mask/__init__.py similarity index 100% rename from hub_module/modules/image/face_detection/pyramidbox_lite_mobile/__init__.py rename to modules/image/face_detection/pyramidbox_lite_mobile_mask/__init__.py diff --git a/modules/image/face_detection/pyramidbox_lite_mobile_mask/data_feed.py b/modules/image/face_detection/pyramidbox_lite_mobile_mask/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..608eb0d7d8bea89e3a41f785ddefe519ce538af6 --- /dev/null +++ b/modules/image/face_detection/pyramidbox_lite_mobile_mask/data_feed.py @@ -0,0 +1,185 @@ +# coding=utf-8 +import os +import math +import time +from collections import OrderedDict + +import cv2 +import numpy as np + +__all__ = ['reader'] + +multi_scales = [0.3, 0.6, 0.9] + + +def bbox_vote(det): + order = det[:, 4].ravel().argsort()[::-1] + det = det[order, :] + if det.shape[0] == 0: + dets = np.array([[10, 10, 20, 20, 0.002]]) + det = np.empty(shape=[0, 5]) + while det.shape[0] > 0: + # IOU + area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1) + xx1 = np.maximum(det[0, 0], det[:, 0]) + yy1 = np.maximum(det[0, 1], det[:, 1]) + xx2 = np.minimum(det[0, 2], det[:, 2]) + yy2 = np.minimum(det[0, 3], det[:, 3]) + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + o = inter / (area[0] + area[:] - inter) + # nms + merge_index = np.where(o >= 0.3)[0] + det_accu = det[merge_index, :] + det = np.delete(det, merge_index, 0) + if merge_index.shape[0] <= 1: + if det.shape[0] == 0: + try: + dets = np.row_stack((dets, det_accu)) + except: + dets = det_accu + continue + det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4)) + max_score = np.max(det_accu[:, 4]) + det_accu_sum = np.zeros((1, 5)) + 
det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:]) + det_accu_sum[:, 4] = max_score + try: + dets = np.row_stack((dets, det_accu_sum)) + except: + dets = det_accu_sum + dets = dets[0:750, :] + return dets + + +def crop(image, pts, shift=0, scale=1.5, rotate=0, res_width=128, res_height=128): + res = (res_width, res_height) + idx1 = 0 + idx2 = 1 + # angle + alpha = 0 + if pts[idx2, 0] != -1 and pts[idx2, 1] != -1 and pts[idx1, 0] != -1 and pts[idx1, 1] != -1: + alpha = math.atan2(pts[idx2, 1] - pts[idx1, 1], pts[idx2, 0] - pts[idx1, 0]) * 180 / math.pi + pts[pts == -1] = np.inf + coord_min = np.min(pts, 0) + pts[pts == np.inf] = -1 + coord_max = np.max(pts, 0) + # coordinates of center point + c = np.array([coord_max[0] - (coord_max[0] - coord_min[0]) / 2, + coord_max[1] - (coord_max[1] - coord_min[1]) / 2]) # center + max_wh = max((coord_max[0] - coord_min[0]) / 2, (coord_max[1] - coord_min[1]) / 2) + # Shift the center point, rot add eyes angle + c = c + shift * max_wh + rotate = rotate + alpha + M = cv2.getRotationMatrix2D((c[0], c[1]), rotate, res[0] / (2 * max_wh * scale)) + M[0, 2] = M[0, 2] - (c[0] - res[0] / 2.0) + M[1, 2] = M[1, 2] - (c[1] - res[0] / 2.0) + image_out = cv2.warpAffine(image, M, res) + return image_out, M + + +def color_normalize(image, mean, std=None): + if image.shape[-1] == 1: + image = np.repeat(image, axis=2) + h, w, c = image.shape + image = np.transpose(image, (2, 0, 1)) + image = np.subtract(image.reshape(c, -1), mean[:, np.newaxis]).reshape(-1, h, w) + image = np.transpose(image, (1, 2, 0)) + return image + + +def process_image(org_im, face): + pts = np.array([ + face['left'], face['top'], face['right'], face['top'], face['left'], face['bottom'], face['right'], + face['bottom'] + ]).reshape(4, 2).astype(np.float32) + image_in, M = crop(org_im, pts) + image_in = image_in / 256.0 + image_in = color_normalize(image_in, mean=np.array([0.5, 0.5, 0.5])) + image_in = image_in.astype(np.float32).transpose([2, 0, 1]).reshape(-1, 3, 128, 128) + return image_in + + +def reader(face_detector, shrink, confs_threshold, images, paths, use_gpu, use_multi_scale): + """ + Preprocess to yield image. + + Args: + face_detector (class): class to detect faces. + shrink (float): parameter to control the resize scale in face_detector. + confs_threshold (float): confidence threshold of face_detector. + images (list(numpy.ndarray)): images data, shape of each is [H, W, C], color space is BGR. + paths (list[str]): paths to images. + use_gpu (bool): whether to use gpu in face_detector. + use_multi_scale (bool): whether to enable multi-scale face detection. + Yield: + element (collections.OrderedDict): info of original image, preprocessed image, contains 3 keys: + org_im (numpy.ndarray) : original image. + org_im_path (str): path to original image. + preprocessed (list[OrderedDict]):each element contains 2 keys: + face (dict): face detected in the original image. + image (numpy.ndarray): data to be fed into neural network. + """ + component = list() + if paths is not None: + assert type(paths) is list, "paths should be a list." + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path) + each['org_im'] = im + each['org_im_path'] = im_path + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." 
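+        # Raw ndarrays are tagged with a timestamp-based pseudo path below, as
+        # in the detector's own reader. When use_multi_scale is enabled, the
+        # face detector is then run once per scale in multi_scales (0.3, 0.6,
+        # 0.9) and the per-scale boxes are fused with bbox_vote before the mask
+        # classifier crops each face.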
+ for im in images: + each = OrderedDict() + each['org_im'] = im + each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + component.append(each) + + for element in component: + if use_multi_scale: + scale_res = list() + detect_faces = list() + for scale in multi_scales: + _detect_res = face_detector.face_detection( + images=[element['org_im']], + use_gpu=use_gpu, + visualization=False, + shrink=scale, + confs_threshold=confs_threshold) + + _s = list() + for _face in _detect_res[0]['data']: + _face_list = [_face['left'], _face['top'], _face['right'], _face['bottom'], _face['confidence']] + _s.append(_face_list) + + if _s: + scale_res.append(np.array(_s)) + + scale_res = np.row_stack(scale_res) + scale_res = bbox_vote(scale_res) + keep_index = np.where(scale_res[:, 4] >= confs_threshold)[0] + scale_res = scale_res[keep_index, :] + for data in scale_res: + face = {'left': data[0], 'top': data[1], 'right': data[2], 'bottom': data[3], 'confidence': data[4]} + detect_faces.append(face) + else: + _detect_res = face_detector.face_detection( + images=[element['org_im']], + use_gpu=use_gpu, + visualization=False, + shrink=shrink, + confs_threshold=confs_threshold) + detect_faces = _detect_res[0]['data'] + + element['preprocessed'] = list() + for face in detect_faces: + handled = OrderedDict() + handled['face'] = face + handled['image'] = process_image(element['org_im'], face) + element['preprocessed'].append(handled) + + yield element diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_mobile_mask/label_list.txt b/modules/image/face_detection/pyramidbox_lite_mobile_mask/label_list.txt similarity index 100% rename from hub_module/modules/image/face_detection/pyramidbox_lite_mobile_mask/label_list.txt rename to modules/image/face_detection/pyramidbox_lite_mobile_mask/label_list.txt diff --git a/modules/image/face_detection/pyramidbox_lite_mobile_mask/module.py b/modules/image/face_detection/pyramidbox_lite_mobile_mask/module.py new file mode 100644 index 0000000000000000000000000000000000000000..73d3c68512255ad024b19800d327f3d744d6da77 --- /dev/null +++ b/modules/image/face_detection/pyramidbox_lite_mobile_mask/module.py @@ -0,0 +1,257 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division + +import ast +import argparse +import os + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving + +from pyramidbox_lite_mobile_mask.data_feed import reader +from pyramidbox_lite_mobile_mask.processor import postprocess, base64_to_cv2 + + +@moduleinfo( + name="pyramidbox_lite_mobile_mask", + type="CV/face_detection", + author="baidu-vis", + author_email="", + summary= + "Pyramidbox-Lite-Mobile-Mask is a high-performance face detection model used to detect whether people wear masks.", + version="1.3.0") +class PyramidBoxLiteMobileMask(hub.Module): + def _initialize(self, face_detector_module=None): + """ + Args: + face_detector_module (class): module to detect face. 
+ """ + self.default_pretrained_model_path = os.path.join(self.directory, "pyramidbox_lite_mobile_mask_model") + if face_detector_module is None: + self.face_detector = hub.Module(name='pyramidbox_lite_mobile') + else: + self.face_detector = face_detector_module + self._set_config() + self.processor = self + + def _set_config(self): + """ + predictor config setting + """ + cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def set_face_detector_module(self, face_detector_module): + """ + Set face detector. + Args: + face_detector_module (class): module to detect face. + """ + self.face_detector = face_detector_module + + def get_face_detector_module(self): + return self.face_detector + + def face_detection(self, + images=None, + paths=None, + data=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='detection_result', + use_multi_scale=False, + shrink=0.5, + confs_threshold=0.6): + """ + API for face detection. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size of image tensor to be fed into the later classification network. + use_gpu (bool): Whether to use gpu. + visualization (bool): Whether to save image or not. + output_dir (str): The path to store output images. + use_multi_scale (bool): whether to enable multi-scale face detection. Enabling multi-scale face detection + can increase the accuracy to detect faces, however, + it reduce the prediction speed for the increase model calculation. + shrink (float): parameter to control the resize scale in preprocess. + confs_threshold (float): confidence threshold. + + Returns: + res (list[dict]): The result of face detection and save path of images. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + # compatibility with older versions + if data: + if 'image' in data: + if paths is None: + paths = list() + paths += data['image'] + elif 'data' in data: + if images is None: + images = list() + images += data['data'] + + # get all data + all_element = list() + for yield_data in reader(self.face_detector, shrink, confs_threshold, images, paths, use_gpu, use_multi_scale): + all_element.append(yield_data) + + image_list = list() + element_image_num = list() + for i in range(len(all_element)): + element_image = [handled['image'] for handled in all_element[i]['preprocessed']] + element_image_num.append(len(element_image)) + image_list.extend(element_image) + + total_num = len(image_list) + loop_num = int(np.ceil(total_num / batch_size)) + + predict_out = np.zeros((1, 2)) + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for element_id in range(batch_size): + try: + batch_data.append(image_list[handle_id + element_id]) + except: + pass + + image_arr = np.squeeze(np.array(batch_data), axis=1) + image_tensor = PaddleTensor(image_arr.copy()) + data_out = self.gpu_predictor.run([image_tensor]) if use_gpu else self.cpu_predictor.run([image_tensor]) + # len(data_out) == 1 + # data_out[0].as_ndarray().shape == (-1, 2) + data_out = data_out[0].as_ndarray() + predict_out = np.concatenate((predict_out, data_out)) + + predict_out = predict_out[1:] + # postprocess one by one + res = list() + for i in range(len(all_element)): + detect_faces_list = [handled['face'] for handled in all_element[i]['preprocessed']] + interval_left = sum(element_image_num[0:i]) + interval_right = interval_left + element_image_num[i] + out = postprocess( + confidence_out=predict_out[interval_left:interval_right], + org_im=all_element[i]['org_im'], + org_im_path=all_element[i]['org_im_path'], + detected_faces=detect_faces_list, + output_dir=output_dir, + visualization=visualization) + res.append(out) + return res + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + classifier_dir = os.path.join(dirname, 'mask_detector') + detector_dir = os.path.join(dirname, 'pyramidbox_lite') + self._save_classifier_model(classifier_dir, model_filename, params_filename, combined) + self._save_detector_model(detector_dir, model_filename, params_filename, combined) + + def _save_detector_model(self, dirname, model_filename=None, params_filename=None, combined=True): + self.face_detector.save_inference_model(dirname, model_filename, params_filename, combined) + + def _save_classifier_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.face_detection(images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. 
+ """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.face_detection( + paths=[args.input_path], + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization, + shrink=args.shrink, + confs_threshold=args.confs_threshold) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='detection_result', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument( + '--shrink', + type=ast.literal_eval, + default=0.5, + help="resize the image to `shrink * original_shape` before feeding into network.") + self.arg_input_group.add_argument( + '--confs_threshold', type=ast.literal_eval, default=0.6, help="confidence threshold.") diff --git a/modules/image/face_detection/pyramidbox_lite_mobile_mask/processor.py b/modules/image/face_detection/pyramidbox_lite_mobile_mask/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..4a9173f888b0994b8090dc7d1f2a2e0b72bdd208 --- /dev/null +++ b/modules/image/face_detection/pyramidbox_lite_mobile_mask/processor.py @@ -0,0 +1,139 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import time +from collections import OrderedDict + +import base64 +import cv2 +import numpy as np +from PIL import Image, ImageDraw + +__all__ = ['base64_to_cv2', 'postprocess'] + +label_list = ['NO MASK', 'MASK'] + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def check_dir(dir_path): + """ + Create directory to save processed image. + + Args: + dir_path (str): directory path to save images. + + """ + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im, org_im_path, output_dir): + """ + Get save image name from source image path. 
+ """ + # name prefix of original image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + # extension + img = Image.fromarray(org_im[:, :, ::-1]) + if img.mode == 'RGBA': + ext = '.png' + elif img.mode == 'RGB': + ext = '.jpg' + elif img.mode == 'L': # black and white + ext = '.jpg' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join(output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + + return save_im_path + + +def draw_bounding_box_on_image(save_im_path, output_data): + image = Image.open(save_im_path) + draw = ImageDraw.Draw(image) + for bbox in output_data: + # draw bouding box + if bbox['label'] == "MASK": + draw.line([(bbox['left'], bbox['top']), (bbox['left'], bbox['bottom']), (bbox['right'], bbox['bottom']), + (bbox['right'], bbox['top']), (bbox['left'], bbox['top'])], + width=2, + fill='green') + else: + draw.line([(bbox['left'], bbox['top']), (bbox['left'], bbox['bottom']), (bbox['right'], bbox['bottom']), + (bbox['right'], bbox['top']), (bbox['left'], bbox['top'])], + width=2, + fill='red') + # draw label + text = bbox['label'] + ": %.2f%%" % (100 * bbox['confidence']) + textsize_width, textsize_height = draw.textsize(text=text) + if image.mode == 'RGB' or image.mode == 'RGBA': + box_fill = (255, 255, 255) + text_fill = (0, 0, 0) + else: + box_fill = (255) + text_fill = (0) + + draw.rectangle( + xy=(bbox['left'], bbox['top'] - (textsize_height + 5), bbox['left'] + textsize_width + 10, bbox['top'] - 3), + fill=box_fill) + draw.text(xy=(bbox['left'], bbox['top'] - 15), text=text, fill=text_fill) + image.save(save_im_path) + + +def postprocess(confidence_out, org_im, org_im_path, detected_faces, output_dir, visualization): + """ + Postprocess output of network. one element at a time. + + Args: + confidence_out (numpy.ndarray): confidences of each label. + org_im (numpy.ndarray): original image. + org_im_path (list): path of original image. + detected_faces (list): faces detected in a picture. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + + Returns: + output (dict): keys are 'data' and 'path', the correspoding values are: + data (list[dict]): 6 keys, where + 'label' is `MASK` or `NO MASK`, + 'left', 'top', 'right', 'bottom' are the coordinate of detection bounding box, + 'confidence' is the confidence of mask detection. + path (str): The path of original image. 
+ """ + output = dict() + output['data'] = list() + output['path'] = org_im_path + + for index, face in enumerate(detected_faces): + label_idx = np.argmax(confidence_out[index]) + label_confidence = confidence_out[index][label_idx] + bbox = dict() + bbox['label'] = label_list[label_idx] + bbox['confidence'] = label_confidence + bbox['top'] = detected_faces[index]['top'] + bbox['bottom'] = detected_faces[index]['bottom'] + bbox['left'] = detected_faces[index]['left'] + bbox['right'] = detected_faces[index]['right'] + output['data'].append(bbox) + + if visualization: + check_dir(output_dir) + save_im_path = get_save_image_name(org_im, org_im_path, output_dir) + cv2.imwrite(save_im_path, org_im) + draw_bounding_box_on_image(save_im_path, output['data']) + + return output diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_server/README.md b/modules/image/face_detection/pyramidbox_lite_server/README.md similarity index 100% rename from hub_module/modules/image/face_detection/pyramidbox_lite_server/README.md rename to modules/image/face_detection/pyramidbox_lite_server/README.md diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_mobile_mask/__init__.py b/modules/image/face_detection/pyramidbox_lite_server/__init__.py similarity index 100% rename from hub_module/modules/image/face_detection/pyramidbox_lite_mobile_mask/__init__.py rename to modules/image/face_detection/pyramidbox_lite_server/__init__.py diff --git a/modules/image/face_detection/pyramidbox_lite_server/data_feed.py b/modules/image/face_detection/pyramidbox_lite_server/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..a2cb2883fa07fa76749f26bbdf63298520da4fef --- /dev/null +++ b/modules/image/face_detection/pyramidbox_lite_server/data_feed.py @@ -0,0 +1,63 @@ +# coding=utf-8 +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np + +__all__ = ['reader'] + + +def preprocess(org_im, shrink): + image = org_im.copy() + image_height, image_width, image_channel = image.shape + if shrink != 1: + image_height, image_width = int(image_height * shrink), int(image_width * shrink) + image = cv2.resize(image, (image_width, image_height), cv2.INTER_NEAREST) + # HWC to CHW + if len(image.shape) == 3: + image = np.swapaxes(image, 1, 2) + image = np.swapaxes(image, 1, 0) + # mean, std + mean = [104., 117., 123.] + scale = 0.007843 + image = image.astype('float32') + image -= np.array(mean)[:, np.newaxis, np.newaxis].astype('float32') + image = image * scale + return image, image_height, image_width + + +def reader(images, paths, shrink): + """ + Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C], color space is BGR. + paths (list[str]): paths to images. + shrink (float): parameter to control the resize scale in preprocess. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths is not None: + assert type(paths) is list, "paths should be a list." + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path) + each['org_im'] = im + each['org_im_path'] = im_path + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." 
+        for im in images:
+            each = OrderedDict()
+            each['org_im'] = im
+            each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6)
+            component.append(each)
+
+    for element in component:
+        element['image'], element['image_height'], element['image_width'] = preprocess(element['org_im'], shrink)
+        yield element
diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_server/label_list.txt b/modules/image/face_detection/pyramidbox_lite_server/label_list.txt
similarity index 100%
rename from hub_module/modules/image/face_detection/pyramidbox_lite_server/label_list.txt
rename to modules/image/face_detection/pyramidbox_lite_server/label_list.txt
diff --git a/modules/image/face_detection/pyramidbox_lite_server/module.py b/modules/image/face_detection/pyramidbox_lite_server/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..617baba6cce0ef6cc4861a95369fd1c80f9b8a47
--- /dev/null
+++ b/modules/image/face_detection/pyramidbox_lite_server/module.py
@@ -0,0 +1,191 @@
+# coding=utf-8
+from __future__ import absolute_import
+from __future__ import division
+
+import ast
+import argparse
+import os
+
+import numpy as np
+import paddle.fluid as fluid
+import paddlehub as hub
+from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+from paddlehub.module.module import moduleinfo, runnable, serving
+
+from pyramidbox_lite_server.data_feed import reader
+from pyramidbox_lite_server.processor import postprocess, base64_to_cv2
+
+
+@moduleinfo(
+    name="pyramidbox_lite_server",
+    type="CV/face_detection",
+    author="baidu-vis",
+    author_email="",
+    summary="PyramidBox-Lite-Server is a high-performance face detection model.",
+    version="1.2.0")
+class PyramidBoxLiteServer(hub.Module):
+    def _initialize(self):
+        self.default_pretrained_model_path = os.path.join(self.directory, "pyramidbox_lite_server_face_detection")
+        self._set_config()
+        self.processor = self
+
+    def _set_config(self):
+        """
+        predictor config setting
+        """
+        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
+        cpu_config.disable_glog_info()
+        cpu_config.disable_gpu()
+        self.cpu_predictor = create_paddle_predictor(cpu_config)
+
+        try:
+            _places = os.environ["CUDA_VISIBLE_DEVICES"]
+            int(_places[0])
+            use_gpu = True
+        except:
+            use_gpu = False
+        if use_gpu:
+            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
+            gpu_config.disable_glog_info()
+            gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0)
+            self.gpu_predictor = create_paddle_predictor(gpu_config)
+
+    def face_detection(self,
+                       images=None,
+                       paths=None,
+                       data=None,
+                       use_gpu=False,
+                       output_dir='detection_result',
+                       visualization=False,
+                       shrink=0.5,
+                       confs_threshold=0.6):
+        """
+        API for face detection.
+
+        Args:
+            images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
+            paths (list[str]): The paths of images.
+            use_gpu (bool): Whether to use gpu.
+            output_dir (str): The path to store output images.
+            visualization (bool): Whether to save image or not.
+            shrink (float): parameter to control the resize scale in preprocess.
+            confs_threshold (float): confidence threshold.
+
+        Returns:
+            res (list[dict]): The result of face detection and save path of images.
+        """
+        if use_gpu:
+            try:
+                _places = os.environ["CUDA_VISIBLE_DEVICES"]
+                int(_places[0])
+            except:
+                raise RuntimeError(
+                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
+                )
+
+        # compatibility with older versions
+        if data:
+            if 'image' in data:
+                if paths is None:
+                    paths = list()
+                paths += data['image']
+            elif 'data' in data:
+                if images is None:
+                    images = list()
+                images += data['data']
+
+        res = list()
+        # process one by one
+        for element in reader(images, paths, shrink):
+            image = np.expand_dims(element['image'], axis=0).astype('float32')
+            image_tensor = PaddleTensor(image.copy())
+            data_out = self.gpu_predictor.run([image_tensor]) if use_gpu else self.cpu_predictor.run([image_tensor])
+            out = postprocess(
+                data_out=data_out[0].as_ndarray(),
+                org_im=element['org_im'],
+                org_im_path=element['org_im_path'],
+                image_width=element['image_width'],
+                image_height=element['image_height'],
+                output_dir=output_dir,
+                visualization=visualization,
+                shrink=shrink,
+                confs_threshold=confs_threshold)
+            res.append(out)
+        return res
+
+    def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True):
+        if combined:
+            model_filename = "__model__" if not model_filename else model_filename
+            params_filename = "__params__" if not params_filename else params_filename
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+
+        program, feeded_var_names, target_vars = fluid.io.load_inference_model(
+            dirname=self.default_pretrained_model_path, executor=exe)
+
+        fluid.io.save_inference_model(
+            dirname=dirname,
+            main_program=program,
+            executor=exe,
+            feeded_var_names=feeded_var_names,
+            target_vars=target_vars,
+            model_filename=model_filename,
+            params_filename=params_filename)
+
+    @serving
+    def serving_method(self, images, **kwargs):
+        """
+        Run as a service.
+        """
+        images_decode = [base64_to_cv2(image) for image in images]
+        results = self.face_detection(images_decode, **kwargs)
+        return results
+
+    @runnable
+    def run_cmd(self, argvs):
+        """
+        Run as a command.
+        """
+        self.parser = argparse.ArgumentParser(
+            description="Run the {} module.".format(self.name),
+            prog='hub run {}'.format(self.name),
+            usage='%(prog)s',
+            add_help=True)
+        self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required")
+        self.arg_config_group = self.parser.add_argument_group(
+            title="Config options", description="Run configuration for controlling module behavior, not required.")
+        self.add_module_config_arg()
+        self.add_module_input_arg()
+        args = self.parser.parse_args(argvs)
+        results = self.face_detection(
+            paths=[args.input_path],
+            use_gpu=args.use_gpu,
+            output_dir=args.output_dir,
+            visualization=args.visualization,
+            shrink=args.shrink,
+            confs_threshold=args.confs_threshold)
+        return results
+
+    def add_module_config_arg(self):
+        """
+        Add the command config options.
+        """
+        self.arg_config_group.add_argument(
+            '--use_gpu', type=ast.literal_eval, default=False, help="whether to use GPU or not")
+        self.arg_config_group.add_argument(
+            '--output_dir', type=str, default='detection_result', help="The directory to save output images.")
+        self.arg_config_group.add_argument(
+            '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.")
+
+    def add_module_input_arg(self):
+        """
+        Add the command input options.
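+
+        These options are consumed by run_cmd; a hypothetical invocation of the
+        resulting CLI (the path is a placeholder)::
+
+            hub run pyramidbox_lite_server --input_path /PATH/TO/IMAGE --shrink 0.5 --confs_threshold 0.6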
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument( + '--shrink', + type=ast.literal_eval, + default=0.5, + help="resize the image to shrink * original_shape before feeding into network.") + self.arg_input_group.add_argument( + '--confs_threshold', type=ast.literal_eval, default=0.6, help="confidence threshold.") diff --git a/modules/image/face_detection/pyramidbox_lite_server/processor.py b/modules/image/face_detection/pyramidbox_lite_server/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..5057ab5b1fc6c84fdd06a7fcc45336031fa2ba4d --- /dev/null +++ b/modules/image/face_detection/pyramidbox_lite_server/processor.py @@ -0,0 +1,117 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import time +from collections import OrderedDict + +import base64 +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['base64_to_cv2', 'postprocess'] + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def check_dir(dir_path): + """ + Create directory to save processed image. + + Args: + dir_path (str): directory path to save images. + + """ + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im, org_im_path, output_dir): + """ + Get save image name from source image path. + """ + # name prefix of original image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + # extension + img = Image.fromarray(org_im[:, :, ::-1]) + if img.mode == 'RGBA': + ext = '.png' + elif img.mode == 'RGB': + ext = '.jpg' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join(output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + + return save_im_path + + +def clip_bbox(bbox, img_height, img_width): + bbox['left'] = int(max(min(bbox['left'], img_width), 0.)) + bbox['top'] = int(max(min(bbox['top'], img_height), 0.)) + bbox['right'] = int(max(min(bbox['right'], img_width), 0.)) + bbox['bottom'] = int(max(min(bbox['bottom'], img_height), 0.)) + return bbox + + +def postprocess(data_out, org_im, org_im_path, image_height, image_width, output_dir, visualization, shrink, + confs_threshold): + """ + Postprocess output of network. one image at a time. + + Args: + data_out (numpy.ndarray): output of network. + org_im (numpy.ndarray): original image. + org_im_path (list): path of riginal image. + image_height (int): height of preprocessed image. + image_width (int): width of preprocessed image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + shrink (float): parameter to control the resize scale in preprocess. + confs_threshold (float): confidence threshold. + + Returns: + output (dict): keys are 'data' and 'path', the correspoding values are: + data (list[dict]): 5 keys, where + 'left', 'top', 'right', 'bottom' are the coordinate of detection bounding box, + 'confidence' is the confidence this bbox. + path (str): The path of original image. 
+ """ + output = dict() + output['data'] = list() + output['path'] = org_im_path + + for each_data in data_out: + # each_data is a list: [label, confidence, left, top, right, bottom] + if each_data[1] > confs_threshold: + dt_bbox = dict() + dt_bbox['confidence'] = float(each_data[1]) + dt_bbox['left'] = image_width * each_data[2] / shrink + dt_bbox['top'] = image_height * each_data[3] / shrink + dt_bbox['right'] = image_width * each_data[4] / shrink + dt_bbox['bottom'] = image_height * each_data[5] / shrink + dt_bbox = clip_bbox(dt_bbox, org_im.shape[0], org_im.shape[1]) + output['data'].append(dt_bbox) + + if visualization: + check_dir(output_dir) + save_im_path = get_save_image_name(org_im, org_im_path, output_dir) + im_out = org_im.copy() + if len(output['data']) > 0: + for bbox in output['data']: + cv2.rectangle(im_out, (bbox['left'], bbox['top']), (bbox['right'], bbox['bottom']), (255, 255, 0), 2) + cv2.imwrite(save_im_path, im_out) + + return output diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_server_mask/README.md b/modules/image/face_detection/pyramidbox_lite_server_mask/README.md similarity index 100% rename from hub_module/modules/image/face_detection/pyramidbox_lite_server_mask/README.md rename to modules/image/face_detection/pyramidbox_lite_server_mask/README.md diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_server/__init__.py b/modules/image/face_detection/pyramidbox_lite_server_mask/__init__.py similarity index 100% rename from hub_module/modules/image/face_detection/pyramidbox_lite_server/__init__.py rename to modules/image/face_detection/pyramidbox_lite_server_mask/__init__.py diff --git a/modules/image/face_detection/pyramidbox_lite_server_mask/data_feed.py b/modules/image/face_detection/pyramidbox_lite_server_mask/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..608eb0d7d8bea89e3a41f785ddefe519ce538af6 --- /dev/null +++ b/modules/image/face_detection/pyramidbox_lite_server_mask/data_feed.py @@ -0,0 +1,185 @@ +# coding=utf-8 +import os +import math +import time +from collections import OrderedDict + +import cv2 +import numpy as np + +__all__ = ['reader'] + +multi_scales = [0.3, 0.6, 0.9] + + +def bbox_vote(det): + order = det[:, 4].ravel().argsort()[::-1] + det = det[order, :] + if det.shape[0] == 0: + dets = np.array([[10, 10, 20, 20, 0.002]]) + det = np.empty(shape=[0, 5]) + while det.shape[0] > 0: + # IOU + area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1) + xx1 = np.maximum(det[0, 0], det[:, 0]) + yy1 = np.maximum(det[0, 1], det[:, 1]) + xx2 = np.minimum(det[0, 2], det[:, 2]) + yy2 = np.minimum(det[0, 3], det[:, 3]) + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + o = inter / (area[0] + area[:] - inter) + # nms + merge_index = np.where(o >= 0.3)[0] + det_accu = det[merge_index, :] + det = np.delete(det, merge_index, 0) + if merge_index.shape[0] <= 1: + if det.shape[0] == 0: + try: + dets = np.row_stack((dets, det_accu)) + except: + dets = det_accu + continue + det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4)) + max_score = np.max(det_accu[:, 4]) + det_accu_sum = np.zeros((1, 5)) + det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:]) + det_accu_sum[:, 4] = max_score + try: + dets = np.row_stack((dets, det_accu_sum)) + except: + dets = det_accu_sum + dets = dets[0:750, :] + return dets + + +def crop(image, pts, shift=0, scale=1.5, rotate=0, res_width=128, res_height=128): + res = 
(res_width, res_height)
+    idx1 = 0
+    idx2 = 1
+    # angle
+    alpha = 0
+    if pts[idx2, 0] != -1 and pts[idx2, 1] != -1 and pts[idx1, 0] != -1 and pts[idx1, 1] != -1:
+        alpha = math.atan2(pts[idx2, 1] - pts[idx1, 1], pts[idx2, 0] - pts[idx1, 0]) * 180 / math.pi
+    pts[pts == -1] = np.inf
+    coord_min = np.min(pts, 0)
+    pts[pts == np.inf] = -1
+    coord_max = np.max(pts, 0)
+    # coordinates of center point
+    c = np.array([coord_max[0] - (coord_max[0] - coord_min[0]) / 2,
+                  coord_max[1] - (coord_max[1] - coord_min[1]) / 2])  # center
+    max_wh = max((coord_max[0] - coord_min[0]) / 2, (coord_max[1] - coord_min[1]) / 2)
+    # shift the center point and add the eye-line angle to the rotation
+    c = c + shift * max_wh
+    rotate = rotate + alpha
+    M = cv2.getRotationMatrix2D((c[0], c[1]), rotate, res[0] / (2 * max_wh * scale))
+    M[0, 2] = M[0, 2] - (c[0] - res[0] / 2.0)
+    M[1, 2] = M[1, 2] - (c[1] - res[0] / 2.0)
+    image_out = cv2.warpAffine(image, M, res)
+    return image_out, M
+
+
+def color_normalize(image, mean, std=None):
+    if image.shape[-1] == 1:
+        image = np.repeat(image, 3, axis=2)
+    h, w, c = image.shape
+    image = np.transpose(image, (2, 0, 1))
+    image = np.subtract(image.reshape(c, -1), mean[:, np.newaxis]).reshape(-1, h, w)
+    image = np.transpose(image, (1, 2, 0))
+    return image
+
+
+def process_image(org_im, face):
+    pts = np.array([
+        face['left'], face['top'], face['right'], face['top'], face['left'], face['bottom'], face['right'],
+        face['bottom']
+    ]).reshape(4, 2).astype(np.float32)
+    image_in, M = crop(org_im, pts)
+    image_in = image_in / 256.0
+    image_in = color_normalize(image_in, mean=np.array([0.5, 0.5, 0.5]))
+    image_in = image_in.astype(np.float32).transpose([2, 0, 1]).reshape(-1, 3, 128, 128)
+    return image_in
+
+
+def reader(face_detector, shrink, confs_threshold, images, paths, use_gpu, use_multi_scale):
+    """
+    Preprocess to yield image.
+
+    Args:
+        face_detector (class): class to detect faces.
+        shrink (float): parameter to control the resize scale in face_detector.
+        confs_threshold (float): confidence threshold of face_detector.
+        images (list(numpy.ndarray)): images data, shape of each is [H, W, C], color space is BGR.
+        paths (list[str]): paths to images.
+        use_gpu (bool): whether to use gpu in face_detector.
+        use_multi_scale (bool): whether to enable multi-scale face detection.
+    Yield:
+        element (collections.OrderedDict): info of original image, preprocessed image, contains 3 keys:
+            org_im (numpy.ndarray): original image.
+            org_im_path (str): path to original image.
+            preprocessed (list[OrderedDict]): each element contains 2 keys:
+                face (dict): face detected in the original image.
+                image (numpy.ndarray): data to be fed into neural network.
+    """
+    component = list()
+    if paths is not None:
+        assert type(paths) is list, "paths should be a list."
+        for im_path in paths:
+            each = OrderedDict()
+            assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path)
+            im = cv2.imread(im_path)
+            each['org_im'] = im
+            each['org_im_path'] = im_path
+            component.append(each)
+    if images is not None:
+        assert type(images) is list, "images should be a list."
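+        # as in the plain detector's reader, ndarray inputs are tagged with a
+        # synthetic 'ndarray_time=<microseconds>' pseudo-path for save naming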
+ for im in images: + each = OrderedDict() + each['org_im'] = im + each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + component.append(each) + + for element in component: + if use_multi_scale: + scale_res = list() + detect_faces = list() + for scale in multi_scales: + _detect_res = face_detector.face_detection( + images=[element['org_im']], + use_gpu=use_gpu, + visualization=False, + shrink=scale, + confs_threshold=confs_threshold) + + _s = list() + for _face in _detect_res[0]['data']: + _face_list = [_face['left'], _face['top'], _face['right'], _face['bottom'], _face['confidence']] + _s.append(_face_list) + + if _s: + scale_res.append(np.array(_s)) + + scale_res = np.row_stack(scale_res) + scale_res = bbox_vote(scale_res) + keep_index = np.where(scale_res[:, 4] >= confs_threshold)[0] + scale_res = scale_res[keep_index, :] + for data in scale_res: + face = {'left': data[0], 'top': data[1], 'right': data[2], 'bottom': data[3], 'confidence': data[4]} + detect_faces.append(face) + else: + _detect_res = face_detector.face_detection( + images=[element['org_im']], + use_gpu=use_gpu, + visualization=False, + shrink=shrink, + confs_threshold=confs_threshold) + detect_faces = _detect_res[0]['data'] + + element['preprocessed'] = list() + for face in detect_faces: + handled = OrderedDict() + handled['face'] = face + handled['image'] = process_image(element['org_im'], face) + element['preprocessed'].append(handled) + + yield element diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_server_mask/label_list.txt b/modules/image/face_detection/pyramidbox_lite_server_mask/label_list.txt similarity index 100% rename from hub_module/modules/image/face_detection/pyramidbox_lite_server_mask/label_list.txt rename to modules/image/face_detection/pyramidbox_lite_server_mask/label_list.txt diff --git a/modules/image/face_detection/pyramidbox_lite_server_mask/module.py b/modules/image/face_detection/pyramidbox_lite_server_mask/module.py new file mode 100644 index 0000000000000000000000000000000000000000..90b45e83be0c423738b041e82b77ed88a251f310 --- /dev/null +++ b/modules/image/face_detection/pyramidbox_lite_server_mask/module.py @@ -0,0 +1,256 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division + +import ast +import argparse +import os + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving + +from pyramidbox_lite_server_mask.data_feed import reader +from pyramidbox_lite_server_mask.processor import postprocess, base64_to_cv2 + + +@moduleinfo( + name="pyramidbox_lite_server_mask", + type="CV/face_detection", + author="baidu-vis", + author_email="", + summary= + "PyramidBox-Lite-Server-Mask is a high-performance face detection model used to detect whether people wear masks.", + version="1.3.0") +class PyramidBoxLiteServerMask(hub.Module): + def _initialize(self, face_detector_module=None): + """ + Args: + face_detector_module (class): module to detect face. 
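+
+        A hypothetical sketch of wiring in a custom detector (extra kwargs
+        passed to hub.Module are forwarded to _initialize)::
+
+            import paddlehub as hub
+
+            detector = hub.Module(name='pyramidbox_lite_server')
+            mask_detector = hub.Module(name='pyramidbox_lite_server_mask', face_detector_module=detector)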
+ """ + self.default_pretrained_model_path = os.path.join(self.directory, "pyramidbox_lite_server_mask_model") + if face_detector_module is None: + self.face_detector = hub.Module(name='pyramidbox_lite_server') + else: + self.face_detector = face_detector_module + self._set_config() + self.processor = self + + def _set_config(self): + """ + predictor config setting + """ + cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def set_face_detector_module(self, face_detector_module): + """ + Set face detector. + Args: + face_detector_module (class): module to detect faces. + """ + self.face_detector = face_detector_module + + def get_face_detector_module(self): + return self.face_detector + + def face_detection(self, + images=None, + paths=None, + data=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='detection_result', + use_multi_scale=False, + shrink=0.5, + confs_threshold=0.6): + """ + API for face detection. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + use_gpu (bool): Whether to use gpu. + visualization (bool): Whether to save image or not. + output_dir (str): The path to store output images. + use_multi_scale (bool): whether to enable multi-scale face detection. Enabling multi-scale face detection + can increase the accuracy to detect faces, however, + it reduce the prediction speed for the increase model calculation. + shrink (float): parameter to control the resize scale in preprocess. + confs_threshold (float): confidence threshold. + + Returns: + res (list[dict]): The result of face detection and save path of images. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + # compatibility with older versions + if data: + if 'image' in data: + if paths is None: + paths = list() + paths += data['image'] + elif 'data' in data: + if images is None: + images = list() + images += data['data'] + + # get all data + all_element = list() + for yield_data in reader(self.face_detector, shrink, confs_threshold, images, paths, use_gpu, use_multi_scale): + all_element.append(yield_data) + + image_list = list() + element_image_num = list() + for i in range(len(all_element)): + element_image = [handled['image'] for handled in all_element[i]['preprocessed']] + element_image_num.append(len(element_image)) + image_list.extend(element_image) + + total_num = len(image_list) + loop_num = int(np.ceil(total_num / batch_size)) + + predict_out = np.zeros((1, 2)) + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for element_id in range(batch_size): + try: + batch_data.append(image_list[handle_id + element_id]) + except: + pass + + image_arr = np.squeeze(np.array(batch_data), axis=1) + image_tensor = PaddleTensor(image_arr.copy()) + data_out = self.gpu_predictor.run([image_tensor]) if use_gpu else self.cpu_predictor.run([image_tensor]) + # len(data_out) == 1 + # data_out[0].as_ndarray().shape == (-1, 2) + data_out = data_out[0].as_ndarray() + predict_out = np.concatenate((predict_out, data_out)) + + predict_out = predict_out[1:] + # postprocess one by one + res = list() + for i in range(len(all_element)): + detect_faces_list = [handled['face'] for handled in all_element[i]['preprocessed']] + interval_left = sum(element_image_num[0:i]) + interval_right = interval_left + element_image_num[i] + out = postprocess( + confidence_out=predict_out[interval_left:interval_right], + org_im=all_element[i]['org_im'], + org_im_path=all_element[i]['org_im_path'], + detected_faces=detect_faces_list, + output_dir=output_dir, + visualization=visualization) + res.append(out) + return res + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + classifier_dir = os.path.join(dirname, 'mask_detector') + detector_dir = os.path.join(dirname, 'pyramidbox_lite') + self._save_classifier_model(classifier_dir, model_filename, params_filename, combined) + self._save_detector_model(detector_dir, model_filename, params_filename, combined) + + def _save_detector_model(self, dirname, model_filename=None, params_filename=None, combined=True): + self.face_detector.save_inference_model(dirname, model_filename, params_filename, combined) + + def _save_classifier_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.face_detection(images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. 
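+
+        For example, a hypothetical invocation (the path is a placeholder)::
+
+            hub run pyramidbox_lite_server_mask --input_path /PATH/TO/IMAGE --visualization True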
+ """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.face_detection( + paths=[args.input_path], + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization, + shrink=args.shrink, + confs_threshold=args.confs_threshold) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='detection_result', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument( + '--shrink', + type=ast.literal_eval, + default=0.5, + help="resize the image to `shrink * original_shape` before feeding into network.") + self.arg_input_group.add_argument( + '--confs_threshold', type=ast.literal_eval, default=0.6, help="confidence threshold.") diff --git a/modules/image/face_detection/pyramidbox_lite_server_mask/processor.py b/modules/image/face_detection/pyramidbox_lite_server_mask/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..4a9173f888b0994b8090dc7d1f2a2e0b72bdd208 --- /dev/null +++ b/modules/image/face_detection/pyramidbox_lite_server_mask/processor.py @@ -0,0 +1,139 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import time +from collections import OrderedDict + +import base64 +import cv2 +import numpy as np +from PIL import Image, ImageDraw + +__all__ = ['base64_to_cv2', 'postprocess'] + +label_list = ['NO MASK', 'MASK'] + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def check_dir(dir_path): + """ + Create directory to save processed image. + + Args: + dir_path (str): directory path to save images. + + """ + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im, org_im_path, output_dir): + """ + Get save image name from source image path. 
+ """ + # name prefix of original image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + # extension + img = Image.fromarray(org_im[:, :, ::-1]) + if img.mode == 'RGBA': + ext = '.png' + elif img.mode == 'RGB': + ext = '.jpg' + elif img.mode == 'L': # black and white + ext = '.jpg' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join(output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + + return save_im_path + + +def draw_bounding_box_on_image(save_im_path, output_data): + image = Image.open(save_im_path) + draw = ImageDraw.Draw(image) + for bbox in output_data: + # draw bouding box + if bbox['label'] == "MASK": + draw.line([(bbox['left'], bbox['top']), (bbox['left'], bbox['bottom']), (bbox['right'], bbox['bottom']), + (bbox['right'], bbox['top']), (bbox['left'], bbox['top'])], + width=2, + fill='green') + else: + draw.line([(bbox['left'], bbox['top']), (bbox['left'], bbox['bottom']), (bbox['right'], bbox['bottom']), + (bbox['right'], bbox['top']), (bbox['left'], bbox['top'])], + width=2, + fill='red') + # draw label + text = bbox['label'] + ": %.2f%%" % (100 * bbox['confidence']) + textsize_width, textsize_height = draw.textsize(text=text) + if image.mode == 'RGB' or image.mode == 'RGBA': + box_fill = (255, 255, 255) + text_fill = (0, 0, 0) + else: + box_fill = (255) + text_fill = (0) + + draw.rectangle( + xy=(bbox['left'], bbox['top'] - (textsize_height + 5), bbox['left'] + textsize_width + 10, bbox['top'] - 3), + fill=box_fill) + draw.text(xy=(bbox['left'], bbox['top'] - 15), text=text, fill=text_fill) + image.save(save_im_path) + + +def postprocess(confidence_out, org_im, org_im_path, detected_faces, output_dir, visualization): + """ + Postprocess output of network. one element at a time. + + Args: + confidence_out (numpy.ndarray): confidences of each label. + org_im (numpy.ndarray): original image. + org_im_path (list): path of original image. + detected_faces (list): faces detected in a picture. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + + Returns: + output (dict): keys are 'data' and 'path', the correspoding values are: + data (list[dict]): 6 keys, where + 'label' is `MASK` or `NO MASK`, + 'left', 'top', 'right', 'bottom' are the coordinate of detection bounding box, + 'confidence' is the confidence of mask detection. + path (str): The path of original image. 
+ """ + output = dict() + output['data'] = list() + output['path'] = org_im_path + + for index, face in enumerate(detected_faces): + label_idx = np.argmax(confidence_out[index]) + label_confidence = confidence_out[index][label_idx] + bbox = dict() + bbox['label'] = label_list[label_idx] + bbox['confidence'] = label_confidence + bbox['top'] = detected_faces[index]['top'] + bbox['bottom'] = detected_faces[index]['bottom'] + bbox['left'] = detected_faces[index]['left'] + bbox['right'] = detected_faces[index]['right'] + output['data'].append(bbox) + + if visualization: + check_dir(output_dir) + save_im_path = get_save_image_name(org_im, org_im_path, output_dir) + cv2.imwrite(save_im_path, org_im) + draw_bounding_box_on_image(save_im_path, output['data']) + + return output diff --git a/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README.md b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README.md similarity index 100% rename from hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README.md rename to modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README.md diff --git a/hub_module/modules/image/face_detection/pyramidbox_lite_server_mask/__init__.py b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/__init__.py similarity index 100% rename from hub_module/modules/image/face_detection/pyramidbox_lite_server_mask/__init__.py rename to modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/__init__.py diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/data_feed.py b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..94930ca374562b8b71d0e45fc5f311f649c963f5 --- /dev/null +++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/data_feed.py @@ -0,0 +1,52 @@ +# coding=utf-8 +import os +from collections import OrderedDict + +import cv2 +import numpy as np + +__all__ = ['reader'] + + +def preprocess(orig_image): + image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB) + image = cv2.resize(image, (320, 240)) + image_mean = np.array([127, 127, 127]) + image = (image - image_mean) / 128.0 + image = np.transpose(image, [2, 0, 1]) + return image + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path) + each['orig_im'] = im + each['orig_im_shape'] = im.shape # height, width, channel + each['orig_im_path'] = im_path + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." 
+ for im in images: + each = OrderedDict() + each['orig_im'] = im + each['orig_im_path'] = None + each['orig_im_shape'] = im.shape # height, width, channel + component.append(each) + + for element in component: + element['image'] = preprocess(element['orig_im']) + yield element diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/face_detector.py b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/face_detector.py new file mode 100644 index 0000000000000000000000000000000000000000..22c51db2b4cc090ffe0795bbf235b8bb71c8b61c --- /dev/null +++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/face_detector.py @@ -0,0 +1,1185 @@ +# coding=utf-8 +from paddle.fluid.initializer import Constant +from paddle.fluid.param_attr import ParamAttr +import paddle.fluid as fluid + + +def face_detector_320(): + _319 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) + _322 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) + _323 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) + _333 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) + _336 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) + _337 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) + _365 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) + _368 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) + _369 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) + _379 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) + _382 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) + _383 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) + _405 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) + _408 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) + _409 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) + _419 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) + _422 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) + _423 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) + _437 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) + _440 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) + _441 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) + _449 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) + _452 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) + _453 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) + _463 = fluid.layers.fill_constant(shape=[1], dtype='float32', value=0.10000000149011612) + _465 = fluid.layers.create_parameter( + dtype='float32', shape=[1, 4420, 2], name='_465', attr='_465', default_initializer=Constant(0.0)) + _467 = fluid.layers.create_parameter( + dtype='float32', shape=[1, 4420, 2], name='_467', attr='_467', default_initializer=Constant(0.0)) + _470 = fluid.layers.fill_constant(shape=[1], dtype='float32', value=0.20000000298023224) + _473 = fluid.layers.create_parameter( + dtype='float32', shape=[1, 4420, 2], name='_473', attr='_473', default_initializer=Constant(0.0)) + _478 = fluid.layers.fill_constant(shape=[1], dtype='float32', value=2.0) + _483 = fluid.layers.fill_constant(shape=[1], dtype='float32', value=2.0) + _input = fluid.layers.data(dtype='float32', shape=[1, 3, 240, 320], name='_input', append_batch_size=False) + _325 = fluid.layers.assign(_322) + _326 = fluid.layers.assign(_323) + _339 = 
fluid.layers.assign(_336) + _340 = fluid.layers.assign(_337) + _371 = fluid.layers.assign(_368) + _372 = fluid.layers.assign(_369) + _385 = fluid.layers.assign(_382) + _386 = fluid.layers.assign(_383) + _411 = fluid.layers.assign(_408) + _412 = fluid.layers.assign(_409) + _425 = fluid.layers.assign(_422) + _426 = fluid.layers.assign(_423) + _443 = fluid.layers.assign(_440) + _444 = fluid.layers.assign(_441) + _455 = fluid.layers.assign(_452) + _456 = fluid.layers.assign(_453) + _245 = fluid.layers.conv2d( + _input, + num_filters=16, + filter_size=[3, 3], + stride=[2, 2], + padding=[1, 1], + dilation=[1, 1], + groups=1, + param_attr='_base_net_0_0_weight', + name='_245', + bias_attr=False) + _246 = fluid.layers.batch_norm( + _245, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_0_1_weight', + bias_attr='_base_net_0_1_bias', + moving_mean_name='_base_net_0_1_running_mean', + moving_variance_name='_base_net_0_1_running_var', + use_global_stats=False, + name='_246') + _247 = fluid.layers.relu(_246, name='_247') + _248 = fluid.layers.conv2d( + _247, + num_filters=16, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=16, + param_attr='_base_net_1_0_weight', + name='_248', + bias_attr=False) + _249 = fluid.layers.batch_norm( + _248, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_1_1_weight', + bias_attr='_base_net_1_1_bias', + moving_mean_name='_base_net_1_1_running_mean', + moving_variance_name='_base_net_1_1_running_var', + use_global_stats=False, + name='_249') + _250 = fluid.layers.relu(_249, name='_250') + _251 = fluid.layers.conv2d( + _250, + num_filters=32, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_1_3_weight', + name='_251', + bias_attr=False) + _252 = fluid.layers.batch_norm( + _251, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_1_4_weight', + bias_attr='_base_net_1_4_bias', + moving_mean_name='_base_net_1_4_running_mean', + moving_variance_name='_base_net_1_4_running_var', + use_global_stats=False, + name='_252') + _253 = fluid.layers.relu(_252, name='_253') + _254 = fluid.layers.conv2d( + _253, + num_filters=32, + filter_size=[3, 3], + stride=[2, 2], + padding=[1, 1], + dilation=[1, 1], + groups=32, + param_attr='_base_net_2_0_weight', + name='_254', + bias_attr=False) + _255 = fluid.layers.batch_norm( + _254, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_2_1_weight', + bias_attr='_base_net_2_1_bias', + moving_mean_name='_base_net_2_1_running_mean', + moving_variance_name='_base_net_2_1_running_var', + use_global_stats=False, + name='_255') + _256 = fluid.layers.relu(_255, name='_256') + _257 = fluid.layers.conv2d( + _256, + num_filters=32, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_2_3_weight', + name='_257', + bias_attr=False) + _258 = fluid.layers.batch_norm( + _257, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_2_4_weight', + bias_attr='_base_net_2_4_bias', + moving_mean_name='_base_net_2_4_running_mean', + moving_variance_name='_base_net_2_4_running_var', + use_global_stats=False, + name='_258') + _259 = 
fluid.layers.relu(_258, name='_259') + _260 = fluid.layers.conv2d( + _259, + num_filters=32, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=32, + param_attr='_base_net_3_0_weight', + name='_260', + bias_attr=False) + _261 = fluid.layers.batch_norm( + _260, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_3_1_weight', + bias_attr='_base_net_3_1_bias', + moving_mean_name='_base_net_3_1_running_mean', + moving_variance_name='_base_net_3_1_running_var', + use_global_stats=False, + name='_261') + _262 = fluid.layers.relu(_261, name='_262') + _263 = fluid.layers.conv2d( + _262, + num_filters=32, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_3_3_weight', + name='_263', + bias_attr=False) + _264 = fluid.layers.batch_norm( + _263, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_3_4_weight', + bias_attr='_base_net_3_4_bias', + moving_mean_name='_base_net_3_4_running_mean', + moving_variance_name='_base_net_3_4_running_var', + use_global_stats=False, + name='_264') + _265 = fluid.layers.relu(_264, name='_265') + _266 = fluid.layers.conv2d( + _265, + num_filters=32, + filter_size=[3, 3], + stride=[2, 2], + padding=[1, 1], + dilation=[1, 1], + groups=32, + param_attr='_base_net_4_0_weight', + name='_266', + bias_attr=False) + _267 = fluid.layers.batch_norm( + _266, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_4_1_weight', + bias_attr='_base_net_4_1_bias', + moving_mean_name='_base_net_4_1_running_mean', + moving_variance_name='_base_net_4_1_running_var', + use_global_stats=False, + name='_267') + _268 = fluid.layers.relu(_267, name='_268') + _269 = fluid.layers.conv2d( + _268, + num_filters=64, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_4_3_weight', + name='_269', + bias_attr=False) + _270 = fluid.layers.batch_norm( + _269, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_4_4_weight', + bias_attr='_base_net_4_4_bias', + moving_mean_name='_base_net_4_4_running_mean', + moving_variance_name='_base_net_4_4_running_var', + use_global_stats=False, + name='_270') + _271 = fluid.layers.relu(_270, name='_271') + _272 = fluid.layers.conv2d( + _271, + num_filters=64, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=64, + param_attr='_base_net_5_0_weight', + name='_272', + bias_attr=False) + _273 = fluid.layers.batch_norm( + _272, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_5_1_weight', + bias_attr='_base_net_5_1_bias', + moving_mean_name='_base_net_5_1_running_mean', + moving_variance_name='_base_net_5_1_running_var', + use_global_stats=False, + name='_273') + _274 = fluid.layers.relu(_273, name='_274') + _275 = fluid.layers.conv2d( + _274, + num_filters=64, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_5_3_weight', + name='_275', + bias_attr=False) + _276 = fluid.layers.batch_norm( + _275, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_5_4_weight', + 
bias_attr='_base_net_5_4_bias', + moving_mean_name='_base_net_5_4_running_mean', + moving_variance_name='_base_net_5_4_running_var', + use_global_stats=False, + name='_276') + _277 = fluid.layers.relu(_276, name='_277') + _278 = fluid.layers.conv2d( + _277, + num_filters=64, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=64, + param_attr='_base_net_6_0_weight', + name='_278', + bias_attr=False) + _279 = fluid.layers.batch_norm( + _278, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_6_1_weight', + bias_attr='_base_net_6_1_bias', + moving_mean_name='_base_net_6_1_running_mean', + moving_variance_name='_base_net_6_1_running_var', + use_global_stats=False, + name='_279') + _280 = fluid.layers.relu(_279, name='_280') + _281 = fluid.layers.conv2d( + _280, + num_filters=64, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_6_3_weight', + name='_281', + bias_attr=False) + _282 = fluid.layers.batch_norm( + _281, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_6_4_weight', + bias_attr='_base_net_6_4_bias', + moving_mean_name='_base_net_6_4_running_mean', + moving_variance_name='_base_net_6_4_running_var', + use_global_stats=False, + name='_282') + _283 = fluid.layers.relu(_282, name='_283') + _284 = fluid.layers.conv2d( + _283, + num_filters=8, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_7_branch0_0_conv_weight', + name='_284', + bias_attr=False) + _291 = fluid.layers.conv2d( + _283, + num_filters=8, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_7_branch1_0_conv_weight', + name='_291', + bias_attr=False) + _298 = fluid.layers.conv2d( + _283, + num_filters=8, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_7_branch2_0_conv_weight', + name='_298', + bias_attr=False) + _311 = fluid.layers.conv2d( + _283, + num_filters=64, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_7_shortcut_conv_weight', + name='_311', + bias_attr=False) + _285 = fluid.layers.batch_norm( + _284, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_7_branch0_0_bn_weight', + bias_attr='_base_net_7_branch0_0_bn_bias', + moving_mean_name='_base_net_7_branch0_0_bn_running_mean', + moving_variance_name='_base_net_7_branch0_0_bn_running_var', + use_global_stats=False, + name='_285') + _292 = fluid.layers.batch_norm( + _291, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_7_branch1_0_bn_weight', + bias_attr='_base_net_7_branch1_0_bn_bias', + moving_mean_name='_base_net_7_branch1_0_bn_running_mean', + moving_variance_name='_base_net_7_branch1_0_bn_running_var', + use_global_stats=False, + name='_292') + _299 = fluid.layers.batch_norm( + _298, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_7_branch2_0_bn_weight', + bias_attr='_base_net_7_branch2_0_bn_bias', + moving_mean_name='_base_net_7_branch2_0_bn_running_mean', + moving_variance_name='_base_net_7_branch2_0_bn_running_var', + use_global_stats=False, + 
name='_299') + _312 = fluid.layers.batch_norm( + _311, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_7_shortcut_bn_weight', + bias_attr='_base_net_7_shortcut_bn_bias', + moving_mean_name='_base_net_7_shortcut_bn_running_mean', + moving_variance_name='_base_net_7_shortcut_bn_running_var', + use_global_stats=False, + name='_312') + _286 = fluid.layers.conv2d( + _285, + num_filters=16, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=1, + param_attr='_base_net_7_branch0_1_conv_weight', + name='_286', + bias_attr=False) + _293 = fluid.layers.conv2d( + _292, + num_filters=16, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=1, + param_attr='_base_net_7_branch1_1_conv_weight', + name='_293', + bias_attr=False) + _300 = fluid.layers.conv2d( + _299, + num_filters=12, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=1, + param_attr='_base_net_7_branch2_1_conv_weight', + name='_300', + bias_attr=False) + _287 = fluid.layers.batch_norm( + _286, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_7_branch0_1_bn_weight', + bias_attr='_base_net_7_branch0_1_bn_bias', + moving_mean_name='_base_net_7_branch0_1_bn_running_mean', + moving_variance_name='_base_net_7_branch0_1_bn_running_var', + use_global_stats=False, + name='_287') + _294 = fluid.layers.batch_norm( + _293, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_7_branch1_1_bn_weight', + bias_attr='_base_net_7_branch1_1_bn_bias', + moving_mean_name='_base_net_7_branch1_1_bn_running_mean', + moving_variance_name='_base_net_7_branch1_1_bn_running_var', + use_global_stats=False, + name='_294') + _301 = fluid.layers.batch_norm( + _300, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_7_branch2_1_bn_weight', + bias_attr='_base_net_7_branch2_1_bn_bias', + moving_mean_name='_base_net_7_branch2_1_bn_running_mean', + moving_variance_name='_base_net_7_branch2_1_bn_running_var', + use_global_stats=False, + name='_301') + _288 = fluid.layers.relu(_287, name='_288') + _295 = fluid.layers.relu(_294, name='_295') + _302 = fluid.layers.relu(_301, name='_302') + _289 = fluid.layers.conv2d( + _288, + num_filters=16, + filter_size=[3, 3], + stride=[1, 1], + padding=[2, 2], + dilation=[2, 2], + groups=1, + param_attr='_base_net_7_branch0_2_conv_weight', + name='_289', + bias_attr=False) + _296 = fluid.layers.conv2d( + _295, + num_filters=16, + filter_size=[3, 3], + stride=[1, 1], + padding=[3, 3], + dilation=[3, 3], + groups=1, + param_attr='_base_net_7_branch1_2_conv_weight', + name='_296', + bias_attr=False) + _303 = fluid.layers.conv2d( + _302, + num_filters=16, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=1, + param_attr='_base_net_7_branch2_2_conv_weight', + name='_303', + bias_attr=False) + _290 = fluid.layers.batch_norm( + _289, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_7_branch0_2_bn_weight', + bias_attr='_base_net_7_branch0_2_bn_bias', + moving_mean_name='_base_net_7_branch0_2_bn_running_mean', + moving_variance_name='_base_net_7_branch0_2_bn_running_var', + use_global_stats=False, + name='_290') + _297 = fluid.layers.batch_norm( 
+ _296, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_7_branch1_2_bn_weight', + bias_attr='_base_net_7_branch1_2_bn_bias', + moving_mean_name='_base_net_7_branch1_2_bn_running_mean', + moving_variance_name='_base_net_7_branch1_2_bn_running_var', + use_global_stats=False, + name='_297') + _304 = fluid.layers.batch_norm( + _303, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_7_branch2_2_bn_weight', + bias_attr='_base_net_7_branch2_2_bn_bias', + moving_mean_name='_base_net_7_branch2_2_bn_running_mean', + moving_variance_name='_base_net_7_branch2_2_bn_running_var', + use_global_stats=False, + name='_304') + _305 = fluid.layers.relu(_304, name='_305') + _306 = fluid.layers.conv2d( + _305, + num_filters=16, + filter_size=[3, 3], + stride=[1, 1], + padding=[5, 5], + dilation=[5, 5], + groups=1, + param_attr='_base_net_7_branch2_3_conv_weight', + name='_306', + bias_attr=False) + _307 = fluid.layers.batch_norm( + _306, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_7_branch2_3_bn_weight', + bias_attr='_base_net_7_branch2_3_bn_bias', + moving_mean_name='_base_net_7_branch2_3_bn_running_mean', + moving_variance_name='_base_net_7_branch2_3_bn_running_var', + use_global_stats=False, + name='_307') + _308 = fluid.layers.concat([_290, _297, _307], axis=1) + _309 = fluid.layers.conv2d( + _308, + num_filters=64, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_7_ConvLinear_conv_weight', + name='_309', + bias_attr=False) + _310 = fluid.layers.batch_norm( + _309, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_7_ConvLinear_bn_weight', + bias_attr='_base_net_7_ConvLinear_bn_bias', + moving_mean_name='_base_net_7_ConvLinear_bn_running_mean', + moving_variance_name='_base_net_7_ConvLinear_bn_running_var', + use_global_stats=False, + name='_310') + _313 = fluid.layers.elementwise_add(x=_310, y=_312, name='_313') + _314 = fluid.layers.relu(_313, name='_314') + _315 = fluid.layers.conv2d( + _314, + num_filters=64, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=64, + param_attr='_classification_headers_0_0_weight', + name='_315', + bias_attr='_classification_headers_0_0_bias') + _329 = fluid.layers.conv2d( + _314, + num_filters=64, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=64, + param_attr='_regression_headers_0_0_weight', + name='_329', + bias_attr='_regression_headers_0_0_bias') + _343 = fluid.layers.conv2d( + _314, + num_filters=64, + filter_size=[3, 3], + stride=[2, 2], + padding=[1, 1], + dilation=[1, 1], + groups=64, + param_attr='_base_net_8_0_weight', + name='_343', + bias_attr=False) + _316 = fluid.layers.relu(_315, name='_316') + _330 = fluid.layers.relu(_329, name='_330') + _344 = fluid.layers.batch_norm( + _343, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_8_1_weight', + bias_attr='_base_net_8_1_bias', + moving_mean_name='_base_net_8_1_running_mean', + moving_variance_name='_base_net_8_1_running_var', + use_global_stats=False, + name='_344') + _317 = fluid.layers.conv2d( + _316, + num_filters=6, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + 
groups=1, + param_attr='_classification_headers_0_2_weight', + name='_317', + bias_attr='_classification_headers_0_2_bias') + _331 = fluid.layers.conv2d( + _330, + num_filters=12, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_regression_headers_0_2_weight', + name='_331', + bias_attr='_regression_headers_0_2_bias') + _345 = fluid.layers.relu(_344, name='_345') + _318 = fluid.layers.transpose(_317, perm=[0, 2, 3, 1], name='_318') + _332 = fluid.layers.transpose(_331, perm=[0, 2, 3, 1], name='_332') + _346 = fluid.layers.conv2d( + _345, + num_filters=128, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_8_3_weight', + name='_346', + bias_attr=False) + _320 = fluid.layers.shape(_318) + _334 = fluid.layers.shape(_332) + _347 = fluid.layers.batch_norm( + _346, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_8_4_weight', + bias_attr='_base_net_8_4_bias', + moving_mean_name='_base_net_8_4_running_mean', + moving_variance_name='_base_net_8_4_running_var', + use_global_stats=False, + name='_347') + _321 = fluid.layers.gather(input=_320, index=_319) + _335 = fluid.layers.gather(input=_334, index=_333) + _348 = fluid.layers.relu(_347, name='_348') + _324 = fluid.layers.assign(_321) + _338 = fluid.layers.assign(_335) + _349 = fluid.layers.conv2d( + _348, + num_filters=128, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=128, + param_attr='_base_net_9_0_weight', + name='_349', + bias_attr=False) + _327 = fluid.layers.concat([_324, _325, _326], axis=0) + _341 = fluid.layers.concat([_338, _339, _340], axis=0) + _350 = fluid.layers.batch_norm( + _349, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_9_1_weight', + bias_attr='_base_net_9_1_bias', + moving_mean_name='_base_net_9_1_running_mean', + moving_variance_name='_base_net_9_1_running_var', + use_global_stats=False, + name='_350') + _327_cast = fluid.layers.cast(_327, dtype='int32') + _328 = fluid.layers.reshape(_318, name='_328', actual_shape=_327_cast, shape=[1, -1, 2]) + _341_cast = fluid.layers.cast(_341, dtype='int32') + _342 = fluid.layers.reshape(_332, name='_342', actual_shape=_341_cast, shape=[1, -1, 4]) + _351 = fluid.layers.relu(_350, name='_351') + _352 = fluid.layers.conv2d( + _351, + num_filters=128, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_9_3_weight', + name='_352', + bias_attr=False) + _353 = fluid.layers.batch_norm( + _352, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_9_4_weight', + bias_attr='_base_net_9_4_bias', + moving_mean_name='_base_net_9_4_running_mean', + moving_variance_name='_base_net_9_4_running_var', + use_global_stats=False, + name='_353') + _354 = fluid.layers.relu(_353, name='_354') + _355 = fluid.layers.conv2d( + _354, + num_filters=128, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=128, + param_attr='_base_net_10_0_weight', + name='_355', + bias_attr=False) + _356 = fluid.layers.batch_norm( + _355, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_10_1_weight', + bias_attr='_base_net_10_1_bias', + moving_mean_name='_base_net_10_1_running_mean', + 
moving_variance_name='_base_net_10_1_running_var', + use_global_stats=False, + name='_356') + _357 = fluid.layers.relu(_356, name='_357') + _358 = fluid.layers.conv2d( + _357, + num_filters=128, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_10_3_weight', + name='_358', + bias_attr=False) + _359 = fluid.layers.batch_norm( + _358, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_10_4_weight', + bias_attr='_base_net_10_4_bias', + moving_mean_name='_base_net_10_4_running_mean', + moving_variance_name='_base_net_10_4_running_var', + use_global_stats=False, + name='_359') + _360 = fluid.layers.relu(_359, name='_360') + _361 = fluid.layers.conv2d( + _360, + num_filters=128, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=128, + param_attr='_classification_headers_1_0_weight', + name='_361', + bias_attr='_classification_headers_1_0_bias') + _375 = fluid.layers.conv2d( + _360, + num_filters=128, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=128, + param_attr='_regression_headers_1_0_weight', + name='_375', + bias_attr='_regression_headers_1_0_bias') + _389 = fluid.layers.conv2d( + _360, + num_filters=128, + filter_size=[3, 3], + stride=[2, 2], + padding=[1, 1], + dilation=[1, 1], + groups=128, + param_attr='_base_net_11_0_weight', + name='_389', + bias_attr=False) + _362 = fluid.layers.relu(_361, name='_362') + _376 = fluid.layers.relu(_375, name='_376') + _390 = fluid.layers.batch_norm( + _389, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_11_1_weight', + bias_attr='_base_net_11_1_bias', + moving_mean_name='_base_net_11_1_running_mean', + moving_variance_name='_base_net_11_1_running_var', + use_global_stats=False, + name='_390') + _363 = fluid.layers.conv2d( + _362, + num_filters=4, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_classification_headers_1_2_weight', + name='_363', + bias_attr='_classification_headers_1_2_bias') + _377 = fluid.layers.conv2d( + _376, + num_filters=8, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_regression_headers_1_2_weight', + name='_377', + bias_attr='_regression_headers_1_2_bias') + _391 = fluid.layers.relu(_390, name='_391') + _364 = fluid.layers.transpose(_363, perm=[0, 2, 3, 1], name='_364') + _378 = fluid.layers.transpose(_377, perm=[0, 2, 3, 1], name='_378') + _392 = fluid.layers.conv2d( + _391, + num_filters=256, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_11_3_weight', + name='_392', + bias_attr=False) + _366 = fluid.layers.shape(_364) + _380 = fluid.layers.shape(_378) + _393 = fluid.layers.batch_norm( + _392, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_11_4_weight', + bias_attr='_base_net_11_4_bias', + moving_mean_name='_base_net_11_4_running_mean', + moving_variance_name='_base_net_11_4_running_var', + use_global_stats=False, + name='_393') + _367 = fluid.layers.gather(input=_366, index=_365) + _381 = fluid.layers.gather(input=_380, index=_379) + _394 = fluid.layers.relu(_393, name='_394') + _370 = fluid.layers.assign(_367) + _384 = fluid.layers.assign(_381) + _395 = fluid.layers.conv2d( + _394, + 
num_filters=256, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=256, + param_attr='_base_net_12_0_weight', + name='_395', + bias_attr=False) + _373 = fluid.layers.concat([_370, _371, _372], axis=0) + _387 = fluid.layers.concat([_384, _385, _386], axis=0) + _396 = fluid.layers.batch_norm( + _395, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_12_1_weight', + bias_attr='_base_net_12_1_bias', + moving_mean_name='_base_net_12_1_running_mean', + moving_variance_name='_base_net_12_1_running_var', + use_global_stats=False, + name='_396') + _373_cast = fluid.layers.cast(_373, dtype='int32') + _374 = fluid.layers.reshape(_364, name='_374', actual_shape=_373_cast, shape=[1, -1, 2]) + _387_cast = fluid.layers.cast(_387, dtype='int32') + _388 = fluid.layers.reshape(_378, name='_388', actual_shape=_387_cast, shape=[1, -1, 4]) + _397 = fluid.layers.relu(_396, name='_397') + _398 = fluid.layers.conv2d( + _397, + num_filters=256, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_12_3_weight', + name='_398', + bias_attr=False) + _399 = fluid.layers.batch_norm( + _398, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_12_4_weight', + bias_attr='_base_net_12_4_bias', + moving_mean_name='_base_net_12_4_running_mean', + moving_variance_name='_base_net_12_4_running_var', + use_global_stats=False, + name='_399') + _400 = fluid.layers.relu(_399, name='_400') + _401 = fluid.layers.conv2d( + _400, + num_filters=256, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=256, + param_attr='_classification_headers_2_0_weight', + name='_401', + bias_attr='_classification_headers_2_0_bias') + _415 = fluid.layers.conv2d( + _400, + num_filters=256, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=256, + param_attr='_regression_headers_2_0_weight', + name='_415', + bias_attr='_regression_headers_2_0_bias') + _429 = fluid.layers.conv2d( + _400, + num_filters=64, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_extras_0_0_weight', + name='_429', + bias_attr='_extras_0_0_bias') + _402 = fluid.layers.relu(_401, name='_402') + _416 = fluid.layers.relu(_415, name='_416') + _430 = fluid.layers.relu(_429, name='_430') + _403 = fluid.layers.conv2d( + _402, + num_filters=4, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_classification_headers_2_2_weight', + name='_403', + bias_attr='_classification_headers_2_2_bias') + _417 = fluid.layers.conv2d( + _416, + num_filters=8, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_regression_headers_2_2_weight', + name='_417', + bias_attr='_regression_headers_2_2_bias') + _431 = fluid.layers.conv2d( + _430, + num_filters=64, + filter_size=[3, 3], + stride=[2, 2], + padding=[1, 1], + dilation=[1, 1], + groups=64, + param_attr='_extras_0_2_0_weight', + name='_431', + bias_attr='_extras_0_2_0_bias') + _404 = fluid.layers.transpose(_403, perm=[0, 2, 3, 1], name='_404') + _418 = fluid.layers.transpose(_417, perm=[0, 2, 3, 1], name='_418') + _432 = fluid.layers.relu(_431, name='_432') + _406 = fluid.layers.shape(_404) + _420 = fluid.layers.shape(_418) + _433 = fluid.layers.conv2d( + _432, + num_filters=256, + 
filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_extras_0_2_2_weight', + name='_433', + bias_attr='_extras_0_2_2_bias') + _407 = fluid.layers.gather(input=_406, index=_405) + _421 = fluid.layers.gather(input=_420, index=_419) + _434 = fluid.layers.relu(_433, name='_434') + _410 = fluid.layers.assign(_407) + _424 = fluid.layers.assign(_421) + _435 = fluid.layers.conv2d( + _434, + num_filters=6, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=1, + param_attr='_classification_headers_3_weight', + name='_435', + bias_attr='_classification_headers_3_bias') + _447 = fluid.layers.conv2d( + _434, + num_filters=12, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=1, + param_attr='_regression_headers_3_weight', + name='_447', + bias_attr='_regression_headers_3_bias') + _413 = fluid.layers.concat([_410, _411, _412], axis=0) + _427 = fluid.layers.concat([_424, _425, _426], axis=0) + _436 = fluid.layers.transpose(_435, perm=[0, 2, 3, 1], name='_436') + _448 = fluid.layers.transpose(_447, perm=[0, 2, 3, 1], name='_448') + _413_cast = fluid.layers.cast(_413, dtype='int32') + _414 = fluid.layers.reshape(_404, name='_414', actual_shape=_413_cast, shape=[1, -1, 2]) + _427_cast = fluid.layers.cast(_427, dtype='int32') + _428 = fluid.layers.reshape(_418, name='_428', actual_shape=_427_cast, shape=[1, -1, 4]) + _438 = fluid.layers.shape(_436) + _450 = fluid.layers.shape(_448) + _439 = fluid.layers.gather(input=_438, index=_437) + _451 = fluid.layers.gather(input=_450, index=_449) + _442 = fluid.layers.assign(_439) + _454 = fluid.layers.assign(_451) + _445 = fluid.layers.concat([_442, _443, _444], axis=0) + _457 = fluid.layers.concat([_454, _455, _456], axis=0) + _445_cast = fluid.layers.cast(_445, dtype='int32') + _446 = fluid.layers.reshape(_436, name='_446', actual_shape=_445_cast, shape=[1, -1, 2]) + _457_cast = fluid.layers.cast(_457, dtype='int32') + _458 = fluid.layers.reshape(_448, name='_458', actual_shape=_457_cast, shape=[1, -1, 4]) + _459 = fluid.layers.concat([_328, _374, _414, _446], axis=1) + _460 = fluid.layers.concat([_342, _388, _428, _458], axis=1) + _scores = fluid.layers.softmax(_459, axis=2, name='_scores') + _462 = fluid.layers.slice(_460, axes=[2], starts=[0], ends=[2]) + _469 = fluid.layers.slice(_460, axes=[2], starts=[2], ends=[4]) + _464 = fluid.layers.elementwise_mul(x=_462, y=_463, name='_464') + _471 = fluid.layers.elementwise_mul(x=_469, y=_470, name='_471') + _466 = fluid.layers.elementwise_mul(x=_464, y=_465, name='_466') + _472 = fluid.layers.exp(_471, name='_472') + _468 = fluid.layers.elementwise_add(x=_466, y=_467, name='_468') + _474 = fluid.layers.elementwise_mul(x=_472, y=_473, name='_474') + _475 = fluid.layers.concat([_468, _474], axis=2) + _476 = fluid.layers.slice(_475, axes=[2], starts=[0], ends=[2]) + _477 = fluid.layers.slice(_475, axes=[2], starts=[2], ends=[4]) + _481 = fluid.layers.slice(_475, axes=[2], starts=[0], ends=[2]) + _482 = fluid.layers.slice(_475, axes=[2], starts=[2], ends=[4]) + _479 = fluid.layers.elementwise_div(x=_477, y=_478, name='_479') + _484 = fluid.layers.elementwise_div(x=_482, y=_483, name='_484') + _480 = fluid.layers.elementwise_sub(x=_476, y=_479, name='_480') + _485 = fluid.layers.elementwise_add(x=_481, y=_484, name='_485') + _boxes = fluid.layers.concat([_480, _485], axis=2) + return [_input], [_scores, _boxes] + + +def run_net(param_dir="./"): + import os + inputs, outputs = face_detector_320() + for i, out 
in enumerate(outputs):
+        if isinstance(out, list):
+            # splice the nested list in place of its parent entry; the
+            # original append-then-delete mutated the list while iterating
+            outputs[i:i + 1] = out
+            break
+    exe = fluid.Executor(fluid.CPUPlace())
+    exe.run(fluid.default_startup_program())
+
+    def if_exist(var):
+        b = os.path.exists(os.path.join(param_dir, var.name))
+        return b
+
+    fluid.io.load_vars(exe, param_dir, fluid.default_main_program(), predicate=if_exist)
diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/module.py b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..8155ae4dbd2ec4ad0d0c50e42441f838f6d196b5
--- /dev/null
+++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/module.py
@@ -0,0 +1,205 @@
+# coding=utf-8
+from __future__ import absolute_import
+from __future__ import division
+
+import ast
+import argparse
+import os
+
+import numpy as np
+import paddle.fluid as fluid
+import paddlehub as hub
+from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+from paddlehub.module.module import moduleinfo, runnable, serving
+
+from ultra_light_fast_generic_face_detector_1mb_320.processor import postprocess, base64_to_cv2
+from ultra_light_fast_generic_face_detector_1mb_320.data_feed import reader
+
+
+@moduleinfo(
+    name="ultra_light_fast_generic_face_detector_1mb_320",
+    type="CV/face_detection",
+    author="paddlepaddle",
+    author_email="paddle-dev@baidu.com",
+    summary=
+    "Ultra-Light-Fast-Generic-Face-Detector-1MB is a high-performance face detection model released on https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB.",
+    version="1.1.2")
+class FaceDetector320(hub.Module):
+    def _initialize(self):
+        self.default_pretrained_model_path = os.path.join(self.directory,
+                                                          "ultra_light_fast_generic_face_detector_1mb_320")
+        self._set_config()
+
+    def _set_config(self):
+        """
+        predictor config setting
+        """
+        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
+        cpu_config.disable_glog_info()
+        cpu_config.disable_gpu()
+        self.cpu_predictor = create_paddle_predictor(cpu_config)
+
+        try:
+            _places = os.environ["CUDA_VISIBLE_DEVICES"]
+            int(_places[0])
+            use_gpu = True
+        except:
+            use_gpu = False
+        if use_gpu:
+            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
+            gpu_config.disable_glog_info()
+            gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0)
+            self.gpu_predictor = create_paddle_predictor(gpu_config)
+
+    def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True):
+        if combined:
+            model_filename = "__model__" if not model_filename else model_filename
+            params_filename = "__params__" if not params_filename else params_filename
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+
+        program, feeded_var_names, target_vars = fluid.io.load_inference_model(
+            dirname=self.default_pretrained_model_path, executor=exe)
+
+        fluid.io.save_inference_model(
+            dirname=dirname,
+            main_program=program,
+            executor=exe,
+            feeded_var_names=feeded_var_names,
+            target_vars=target_vars,
+            model_filename=model_filename,
+            params_filename=params_filename)
+
+    def face_detection(self,
+                       images=None,
+                       paths=None,
+                       data=None,
+                       batch_size=1,
+                       use_gpu=False,
+                       output_dir='face_detector_320_predict_output',
+                       visualization=False,
+                       confs_threshold=0.5,
+                       iou_threshold=0.5):
+        """
+        API for face detection.
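+
+        Example (a minimal sketch, assuming the module has been installed
+        with `hub install` and an image exists at the given path):
+
+            import paddlehub as hub
+
+            detector = hub.Module(name="ultra_light_fast_generic_face_detector_1mb_320")
+            result = detector.face_detection(paths=['/PATH/TO/IMAGE'], visualization=True)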
+
+        Args:
+            images (list(numpy.ndarray)): images data, shape of each is [H, W, C], color space is BGR.
+            paths (list[str]): The paths of images.
+            data (dict): Deprecated. If set, the file paths in data['image'] are appended to paths.
+            batch_size (int): batch size.
+            use_gpu (bool): Whether to use gpu.
+            output_dir (str): The path to store output images.
+            visualization (bool): Whether to save image or not.
+            confs_threshold (float): threshold for confidence coefficient.
+            iou_threshold (float): threshold for iou.
+
+        Returns:
+            res (list[dict]): The result of face detection and the save path of each visualized image.
+        """
+        if use_gpu:
+            try:
+                _places = os.environ["CUDA_VISIBLE_DEVICES"]
+                int(_places[0])
+            except:
+                raise RuntimeError(
+                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
+                )
+
+        # compatibility with older versions
+        if data and 'image' in data:
+            if paths is None:
+                paths = []
+            paths += data['image']
+
+        # get all data
+        all_data = []
+        for yield_data in reader(images, paths):
+            all_data.append(yield_data)
+
+        total_num = len(all_data)
+        loop_num = int(np.ceil(total_num / batch_size))
+
+        res = []
+        for iter_id in range(loop_num):
+            batch_data = list()
+            handle_id = iter_id * batch_size
+            for image_id in range(batch_size):
+                try:
+                    batch_data.append(all_data[handle_id + image_id])
+                except IndexError:
+                    # the final batch may contain fewer than batch_size images
+                    break
+            # feed batch image
+            batch_image = np.array([data['image'] for data in batch_data])
+            batch_image = PaddleTensor(batch_image.astype('float32'))
+            data_out = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run([batch_image])
+            confidences = data_out[0].as_ndarray()
+            boxes = data_out[1].as_ndarray()
+
+            # postprocess one by one
+            for i in range(len(batch_data)):
+                out = postprocess(
+                    confidences=confidences[i],
+                    boxes=boxes[i],
+                    orig_im=batch_data[i]['orig_im'],
+                    orig_im_shape=batch_data[i]['orig_im_shape'],
+                    orig_im_path=batch_data[i]['orig_im_path'],
+                    output_dir=output_dir,
+                    visualization=visualization,
+                    confs_threshold=confs_threshold,
+                    iou_threshold=iou_threshold)
+                res.append(out)
+        return res
+
+    @serving
+    def serving_method(self, images, **kwargs):
+        """
+        Run as a service.
+        """
+        images_decode = [base64_to_cv2(image) for image in images]
+        results = self.face_detection(images_decode, **kwargs)
+        return results
+
+    @runnable
+    def run_cmd(self, argvs):
+        """
+        Run as a command.
+        """
+        self.parser = argparse.ArgumentParser(
+            description="Run the {} module.".format(self.name),
+            prog='hub run {}'.format(self.name),
+            usage='%(prog)s',
+            add_help=True)
+        self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required")
+        self.arg_config_group = self.parser.add_argument_group(
+            title="Config options", description="Run configuration for controlling module behavior, not required.")
+        self.add_module_config_arg()
+        self.add_module_input_arg()
+        args = self.parser.parse_args(argvs)
+        results = self.face_detection(
+            paths=[args.input_path],
+            batch_size=args.batch_size,
+            use_gpu=args.use_gpu,
+            output_dir=args.output_dir,
+            visualization=args.visualization)
+        return results
+
+    def add_module_config_arg(self):
+        """
+        Add the command config options.
+        """
+        self.arg_config_group.add_argument(
+            '--use_gpu', type=ast.literal_eval, default=False, help="whether to use GPU or not")
+        self.arg_config_group.add_argument(
+            '--output_dir',
+            type=str,
+            default='face_detector_320_predict_output',
+            help="The directory to save output images.")
+        self.arg_config_group.add_argument(
+            '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.")
+        self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.")
+
+    def add_module_input_arg(self):
+        """
+        Add the command input options.
+        """
+        self.arg_input_group.add_argument('--input_path', type=str, help="path to image.")
diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/processor.py b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/processor.py
new file mode 100644
index 0000000000000000000000000000000000000000..cac27c90a384412f4343aa4ca344cc32061dedf4
--- /dev/null
+++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/processor.py
@@ -0,0 +1,144 @@
+# coding=utf-8
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import time
+
+import base64
+import cv2
+import numpy as np
+
+__all__ = ['postprocess']
+
+
+def base64_to_cv2(b64str):
+    data = base64.b64decode(b64str.encode('utf8'))
+    # frombuffer replaces the deprecated np.fromstring for binary data
+    data = np.frombuffer(data, np.uint8)
+    data = cv2.imdecode(data, cv2.IMREAD_COLOR)
+    return data
+
+
+def area_of(left_top, right_bottom):
+    # areas of rectangles given top-left and bottom-right corners
+    hw = np.clip(right_bottom - left_top, 0.0, None)
+    return hw[..., 0] * hw[..., 1]
+
+
+def iou_of(boxes0, boxes1, eps=1e-5):
+    # intersection-over-union of two sets of corner-form boxes
+    overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2])
+    overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
+    overlap_area = area_of(overlap_left_top, overlap_right_bottom)
+    area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
+    area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
+    return overlap_area / (area0 + area1 - overlap_area + eps)
+
+
+def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
+    # greedy non-maximum suppression: repeatedly keep the highest-scoring
+    # box and drop remaining candidates whose IoU with it exceeds iou_threshold
+    scores = box_scores[:, -1]
+    boxes = box_scores[:, :-1]
+    picked = []
+    # _, indexes = scores.sort(descending=True)
+    indexes = np.argsort(scores)
+    # indexes = indexes[:candidate_size]
+    indexes = indexes[-candidate_size:]
+    while len(indexes) > 0:
+        # current = indexes[0]
+        current = indexes[-1]
+        picked.append(current)
+        if 0 < top_k == len(picked) or len(indexes) == 1:
+            break
+        current_box = boxes[current, :]
+        # indexes = indexes[1:]
+        indexes = indexes[:-1]
+        rest_boxes = boxes[indexes, :]
+        iou = iou_of(rest_boxes, np.expand_dims(current_box, axis=0))
+        indexes = indexes[iou <= iou_threshold]
+    return box_scores[picked, :]
+
+
+def check_dir(dir_path):
+    if not os.path.exists(dir_path):
+        os.makedirs(dir_path)
+    elif os.path.isfile(dir_path):
+        os.remove(dir_path)
+        os.makedirs(dir_path)
+
+
+def get_image_ext(image):
+    if image.shape[2] == 4:
+        return ".png"
+    return ".jpg"
+
+
+def postprocess(confidences,
+                boxes,
+                orig_im,
+                orig_im_shape,
+                orig_im_path,
+                output_dir,
+                visualization,
+                confs_threshold=0.5,
+                iou_threshold=0.5):
+    """
+    Postprocess the output of the network, one image at a time.
+
+    Args:
+        confidences (numpy.ndarray): confidences, with shape [num, 2]
+        boxes (numpy.ndarray): boxes coordinate, with shape [num, 4]
+        orig_im (numpy.ndarray): original image.
+        orig_im_shape (tuple): shape of original image.
+        orig_im_path (str): path of original image.
+        output_dir (str): output directory to store image.
+        visualization (bool): whether to save image or not.
+        confs_threshold (float): confidence threshold for keeping a detection.
+        iou_threshold (float): IoU threshold used by hard_nms.
+    """
+    output = {}
+    output['data'] = []
+    if orig_im_path:
+        output['path'] = orig_im_path
+    picked_box_probs = []
+    picked_labels = []
+    for class_index in range(1, confidences.shape[1]):
+        probs = confidences[:, class_index]
+        mask = probs > confs_threshold
+        probs = probs[mask]
+        if probs.shape[0] == 0:
+            continue
+        subset_boxes = boxes[mask, :]
+        box_probs = np.concatenate([subset_boxes, probs.reshape(-1, 1)], axis=1)
+        box_probs = hard_nms(box_probs, iou_threshold=iou_threshold, top_k=-1)
+        picked_box_probs.append(box_probs)
+        picked_labels.extend([class_index] * box_probs.shape[0])
+
+    if not picked_box_probs:
+        return output
+
+    picked_box_probs = np.concatenate(picked_box_probs)
+    # scale normalized coordinates back to the original image size
+    picked_box_probs[:, 0] *= orig_im_shape[1]
+    picked_box_probs[:, 1] *= orig_im_shape[0]
+    picked_box_probs[:, 2] *= orig_im_shape[1]
+    picked_box_probs[:, 3] *= orig_im_shape[0]
+
+    for data in picked_box_probs:
+        output['data'].append({
+            'left': float(data[0]),
+            'right': float(data[2]),
+            'top': float(data[1]),
+            'bottom': float(data[3]),
+            'confidence': float(data[4])
+        })
+
+    picked_box_probs = picked_box_probs[:, :4].astype(np.int32)
+    if visualization:
+        for i in range(picked_box_probs.shape[0]):
+            box = picked_box_probs[i]
+            cv2.rectangle(orig_im, (box[0], box[1]), (box[2], box[3]), (255, 255, 0), 2)
+        check_dir(output_dir)
+        # os.path.splitext returns (root, ext); keep only the extension
+        ext = os.path.splitext(orig_im_path)[1] if orig_im_path else ''
+        ext = ext if ext else get_image_ext(orig_im)
+        orig_im_path = orig_im_path if orig_im_path else 'ndarray_{}{}'.format(time.time(), ext)
+        im_name = os.path.basename(orig_im_path)
+        im_save_path = os.path.join(output_dir, im_name)
+        output['save_path'] = im_save_path
+        cv2.imwrite(im_save_path, orig_im)
+    return output
diff --git a/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README.md b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README.md
similarity index 100%
rename from hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README.md
rename to modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README.md
diff --git a/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/__init__.py b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/__init__.py
similarity index 100%
rename from hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/__init__.py
rename to modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/__init__.py
diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/data_feed.py b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/data_feed.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a66ffa59b79cbe484f3b928efc9f468fec3f2bf
--- /dev/null
+++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/data_feed.py
@@ -0,0 +1,52 @@
+# coding=utf-8
+import os
+from collections import OrderedDict
+
+import cv2
+import numpy as np
+
+__all__ = ['reader']
+
+
+def preprocess(orig_image):
+    image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)
+    image = cv2.resize(image, (640, 480))
+    image_mean = np.array([127, 127, 127])
+    image = (image - image_mean) / 128.0
+    image = np.transpose(image, [2, 0, 1])
+    return image
+
+
+def reader(images=None, paths=None):
+    """
+
Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path) + each['orig_im'] = im + each['orig_im_shape'] = im.shape # height, width, channel + each['orig_im_path'] = im_path + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." + for im in images: + each = OrderedDict() + each['orig_im'] = im + each['orig_im_path'] = None + each['orig_im_shape'] = im.shape # height, width, channel + component.append(each) + + for element in component: + element['image'] = preprocess(element['orig_im']) + yield element diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/face_detector.py b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/face_detector.py new file mode 100644 index 0000000000000000000000000000000000000000..d1b6d894eaef2520c4981e4c3a4d7302be5de603 --- /dev/null +++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/face_detector.py @@ -0,0 +1,1186 @@ +# coding=utf-8 +from paddle.fluid.initializer import Constant +from paddle.fluid.param_attr import ParamAttr +import paddle.fluid as fluid + + +def face_detector(): + _319 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) + _322 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) + _323 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) + _333 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) + _336 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) + _337 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) + _365 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) + _368 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) + _369 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) + _379 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) + _382 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) + _383 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) + _405 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) + _408 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) + _409 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) + _419 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) + _422 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) + _423 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) + _437 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) + _440 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) + _441 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) + _449 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) + _452 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) + _453 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) + _463 = fluid.layers.fill_constant(shape=[1], dtype='float32', value=0.10000000149011612) + _465 = fluid.layers.create_parameter( + dtype='float32', shape=[1, 17640, 2], name='_465', attr='_465', default_initializer=Constant(0.0)) + _467 = fluid.layers.create_parameter( + dtype='float32', 
shape=[1, 17640, 2], name='_467', attr='_467', default_initializer=Constant(0.0)) + _470 = fluid.layers.fill_constant(shape=[1], dtype='float32', value=0.20000000298023224) + _473 = fluid.layers.create_parameter( + dtype='float32', shape=[1, 17640, 2], name='_473', attr='_473', default_initializer=Constant(0.0)) + _478 = fluid.layers.fill_constant(shape=[1], dtype='float32', value=2.0) + _483 = fluid.layers.fill_constant(shape=[1], dtype='float32', value=2.0) + _input = fluid.layers.data(dtype='float32', shape=[1, 3, 480, 640], name='_input', append_batch_size=False) + _325 = fluid.layers.assign(_322) + _326 = fluid.layers.assign(_323) + _339 = fluid.layers.assign(_336) + _340 = fluid.layers.assign(_337) + _371 = fluid.layers.assign(_368) + _372 = fluid.layers.assign(_369) + _385 = fluid.layers.assign(_382) + _386 = fluid.layers.assign(_383) + _411 = fluid.layers.assign(_408) + _412 = fluid.layers.assign(_409) + _425 = fluid.layers.assign(_422) + _426 = fluid.layers.assign(_423) + _443 = fluid.layers.assign(_440) + _444 = fluid.layers.assign(_441) + _455 = fluid.layers.assign(_452) + _456 = fluid.layers.assign(_453) + _245 = fluid.layers.conv2d( + _input, + num_filters=16, + filter_size=[3, 3], + stride=[2, 2], + padding=[1, 1], + dilation=[1, 1], + groups=1, + param_attr='_base_net_0_0_weight', + name='_245', + bias_attr=False) + _246 = fluid.layers.batch_norm( + _245, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_0_1_weight', + bias_attr='_base_net_0_1_bias', + moving_mean_name='_base_net_0_1_running_mean', + moving_variance_name='_base_net_0_1_running_var', + use_global_stats=False, + name='_246') + _247 = fluid.layers.relu(_246, name='_247') + _248 = fluid.layers.conv2d( + _247, + num_filters=16, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=16, + param_attr='_base_net_1_0_weight', + name='_248', + bias_attr=False) + _249 = fluid.layers.batch_norm( + _248, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_1_1_weight', + bias_attr='_base_net_1_1_bias', + moving_mean_name='_base_net_1_1_running_mean', + moving_variance_name='_base_net_1_1_running_var', + use_global_stats=False, + name='_249') + _250 = fluid.layers.relu(_249, name='_250') + _251 = fluid.layers.conv2d( + _250, + num_filters=32, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_1_3_weight', + name='_251', + bias_attr=False) + _252 = fluid.layers.batch_norm( + _251, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_1_4_weight', + bias_attr='_base_net_1_4_bias', + moving_mean_name='_base_net_1_4_running_mean', + moving_variance_name='_base_net_1_4_running_var', + use_global_stats=False, + name='_252') + _253 = fluid.layers.relu(_252, name='_253') + _254 = fluid.layers.conv2d( + _253, + num_filters=32, + filter_size=[3, 3], + stride=[2, 2], + padding=[1, 1], + dilation=[1, 1], + groups=32, + param_attr='_base_net_2_0_weight', + name='_254', + bias_attr=False) + _255 = fluid.layers.batch_norm( + _254, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_2_1_weight', + bias_attr='_base_net_2_1_bias', + moving_mean_name='_base_net_2_1_running_mean', + moving_variance_name='_base_net_2_1_running_var', + use_global_stats=False, + 
name='_255') + _256 = fluid.layers.relu(_255, name='_256') + _257 = fluid.layers.conv2d( + _256, + num_filters=32, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_2_3_weight', + name='_257', + bias_attr=False) + _258 = fluid.layers.batch_norm( + _257, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_2_4_weight', + bias_attr='_base_net_2_4_bias', + moving_mean_name='_base_net_2_4_running_mean', + moving_variance_name='_base_net_2_4_running_var', + use_global_stats=False, + name='_258') + _259 = fluid.layers.relu(_258, name='_259') + _260 = fluid.layers.conv2d( + _259, + num_filters=32, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=32, + param_attr='_base_net_3_0_weight', + name='_260', + bias_attr=False) + _261 = fluid.layers.batch_norm( + _260, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_3_1_weight', + bias_attr='_base_net_3_1_bias', + moving_mean_name='_base_net_3_1_running_mean', + moving_variance_name='_base_net_3_1_running_var', + use_global_stats=False, + name='_261') + _262 = fluid.layers.relu(_261, name='_262') + _263 = fluid.layers.conv2d( + _262, + num_filters=32, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_3_3_weight', + name='_263', + bias_attr=False) + _264 = fluid.layers.batch_norm( + _263, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_3_4_weight', + bias_attr='_base_net_3_4_bias', + moving_mean_name='_base_net_3_4_running_mean', + moving_variance_name='_base_net_3_4_running_var', + use_global_stats=False, + name='_264') + _265 = fluid.layers.relu(_264, name='_265') + _266 = fluid.layers.conv2d( + _265, + num_filters=32, + filter_size=[3, 3], + stride=[2, 2], + padding=[1, 1], + dilation=[1, 1], + groups=32, + param_attr='_base_net_4_0_weight', + name='_266', + bias_attr=False) + _267 = fluid.layers.batch_norm( + _266, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_4_1_weight', + bias_attr='_base_net_4_1_bias', + moving_mean_name='_base_net_4_1_running_mean', + moving_variance_name='_base_net_4_1_running_var', + use_global_stats=False, + name='_267') + _268 = fluid.layers.relu(_267, name='_268') + _269 = fluid.layers.conv2d( + _268, + num_filters=64, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_4_3_weight', + name='_269', + bias_attr=False) + _270 = fluid.layers.batch_norm( + _269, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_4_4_weight', + bias_attr='_base_net_4_4_bias', + moving_mean_name='_base_net_4_4_running_mean', + moving_variance_name='_base_net_4_4_running_var', + use_global_stats=False, + name='_270') + _271 = fluid.layers.relu(_270, name='_271') + _272 = fluid.layers.conv2d( + _271, + num_filters=64, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=64, + param_attr='_base_net_5_0_weight', + name='_272', + bias_attr=False) + _273 = fluid.layers.batch_norm( + _272, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_5_1_weight', + 
bias_attr='_base_net_5_1_bias', + moving_mean_name='_base_net_5_1_running_mean', + moving_variance_name='_base_net_5_1_running_var', + use_global_stats=False, + name='_273') + _274 = fluid.layers.relu(_273, name='_274') + _275 = fluid.layers.conv2d( + _274, + num_filters=64, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_5_3_weight', + name='_275', + bias_attr=False) + _276 = fluid.layers.batch_norm( + _275, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_5_4_weight', + bias_attr='_base_net_5_4_bias', + moving_mean_name='_base_net_5_4_running_mean', + moving_variance_name='_base_net_5_4_running_var', + use_global_stats=False, + name='_276') + _277 = fluid.layers.relu(_276, name='_277') + _278 = fluid.layers.conv2d( + _277, + num_filters=64, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=64, + param_attr='_base_net_6_0_weight', + name='_278', + bias_attr=False) + _279 = fluid.layers.batch_norm( + _278, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_6_1_weight', + bias_attr='_base_net_6_1_bias', + moving_mean_name='_base_net_6_1_running_mean', + moving_variance_name='_base_net_6_1_running_var', + use_global_stats=False, + name='_279') + _280 = fluid.layers.relu(_279, name='_280') + _281 = fluid.layers.conv2d( + _280, + num_filters=64, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_6_3_weight', + name='_281', + bias_attr=False) + _282 = fluid.layers.batch_norm( + _281, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_6_4_weight', + bias_attr='_base_net_6_4_bias', + moving_mean_name='_base_net_6_4_running_mean', + moving_variance_name='_base_net_6_4_running_var', + use_global_stats=False, + name='_282') + _283 = fluid.layers.relu(_282, name='_283') + _284 = fluid.layers.conv2d( + _283, + num_filters=8, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_7_branch0_0_conv_weight', + name='_284', + bias_attr=False) + _291 = fluid.layers.conv2d( + _283, + num_filters=8, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_7_branch1_0_conv_weight', + name='_291', + bias_attr=False) + _298 = fluid.layers.conv2d( + _283, + num_filters=8, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_7_branch2_0_conv_weight', + name='_298', + bias_attr=False) + _311 = fluid.layers.conv2d( + _283, + num_filters=64, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_7_shortcut_conv_weight', + name='_311', + bias_attr=False) + _285 = fluid.layers.batch_norm( + _284, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_7_branch0_0_bn_weight', + bias_attr='_base_net_7_branch0_0_bn_bias', + moving_mean_name='_base_net_7_branch0_0_bn_running_mean', + moving_variance_name='_base_net_7_branch0_0_bn_running_var', + use_global_stats=False, + name='_285') + _292 = fluid.layers.batch_norm( + _291, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + 
param_attr='_base_net_7_branch1_0_bn_weight', + bias_attr='_base_net_7_branch1_0_bn_bias', + moving_mean_name='_base_net_7_branch1_0_bn_running_mean', + moving_variance_name='_base_net_7_branch1_0_bn_running_var', + use_global_stats=False, + name='_292') + _299 = fluid.layers.batch_norm( + _298, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_7_branch2_0_bn_weight', + bias_attr='_base_net_7_branch2_0_bn_bias', + moving_mean_name='_base_net_7_branch2_0_bn_running_mean', + moving_variance_name='_base_net_7_branch2_0_bn_running_var', + use_global_stats=False, + name='_299') + _312 = fluid.layers.batch_norm( + _311, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_7_shortcut_bn_weight', + bias_attr='_base_net_7_shortcut_bn_bias', + moving_mean_name='_base_net_7_shortcut_bn_running_mean', + moving_variance_name='_base_net_7_shortcut_bn_running_var', + use_global_stats=False, + name='_312') + _286 = fluid.layers.conv2d( + _285, + num_filters=16, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=1, + param_attr='_base_net_7_branch0_1_conv_weight', + name='_286', + bias_attr=False) + _293 = fluid.layers.conv2d( + _292, + num_filters=16, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=1, + param_attr='_base_net_7_branch1_1_conv_weight', + name='_293', + bias_attr=False) + _300 = fluid.layers.conv2d( + _299, + num_filters=12, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=1, + param_attr='_base_net_7_branch2_1_conv_weight', + name='_300', + bias_attr=False) + _287 = fluid.layers.batch_norm( + _286, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_7_branch0_1_bn_weight', + bias_attr='_base_net_7_branch0_1_bn_bias', + moving_mean_name='_base_net_7_branch0_1_bn_running_mean', + moving_variance_name='_base_net_7_branch0_1_bn_running_var', + use_global_stats=False, + name='_287') + _294 = fluid.layers.batch_norm( + _293, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_7_branch1_1_bn_weight', + bias_attr='_base_net_7_branch1_1_bn_bias', + moving_mean_name='_base_net_7_branch1_1_bn_running_mean', + moving_variance_name='_base_net_7_branch1_1_bn_running_var', + use_global_stats=False, + name='_294') + _301 = fluid.layers.batch_norm( + _300, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_7_branch2_1_bn_weight', + bias_attr='_base_net_7_branch2_1_bn_bias', + moving_mean_name='_base_net_7_branch2_1_bn_running_mean', + moving_variance_name='_base_net_7_branch2_1_bn_running_var', + use_global_stats=False, + name='_301') + _288 = fluid.layers.relu(_287, name='_288') + _295 = fluid.layers.relu(_294, name='_295') + _302 = fluid.layers.relu(_301, name='_302') + _289 = fluid.layers.conv2d( + _288, + num_filters=16, + filter_size=[3, 3], + stride=[1, 1], + padding=[2, 2], + dilation=[2, 2], + groups=1, + param_attr='_base_net_7_branch0_2_conv_weight', + name='_289', + bias_attr=False) + _296 = fluid.layers.conv2d( + _295, + num_filters=16, + filter_size=[3, 3], + stride=[1, 1], + padding=[3, 3], + dilation=[3, 3], + groups=1, + param_attr='_base_net_7_branch1_2_conv_weight', + name='_296', + bias_attr=False) + _303 = 
fluid.layers.conv2d( + _302, + num_filters=16, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=1, + param_attr='_base_net_7_branch2_2_conv_weight', + name='_303', + bias_attr=False) + _290 = fluid.layers.batch_norm( + _289, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_7_branch0_2_bn_weight', + bias_attr='_base_net_7_branch0_2_bn_bias', + moving_mean_name='_base_net_7_branch0_2_bn_running_mean', + moving_variance_name='_base_net_7_branch0_2_bn_running_var', + use_global_stats=False, + name='_290') + _297 = fluid.layers.batch_norm( + _296, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_7_branch1_2_bn_weight', + bias_attr='_base_net_7_branch1_2_bn_bias', + moving_mean_name='_base_net_7_branch1_2_bn_running_mean', + moving_variance_name='_base_net_7_branch1_2_bn_running_var', + use_global_stats=False, + name='_297') + _304 = fluid.layers.batch_norm( + _303, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_7_branch2_2_bn_weight', + bias_attr='_base_net_7_branch2_2_bn_bias', + moving_mean_name='_base_net_7_branch2_2_bn_running_mean', + moving_variance_name='_base_net_7_branch2_2_bn_running_var', + use_global_stats=False, + name='_304') + _305 = fluid.layers.relu(_304, name='_305') + _306 = fluid.layers.conv2d( + _305, + num_filters=16, + filter_size=[3, 3], + stride=[1, 1], + padding=[5, 5], + dilation=[5, 5], + groups=1, + param_attr='_base_net_7_branch2_3_conv_weight', + name='_306', + bias_attr=False) + _307 = fluid.layers.batch_norm( + _306, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_7_branch2_3_bn_weight', + bias_attr='_base_net_7_branch2_3_bn_bias', + moving_mean_name='_base_net_7_branch2_3_bn_running_mean', + moving_variance_name='_base_net_7_branch2_3_bn_running_var', + use_global_stats=False, + name='_307') + _308 = fluid.layers.concat([_290, _297, _307], axis=1) + _309 = fluid.layers.conv2d( + _308, + num_filters=64, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_7_ConvLinear_conv_weight', + name='_309', + bias_attr=False) + _310 = fluid.layers.batch_norm( + _309, + momentum=0.9900000095367432, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_7_ConvLinear_bn_weight', + bias_attr='_base_net_7_ConvLinear_bn_bias', + moving_mean_name='_base_net_7_ConvLinear_bn_running_mean', + moving_variance_name='_base_net_7_ConvLinear_bn_running_var', + use_global_stats=False, + name='_310') + _313 = fluid.layers.elementwise_add(x=_310, y=_312, name='_313') + _314 = fluid.layers.relu(_313, name='_314') + _315 = fluid.layers.conv2d( + _314, + num_filters=64, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=64, + param_attr='_classification_headers_0_0_weight', + name='_315', + bias_attr='_classification_headers_0_0_bias') + _329 = fluid.layers.conv2d( + _314, + num_filters=64, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=64, + param_attr='_regression_headers_0_0_weight', + name='_329', + bias_attr='_regression_headers_0_0_bias') + _343 = fluid.layers.conv2d( + _314, + num_filters=64, + filter_size=[3, 3], + stride=[2, 2], + padding=[1, 1], + dilation=[1, 1], + 
groups=64, + param_attr='_base_net_8_0_weight', + name='_343', + bias_attr=False) + _316 = fluid.layers.relu(_315, name='_316') + _330 = fluid.layers.relu(_329, name='_330') + _344 = fluid.layers.batch_norm( + _343, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_8_1_weight', + bias_attr='_base_net_8_1_bias', + moving_mean_name='_base_net_8_1_running_mean', + moving_variance_name='_base_net_8_1_running_var', + use_global_stats=False, + name='_344') + _317 = fluid.layers.conv2d( + _316, + num_filters=6, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_classification_headers_0_2_weight', + name='_317', + bias_attr='_classification_headers_0_2_bias') + _331 = fluid.layers.conv2d( + _330, + num_filters=12, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_regression_headers_0_2_weight', + name='_331', + bias_attr='_regression_headers_0_2_bias') + _345 = fluid.layers.relu(_344, name='_345') + _318 = fluid.layers.transpose(_317, perm=[0, 2, 3, 1], name='_318') + _332 = fluid.layers.transpose(_331, perm=[0, 2, 3, 1], name='_332') + _346 = fluid.layers.conv2d( + _345, + num_filters=128, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_8_3_weight', + name='_346', + bias_attr=False) + _320 = fluid.layers.shape(_318) + _334 = fluid.layers.shape(_332) + _347 = fluid.layers.batch_norm( + _346, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_8_4_weight', + bias_attr='_base_net_8_4_bias', + moving_mean_name='_base_net_8_4_running_mean', + moving_variance_name='_base_net_8_4_running_var', + use_global_stats=False, + name='_347') + _321 = fluid.layers.gather(input=_320, index=_319) + _335 = fluid.layers.gather(input=_334, index=_333) + _348 = fluid.layers.relu(_347, name='_348') + _324 = fluid.layers.assign(_321) + _338 = fluid.layers.assign(_335) + _349 = fluid.layers.conv2d( + _348, + num_filters=128, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=128, + param_attr='_base_net_9_0_weight', + name='_349', + bias_attr=False) + _327 = fluid.layers.concat([_324, _325, _326], axis=0) + _341 = fluid.layers.concat([_338, _339, _340], axis=0) + _350 = fluid.layers.batch_norm( + _349, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_9_1_weight', + bias_attr='_base_net_9_1_bias', + moving_mean_name='_base_net_9_1_running_mean', + moving_variance_name='_base_net_9_1_running_var', + use_global_stats=False, + name='_350') + _327_cast = fluid.layers.cast(_327, dtype='int32') + _328 = fluid.layers.reshape(_318, name='_328', actual_shape=_327_cast, shape=[1, -1, 2]) + _341_cast = fluid.layers.cast(_341, dtype='int32') + _342 = fluid.layers.reshape(_332, name='_342', actual_shape=_341_cast, shape=[1, -1, 4]) + _351 = fluid.layers.relu(_350, name='_351') + _352 = fluid.layers.conv2d( + _351, + num_filters=128, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_9_3_weight', + name='_352', + bias_attr=False) + _353 = fluid.layers.batch_norm( + _352, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_9_4_weight', + bias_attr='_base_net_9_4_bias', + 
moving_mean_name='_base_net_9_4_running_mean', + moving_variance_name='_base_net_9_4_running_var', + use_global_stats=False, + name='_353') + _354 = fluid.layers.relu(_353, name='_354') + _355 = fluid.layers.conv2d( + _354, + num_filters=128, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=128, + param_attr='_base_net_10_0_weight', + name='_355', + bias_attr=False) + _356 = fluid.layers.batch_norm( + _355, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_10_1_weight', + bias_attr='_base_net_10_1_bias', + moving_mean_name='_base_net_10_1_running_mean', + moving_variance_name='_base_net_10_1_running_var', + use_global_stats=False, + name='_356') + _357 = fluid.layers.relu(_356, name='_357') + _358 = fluid.layers.conv2d( + _357, + num_filters=128, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_10_3_weight', + name='_358', + bias_attr=False) + _359 = fluid.layers.batch_norm( + _358, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_10_4_weight', + bias_attr='_base_net_10_4_bias', + moving_mean_name='_base_net_10_4_running_mean', + moving_variance_name='_base_net_10_4_running_var', + use_global_stats=False, + name='_359') + _360 = fluid.layers.relu(_359, name='_360') + _361 = fluid.layers.conv2d( + _360, + num_filters=128, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=128, + param_attr='_classification_headers_1_0_weight', + name='_361', + bias_attr='_classification_headers_1_0_bias') + _375 = fluid.layers.conv2d( + _360, + num_filters=128, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=128, + param_attr='_regression_headers_1_0_weight', + name='_375', + bias_attr='_regression_headers_1_0_bias') + _389 = fluid.layers.conv2d( + _360, + num_filters=128, + filter_size=[3, 3], + stride=[2, 2], + padding=[1, 1], + dilation=[1, 1], + groups=128, + param_attr='_base_net_11_0_weight', + name='_389', + bias_attr=False) + _362 = fluid.layers.relu(_361, name='_362') + _376 = fluid.layers.relu(_375, name='_376') + _390 = fluid.layers.batch_norm( + _389, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_11_1_weight', + bias_attr='_base_net_11_1_bias', + moving_mean_name='_base_net_11_1_running_mean', + moving_variance_name='_base_net_11_1_running_var', + use_global_stats=False, + name='_390') + _363 = fluid.layers.conv2d( + _362, + num_filters=4, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_classification_headers_1_2_weight', + name='_363', + bias_attr='_classification_headers_1_2_bias') + _377 = fluid.layers.conv2d( + _376, + num_filters=8, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_regression_headers_1_2_weight', + name='_377', + bias_attr='_regression_headers_1_2_bias') + _391 = fluid.layers.relu(_390, name='_391') + _364 = fluid.layers.transpose(_363, perm=[0, 2, 3, 1], name='_364') + _378 = fluid.layers.transpose(_377, perm=[0, 2, 3, 1], name='_378') + _392 = fluid.layers.conv2d( + _391, + num_filters=256, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_11_3_weight', + name='_392', + bias_attr=False) + _366 = 
fluid.layers.shape(_364) + _380 = fluid.layers.shape(_378) + _393 = fluid.layers.batch_norm( + _392, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_11_4_weight', + bias_attr='_base_net_11_4_bias', + moving_mean_name='_base_net_11_4_running_mean', + moving_variance_name='_base_net_11_4_running_var', + use_global_stats=False, + name='_393') + _367 = fluid.layers.gather(input=_366, index=_365) + _381 = fluid.layers.gather(input=_380, index=_379) + _394 = fluid.layers.relu(_393, name='_394') + _370 = fluid.layers.assign(_367) + _384 = fluid.layers.assign(_381) + _395 = fluid.layers.conv2d( + _394, + num_filters=256, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=256, + param_attr='_base_net_12_0_weight', + name='_395', + bias_attr=False) + _373 = fluid.layers.concat([_370, _371, _372], axis=0) + _387 = fluid.layers.concat([_384, _385, _386], axis=0) + _396 = fluid.layers.batch_norm( + _395, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_12_1_weight', + bias_attr='_base_net_12_1_bias', + moving_mean_name='_base_net_12_1_running_mean', + moving_variance_name='_base_net_12_1_running_var', + use_global_stats=False, + name='_396') + _373_cast = fluid.layers.cast(_373, dtype='int32') + _374 = fluid.layers.reshape(_364, name='_374', actual_shape=_373_cast, shape=[1, -1, 2]) + _387_cast = fluid.layers.cast(_387, dtype='int32') + _388 = fluid.layers.reshape(_378, name='_388', actual_shape=_387_cast, shape=[1, -1, 4]) + _397 = fluid.layers.relu(_396, name='_397') + _398 = fluid.layers.conv2d( + _397, + num_filters=256, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_base_net_12_3_weight', + name='_398', + bias_attr=False) + _399 = fluid.layers.batch_norm( + _398, + momentum=0.8999999761581421, + epsilon=9.999999747378752e-06, + data_layout='NCHW', + is_test=True, + param_attr='_base_net_12_4_weight', + bias_attr='_base_net_12_4_bias', + moving_mean_name='_base_net_12_4_running_mean', + moving_variance_name='_base_net_12_4_running_var', + use_global_stats=False, + name='_399') + _400 = fluid.layers.relu(_399, name='_400') + _401 = fluid.layers.conv2d( + _400, + num_filters=256, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=256, + param_attr='_classification_headers_2_0_weight', + name='_401', + bias_attr='_classification_headers_2_0_bias') + _415 = fluid.layers.conv2d( + _400, + num_filters=256, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=256, + param_attr='_regression_headers_2_0_weight', + name='_415', + bias_attr='_regression_headers_2_0_bias') + _429 = fluid.layers.conv2d( + _400, + num_filters=64, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_extras_0_0_weight', + name='_429', + bias_attr='_extras_0_0_bias') + _402 = fluid.layers.relu(_401, name='_402') + _416 = fluid.layers.relu(_415, name='_416') + _430 = fluid.layers.relu(_429, name='_430') + _403 = fluid.layers.conv2d( + _402, + num_filters=4, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_classification_headers_2_2_weight', + name='_403', + bias_attr='_classification_headers_2_2_bias') + _417 = fluid.layers.conv2d( + _416, + num_filters=8, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + 
dilation=[1, 1], + groups=1, + param_attr='_regression_headers_2_2_weight', + name='_417', + bias_attr='_regression_headers_2_2_bias') + _431 = fluid.layers.conv2d( + _430, + num_filters=64, + filter_size=[3, 3], + stride=[2, 2], + padding=[1, 1], + dilation=[1, 1], + groups=64, + param_attr='_extras_0_2_0_weight', + name='_431', + bias_attr='_extras_0_2_0_bias') + _404 = fluid.layers.transpose(_403, perm=[0, 2, 3, 1], name='_404') + _418 = fluid.layers.transpose(_417, perm=[0, 2, 3, 1], name='_418') + _432 = fluid.layers.relu(_431, name='_432') + _406 = fluid.layers.shape(_404) + _420 = fluid.layers.shape(_418) + _433 = fluid.layers.conv2d( + _432, + num_filters=256, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='_extras_0_2_2_weight', + name='_433', + bias_attr='_extras_0_2_2_bias') + _407 = fluid.layers.gather(input=_406, index=_405) + _421 = fluid.layers.gather(input=_420, index=_419) + _434 = fluid.layers.relu(_433, name='_434') + _410 = fluid.layers.assign(_407) + _424 = fluid.layers.assign(_421) + _435 = fluid.layers.conv2d( + _434, + num_filters=6, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=1, + param_attr='_classification_headers_3_weight', + name='_435', + bias_attr='_classification_headers_3_bias') + _447 = fluid.layers.conv2d( + _434, + num_filters=12, + filter_size=[3, 3], + stride=[1, 1], + padding=[1, 1], + dilation=[1, 1], + groups=1, + param_attr='_regression_headers_3_weight', + name='_447', + bias_attr='_regression_headers_3_bias') + _413 = fluid.layers.concat([_410, _411, _412], axis=0) + _427 = fluid.layers.concat([_424, _425, _426], axis=0) + _436 = fluid.layers.transpose(_435, perm=[0, 2, 3, 1], name='_436') + _448 = fluid.layers.transpose(_447, perm=[0, 2, 3, 1], name='_448') + _413_cast = fluid.layers.cast(_413, dtype='int32') + _414 = fluid.layers.reshape(_404, name='_414', actual_shape=_413_cast, shape=[1, -1, 2]) + _427_cast = fluid.layers.cast(_427, dtype='int32') + _428 = fluid.layers.reshape(_418, name='_428', actual_shape=_427_cast, shape=[1, -1, 4]) + _438 = fluid.layers.shape(_436) + _450 = fluid.layers.shape(_448) + _439 = fluid.layers.gather(input=_438, index=_437) + _451 = fluid.layers.gather(input=_450, index=_449) + _442 = fluid.layers.assign(_439) + _454 = fluid.layers.assign(_451) + _445 = fluid.layers.concat([_442, _443, _444], axis=0) + _457 = fluid.layers.concat([_454, _455, _456], axis=0) + _445_cast = fluid.layers.cast(_445, dtype='int32') + _446 = fluid.layers.reshape(_436, name='_446', actual_shape=_445_cast, shape=[1, -1, 2]) + _457_cast = fluid.layers.cast(_457, dtype='int32') + _458 = fluid.layers.reshape(_448, name='_458', actual_shape=_457_cast, shape=[1, -1, 4]) + _459 = fluid.layers.concat([_328, _374, _414, _446], axis=1) + _460 = fluid.layers.concat([_342, _388, _428, _458], axis=1) + _scores = fluid.layers.softmax(_459, axis=2, name='_scores') + _462 = fluid.layers.slice(_460, axes=[2], starts=[0], ends=[2]) + _469 = fluid.layers.slice(_460, axes=[2], starts=[2], ends=[4]) + _464 = fluid.layers.elementwise_mul(x=_462, y=_463, name='_464') + _471 = fluid.layers.elementwise_mul(x=_469, y=_470, name='_471') + _466 = fluid.layers.elementwise_mul(x=_464, y=_465, name='_466') + _472 = fluid.layers.exp(_471, name='_472') + _468 = fluid.layers.elementwise_add(x=_466, y=_467, name='_468') + _474 = fluid.layers.elementwise_mul(x=_472, y=_473, name='_474') + _475 = fluid.layers.concat([_468, _474], axis=2) + _476 = fluid.layers.slice(_475, 
axes=[2], starts=[0], ends=[2]) + _477 = fluid.layers.slice(_475, axes=[2], starts=[2], ends=[4]) + _481 = fluid.layers.slice(_475, axes=[2], starts=[0], ends=[2]) + _482 = fluid.layers.slice(_475, axes=[2], starts=[2], ends=[4]) + _479 = fluid.layers.elementwise_div(x=_477, y=_478, name='_479') + _484 = fluid.layers.elementwise_div(x=_482, y=_483, name='_484') + _480 = fluid.layers.elementwise_sub(x=_476, y=_479, name='_480') + _485 = fluid.layers.elementwise_add(x=_481, y=_484, name='_485') + _boxes = fluid.layers.concat([_480, _485], axis=2) + + return [_input], [_scores, _boxes] + + +def run_net(param_dir="./"): + import os + inputs, outputs = face_detector() + for i, out in enumerate(outputs): + if isinstance(out, list): + for out_part in out: + outputs.append(out_part) + del outputs[i] + exe = fluid.Executor(fluid.CPUPlace()) + exe.run(fluid.default_startup_program()) + + def if_exist(var): + b = os.path.exists(os.path.join(param_dir, var.name)) + return b + + fluid.io.load_vars(exe, param_dir, fluid.default_main_program(), predicate=if_exist) diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/module.py b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/module.py new file mode 100644 index 0000000000000000000000000000000000000000..b2f10457841d94d8746004f9aa5942a46ac054eb --- /dev/null +++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/module.py @@ -0,0 +1,204 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division + +import ast +import argparse +import os + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving + +from ultra_light_fast_generic_face_detector_1mb_640.processor import postprocess, base64_to_cv2 +from ultra_light_fast_generic_face_detector_1mb_640.data_feed import reader + + +@moduleinfo( + name="ultra_light_fast_generic_face_detector_1mb_640", + type="CV/face_detection", + author="paddlepaddle", + author_email="paddle-dev@baidu.com", + summary= + "Ultra-Light-Fast-Generic-Face-Detector-1MB is a high-performance object detection model release on https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB.", + version="1.1.2") +class FaceDetector640(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, + "ultra_light_fast_generic_face_detector_1mb_640") + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, 
feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + def face_detection(self, + images=None, + paths=None, + data=None, + batch_size=1, + use_gpu=False, + output_dir='face_detector_640_predict_output', + visualization=False, + confs_threshold=0.5, + iou_threshold=0.5): + """ + API for face detection. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + output_dir (str): The path to store output images. + visualization (bool): Whether to save image or not. + confs_threshold (float): threshold for confidence coefficient. + iou_threshold (float): threshold for iou. + Returns: + res (list[dict()]): The result of face detection and save path of images. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." + ) + + # compatibility with older versions + if data and 'image' in data: + if paths is None: + paths = [] + paths += data['image'] + + # get all data + all_data = [] + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + + res = [] + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + batch_image = PaddleTensor(batch_image.astype('float32')) + data_out = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run([batch_image]) + confidences = data_out[0].as_ndarray() + boxes = data_out[1].as_ndarray() + + # postprocess one by one + for i in range(len(batch_data)): + out = postprocess( + confidences=confidences[i], + boxes=boxes[i], + orig_im=batch_data[i]['orig_im'], + orig_im_shape=batch_data[i]['orig_im_shape'], + orig_im_path=batch_data[i]['orig_im_path'], + output_dir=output_dir, + visualization=visualization, + confs_threshold=confs_threshold, + iou_threshold=iou_threshold) + res.append(out) + return res + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.face_detection(images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.face_detection( + paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', + type=str, + default='face_detector_640_predict_output', + help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/processor.py b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..cac27c90a384412f4343aa4ca344cc32061dedf4 --- /dev/null +++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/processor.py @@ -0,0 +1,144 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import time + +import base64 +import cv2 +import numpy as np + +__all__ = ['postprocess'] + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def area_of(left_top, right_bottom): + hw = np.clip(right_bottom - left_top, 0.0, None) + return hw[..., 0] * hw[..., 1] + + +def iou_of(boxes0, boxes1, eps=1e-5): + overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2]) + overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:]) + overlap_area = area_of(overlap_left_top, overlap_right_bottom) + area0 = area_of(boxes0[..., :2], boxes0[..., 2:]) + area1 = area_of(boxes1[..., :2], boxes1[..., 2:]) + return overlap_area / (area0 + area1 - overlap_area + eps) + + +def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200): + scores = box_scores[:, -1] + boxes = box_scores[:, :-1] + picked = [] + # _, indexes = scores.sort(descending=True) + indexes = np.argsort(scores) + # indexes = indexes[:candidate_size] + indexes = indexes[-candidate_size:] + while len(indexes) > 0: + # current = indexes[0] + current = indexes[-1] + picked.append(current) + if 0 < top_k == len(picked) or len(indexes) == 1: + break + current_box = boxes[current, :] + # indexes = indexes[1:] + indexes = indexes[:-1] + rest_boxes = boxes[indexes, :] + iou = iou_of(rest_boxes, np.expand_dims(current_box, axis=0)) + indexes = indexes[iou <= iou_threshold] + return box_scores[picked, :] + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_image_ext(image): + if 
image.shape[2] == 4: + return ".png" + return ".jpg" + + +def postprocess(confidences, + boxes, + orig_im, + orig_im_shape, + orig_im_path, + output_dir, + visualization, + confs_threshold=0.5, + iou_threshold=0.5): + """ + Postprocess the network output, one image at a time. + + Args: + confidences (numpy.ndarray): confidences, with shape [num, 2] + boxes (numpy.ndarray): boxes coordinate, with shape [num, 4] + orig_im (numpy.ndarray): original image. + orig_im_shape (list): shape of the original image. + orig_im_path (str): path of the original image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + """ + output = {} + output['data'] = [] + if orig_im_path: + output['path'] = orig_im_path + picked_box_probs = [] + picked_labels = [] + for class_index in range(1, confidences.shape[1]): + probs = confidences[:, class_index] + mask = probs > confs_threshold + probs = probs[mask] + if probs.shape[0] == 0: + continue + subset_boxes = boxes[mask, :] + box_probs = np.concatenate([subset_boxes, probs.reshape(-1, 1)], axis=1) + box_probs = hard_nms(box_probs, iou_threshold=iou_threshold, top_k=-1) + picked_box_probs.append(box_probs) + picked_labels.extend([class_index] * box_probs.shape[0]) + + if not picked_box_probs: + return output + + picked_box_probs = np.concatenate(picked_box_probs) + picked_box_probs[:, 0] *= orig_im_shape[1] + picked_box_probs[:, 1] *= orig_im_shape[0] + picked_box_probs[:, 2] *= orig_im_shape[1] + picked_box_probs[:, 3] *= orig_im_shape[0] + + for data in picked_box_probs: + output['data'].append({ + 'left': float(data[0]), + 'right': float(data[2]), + 'top': float(data[1]), + 'bottom': float(data[3]), + 'confidence': float(data[4]) + }) + + picked_box_probs = picked_box_probs[:, :4].astype(np.int32) + if visualization: + for i in range(picked_box_probs.shape[0]): + box = picked_box_probs[i] + cv2.rectangle(orig_im, (box[0], box[1]), (box[2], box[3]), (255, 255, 0), 2) + check_dir(output_dir) + # os.path.splitext returns a (root, ext) tuple; take the extension itself + ext = os.path.splitext(orig_im_path)[-1] if orig_im_path else '' + ext = ext if ext else get_image_ext(orig_im) + orig_im_path = orig_im_path if orig_im_path else 'ndarray_{}{}'.format(time.time(), ext) + im_name = os.path.basename(orig_im_path) + im_save_path = os.path.join(output_dir, im_name) + output['save_path'] = im_save_path + cv2.imwrite(im_save_path, orig_im) + return output diff --git a/hub_module/modules/image/gan/README.md b/modules/image/gan/README.md similarity index 100% rename from hub_module/modules/image/gan/README.md rename to modules/image/gan/README.md diff --git a/hub_module/modules/image/keypoint_detection/README.md b/modules/image/keypoint_detection/README.md similarity index 100% rename from hub_module/modules/image/keypoint_detection/README.md rename to modules/image/keypoint_detection/README.md diff --git a/hub_module/modules/image/keypoint_detection/face_landmark_localization/README.md b/modules/image/keypoint_detection/face_landmark_localization/README.md similarity index 100% rename from hub_module/modules/image/keypoint_detection/face_landmark_localization/README.md rename to modules/image/keypoint_detection/face_landmark_localization/README.md diff --git a/hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/__init__.py b/modules/image/keypoint_detection/face_landmark_localization/__init__.py similarity index 100% rename from hub_module/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/__init__.py rename to
modules/image/keypoint_detection/face_landmark_localization/__init__.py diff --git a/modules/image/keypoint_detection/face_landmark_localization/data_feed.py b/modules/image/keypoint_detection/face_landmark_localization/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..c270a20b6835368e364420d088efc952aeeecac1 --- /dev/null +++ b/modules/image/keypoint_detection/face_landmark_localization/data_feed.py @@ -0,0 +1,69 @@ +# coding=utf-8 +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader'] + + +def reader(face_detector, images=None, paths=None, use_gpu=False): + """ + Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + components = [] + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path) + each['orig_im'] = im + each['orig_im_shape'] = im.shape + each['orig_im_path'] = im_path + components.append(each) + if images is not None: + assert type(images) is list, "images should be a list." + for im in images: + each = OrderedDict() + each['orig_im'] = im + each['orig_im_path'] = None + each['orig_im_shape'] = im.shape + components.append(each) + + for idx, item in enumerate( + face_detector.face_detection( + images=[component['orig_im'] for component in components], use_gpu=use_gpu, visualization=False)): + for face in item['data']: + width = int(components[idx]['orig_im_shape'][1]) + height = int(components[idx]['orig_im_shape'][0]) + x1 = 0 if int(face['left']) < 0 else int(face['left']) + x2 = width if int(face['right']) > width else int(face['right']) + y1 = 0 if int(face['top']) < 0 else int(face['top']) + y2 = height if int(face['bottom']) > height else int(face['bottom']) + roi = components[idx]['orig_im'][y1:y2 + 1, x1:x2 + 1, :] + gray_img = cv2.cvtColor(roi, cv2.COLOR_RGB2GRAY) + gray_img = cv2.resize(gray_img, (60, 60), interpolation=cv2.INTER_CUBIC) + mean, std_dev = cv2.meanStdDev(gray_img) + gray_img = (gray_img - mean[0][0]) / (0.000001 + std_dev[0][0]) + gray_img = np.expand_dims(gray_img, axis=0) + yield { + 'face': gray_img, + 'x1': x1, + 'y1': y1, + 'x2': x2, + 'y2': y2, + 'orig_im': components[idx]['orig_im'], + 'orig_im_path': components[idx]['orig_im_path'], + 'orig_im_shape': components[idx]['orig_im_shape'], + 'id': idx + } diff --git a/modules/image/keypoint_detection/face_landmark_localization/face_landmark.py b/modules/image/keypoint_detection/face_landmark_localization/face_landmark.py new file mode 100644 index 0000000000000000000000000000000000000000..b2be891d68ea76934d1108094287830f8eda03aa --- /dev/null +++ b/modules/image/keypoint_detection/face_landmark_localization/face_landmark.py @@ -0,0 +1,99 @@ +# coding=utf-8 +from __future__ import absolute_import + +import paddle.fluid as fluid + +__all__ = ["face_landmark_localization"] + + +def face_landmark_localization(image): + # image = fluid.layers.data(shape=[1, 60, 60], name='data', dtype='float32') + Conv1 = fluid.layers.conv2d( + image, + param_attr='Conv1_weights', + name='Conv1', + dilation=[1, 1], + filter_size=[5, 5], + stride=[1, 1], + groups=1, + bias_attr='Conv1_bias', + padding=[2, 2], + num_filters=20) + ActivationTangH1 = fluid.layers.tanh(Conv1, name='ActivationTangH1') + 
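+# Each convolution (and the first fc) in this network is followed by a TanH and then an Abs, so the effective activation is |tanh(x)| applied elementwise (numerically, np.abs(np.tanh(x))), which bounds every response to [0, 1).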
ActivationAbs1 = fluid.layers.abs(ActivationTangH1, name='ActivationAbs1') + Pool1 = fluid.layers.pool2d( + ActivationAbs1, + exclusive=False, + pool_type='max', + pool_padding=[0, 0], + name='Pool1', + global_pooling=False, + pool_stride=[2, 2], + ceil_mode=True, + pool_size=[2, 2]) + Conv2 = fluid.layers.conv2d( + Pool1, + param_attr='Conv2_weights', + name='Conv2', + dilation=[1, 1], + filter_size=[5, 5], + stride=[1, 1], + groups=1, + bias_attr='Conv2_bias', + padding=[2, 2], + num_filters=48) + ActivationTangH2 = fluid.layers.tanh(Conv2, name='ActivationTangH2') + ActivationAbs2 = fluid.layers.abs(ActivationTangH2, name='ActivationAbs2') + Pool2 = fluid.layers.pool2d( + ActivationAbs2, + exclusive=False, + pool_type='max', + pool_padding=[0, 0], + name='Pool2', + global_pooling=False, + pool_stride=[2, 2], + ceil_mode=True, + pool_size=[2, 2]) + Conv3 = fluid.layers.conv2d( + Pool2, + param_attr='Conv3_weights', + name='Conv3', + dilation=[1, 1], + filter_size=[3, 3], + stride=[1, 1], + groups=1, + bias_attr='Conv3_bias', + padding=[0, 0], + num_filters=64) + ActivationTangH3 = fluid.layers.tanh(Conv3, name='ActivationTangH3') + ActivationAbs3 = fluid.layers.abs(ActivationTangH3, name='ActivationAbs3') + Pool3 = fluid.layers.pool2d( + ActivationAbs3, + exclusive=False, + pool_type='max', + pool_padding=[0, 0], + name='Pool3', + global_pooling=False, + pool_stride=[2, 2], + ceil_mode=True, + pool_size=[3, 3]) + Conv4 = fluid.layers.conv2d( + Pool3, + param_attr='Conv4_weights', + name='Conv4', + dilation=[1, 1], + filter_size=[3, 3], + stride=[1, 1], + groups=1, + bias_attr='Conv4_bias', + padding=[0, 0], + num_filters=80) + ActivationTangH4 = fluid.layers.tanh(Conv4, name='ActivationTangH4') + ActivationAbs4 = fluid.layers.abs(ActivationTangH4, name='ActivationAbs4') + Dense1 = fluid.layers.fc( + ActivationAbs4, param_attr='Dense1_weights', act=None, name='Dense1', size=512, bias_attr='Dense1_bias') + ActivationTangH5 = fluid.layers.tanh(Dense1, name='ActivationTangH5') + ActivationAbs5 = fluid.layers.abs(ActivationTangH5, name='ActivationAbs5') + Dense3 = fluid.layers.fc( + ActivationAbs5, param_attr='Dense3_weights', act=None, name='Dense3', size=136, bias_attr='Dense3_bias') + return Dense3 diff --git a/modules/image/keypoint_detection/face_landmark_localization/module.py b/modules/image/keypoint_detection/face_landmark_localization/module.py new file mode 100644 index 0000000000000000000000000000000000000000..c389fec43e7041bf3f4040ce38f5c9581a39ff65 --- /dev/null +++ b/modules/image/keypoint_detection/face_landmark_localization/module.py @@ -0,0 +1,203 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division + +import ast +import argparse +import time +import os +from collections import OrderedDict + +import cv2 +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving + +from face_landmark_localization.processor import postprocess, base64_to_cv2 +from face_landmark_localization.data_feed import reader + + +@moduleinfo( + name="face_landmark_localization", + type="CV/keypoint_detection", + author="paddlepaddle", + author_email="paddle-dev@baidu.com", + summary= + "Face_Landmark_Localization can be used to locate face landmark. 
This Module is trained through the MPII Human Pose dataset.", + version="1.0.2") +class FaceLandmarkLocalization(hub.Module): + def _initialize(self, face_detector_module=None): + """ + Args: + face_detector_module (class): module to detect face. + """ + self.default_pretrained_model_path = os.path.join(self.directory, "face_landmark_localization") + if face_detector_module is None: + self.face_detector = hub.Module(name="ultra_light_fast_generic_face_detector_1mb_640") + else: + self.face_detector = face_detector_module + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def set_face_detector_module(self, face_detector_module): + """ + Set face detector. + + Args: + face_detector_module (class): module to detect face. + """ + self.face_detector = face_detector_module + + def get_face_detector_module(self): + return self.face_detector + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + face_landmark_dir = os.path.join(dirname, "face_landmark") + detector_dir = os.path.join(dirname, "detector") + + fluid.io.save_inference_model( + dirname=face_landmark_dir, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + self.face_detector.save_inference_model( + dirname=detector_dir, model_filename=model_filename, params_filename=params_filename, combined=combined) + + def keypoint_detection(self, + images=None, + paths=None, + batch_size=1, + use_gpu=False, + output_dir='face_landmark_output', + visualization=False): + """ + API for face landmark. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + output_dir (str): The path to store output images. + visualization (bool): Whether to save image or not. + + Returns: + res (list[dict()]): The key points of face landmark and save path of images. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + # get all data + all_data = [] + for yield_data in reader(self.face_detector, images, paths, use_gpu): + all_data.append(yield_data) + + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + + res = [] + for iter_id in range(loop_num): + batch_data = [] + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['face'] for data in batch_data]) + face_tensor = PaddleTensor(batch_image.astype('float32')) + pred_out = self.gpu_predictor.run([face_tensor]) if use_gpu else self.cpu_predictor.run([face_tensor]) + points = pred_out[0].as_ndarray() + for idx, sample in enumerate(batch_data): + sample['points'] = points[idx].reshape(68, -1) + res += batch_data + + res = postprocess(res, output_dir, visualization) + return res + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.keypoint_detection(images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.keypoint_detection( + paths=[args.input_path], use_gpu=args.use_gpu, output_dir=args.output_dir, visualization=args.visualization) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default=None, help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") diff --git a/modules/image/keypoint_detection/face_landmark_localization/processor.py b/modules/image/keypoint_detection/face_landmark_localization/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..2ac325479affb8e6df25a180066f085d4c1e4426 --- /dev/null +++ b/modules/image/keypoint_detection/face_landmark_localization/processor.py @@ -0,0 +1,70 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import base64 +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np + +__all__ = ['check_dir', 'postprocess'] + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_image_ext(image): + if image.shape[2] == 4: + return ".png" + return ".jpg" + + +def postprocess(res, output_dir, visualization): + """ + postprocess ouput of network, one face at a time. + """ + output = [] + _cur_id = -1 + for idx, _result in enumerate(res): + if _result['id'] != _cur_id: + _cur_id = _result['id'] + output.append({'data': []}) + _result['points'][:, 0] *= (_result['x2'] - _result['x1']) + _result['points'][:, 0] += _result['x1'] + _result['points'][:, 1] *= (_result['y2'] - _result['y1']) + _result['points'][:, 1] += _result['y1'] + output[-1]['data'].append(_result['points'].tolist()) + + if visualization: + check_dir(output_dir) + for idx, sample in enumerate(output): + orig_im = res[idx]['orig_im'] + for points in sample['data']: + for x, y in points: + cv2.circle(orig_im, (int(x), int(y)), 1, (0, 0, 255), 2) + orig_im_path = res[idx]['orig_im_path'] + ext = os.path.splitext(orig_im_path) if orig_im_path else '' + ext = ext if ext else get_image_ext(orig_im) + org_im_path = orig_im_path if orig_im_path else 'ndarray_{}{}'.format(time.time(), ext) + im_name = os.path.basename(org_im_path) + im_save_path = os.path.join(output_dir, im_name) + sample['save_path'] = im_save_path + cv2.imwrite(im_save_path, orig_im) + + return output diff --git a/hub_module/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/README.md b/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/README.md similarity index 100% rename from hub_module/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/README.md rename to modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/README.md diff --git a/hub_module/modules/image/keypoint_detection/face_landmark_localization/__init__.py b/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/__init__.py similarity index 100% rename from hub_module/modules/image/keypoint_detection/face_landmark_localization/__init__.py rename to modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/__init__.py diff --git a/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/data_feed.py b/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..a86e90cff0993792359cbea6160b87c149f00158 --- /dev/null +++ 
b/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/data_feed.py @@ -0,0 +1,51 @@ +# coding=utf-8 +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader'] + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path).astype('float32') + each['org_im'] = im + each['org_im_path'] = im_path + each['org_im_shape'] = im.shape + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." + for im in images: + each = OrderedDict() + each['org_im'] = im + each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + each['org_im_shape'] = im.shape + component.append(each) + + for element in component: + im = element['org_im'].copy() + im = cv2.resize(im, (384, 384)) + im = im.astype('float32') + im = im.transpose((2, 0, 1)) / 255 + im -= np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) + im /= np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + element['image'] = im + yield element diff --git a/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/module.py b/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/module.py new file mode 100644 index 0000000000000000000000000000000000000000..43bf5b84e4c7e5c0b5cc758d895394d965e17012 --- /dev/null +++ b/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/module.py @@ -0,0 +1,179 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division + +import ast +import os +import argparse + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving + +from human_pose_estimation_resnet50_mpii.processor import base64_to_cv2, postprocess +from human_pose_estimation_resnet50_mpii.data_feed import reader +from human_pose_estimation_resnet50_mpii.pose_resnet import ResNet + + +@moduleinfo( + name="human_pose_estimation_resnet50_mpii", + type="CV/keypoint_detection", + author="paddlepaddle", + author_email="paddle-dev@baidu.comi", + summary= + "Paddle implementation for the paper `Simple baselines for human pose estimation and tracking`, trained with the MPII dataset.", + version="1.1.1") +class HumanPoseEstimation(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, "pose-resnet50-mpii-384x384") + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = 
create_paddle_predictor(gpu_config) + + def keypoint_detection(self, + images=None, + paths=None, + batch_size=1, + use_gpu=False, + output_dir='output_pose', + visualization=False): + """ + API for human pose estimation and tracking. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + output_dir (str): The path to store output images. + visualization (bool): Whether to save image or not. + + Returns: + res (list[dict]): each element of res is a dict, keys contains 'path', 'data', the corresponding valus are: + path (str): the path of original image. + data (OrderedDict): The key points of human pose. + """ + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + batch_image = PaddleTensor(batch_image.copy()) + output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run([batch_image]) + output = np.expand_dims(output[0].as_ndarray(), axis=1) + # postprocess one by one + for i in range(len(batch_data)): + out = postprocess( + out_heatmaps=output[i], + org_im=batch_data[i]['org_im'], + org_im_shape=batch_data[i]['org_im_shape'], + org_im_path=batch_data[i]['org_im_path'], + output_dir=output_dir, + visualization=visualization) + res.append(out) + return res + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.keypoint_detection(images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the human_pose_estimation_resnet50_mpii module.", + prog='hub run human_pose_estimation_resnet50_mpii', + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.keypoint_detection( + paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='output_pose', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") diff --git a/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/pose_resnet.py b/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/pose_resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..f5a7638a21c193ef2569dfa4ed9d0a02dbc9f449 --- /dev/null +++ b/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/pose_resnet.py @@ -0,0 +1,157 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle.fluid as fluid + +__all__ = ["ResNet", "ResNet50", "ResNet101", "ResNet152"] + +BN_MOMENTUM = 0.9 + + +class ResNet(): + def __init__(self, layers=50, kps_num=16, test_mode=False): + """ + :param layers: int, the layers number which is used here + :param kps_num: int, the number of keypoints in accord with the dataset + :param test_mode: bool, if True, only return output heatmaps, no loss + + :return: loss, output heatmaps + """ + self.k = kps_num + self.layers = layers + self.test_mode = test_mode + + def net(self, input, target=None, target_weight=None): + layers = self.layers + supported_layers = [50, 101, 152] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format(supported_layers, layers) + + if layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + num_filters = [64, 128, 256, 512] + + conv = self.conv_bn_layer(input=input, num_filters=64, filter_size=7, stride=2, act='relu') + conv = fluid.layers.pool2d(input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + + for block in range(len(depth)): + for i in range(depth[block]): + conv = self.bottleneck_block( + input=conv, num_filters=num_filters[block], stride=2 if i == 0 and block != 0 else 1) + + conv = fluid.layers.conv2d_transpose( + input=conv, + num_filters=256, + filter_size=4, + padding=1, + stride=2, + param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Normal(0., 0.001)), + act=None, + bias_attr=False) + conv = fluid.layers.batch_norm(input=conv, act='relu', momentum=BN_MOMENTUM) + conv = fluid.layers.conv2d_transpose( + input=conv, + num_filters=256, + filter_size=4, + padding=1, + stride=2, + 
param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Normal(0., 0.001)), + act=None, + bias_attr=False) + conv = fluid.layers.batch_norm(input=conv, act='relu', momentum=BN_MOMENTUM) + conv = fluid.layers.conv2d_transpose( + input=conv, + num_filters=256, + filter_size=4, + padding=1, + stride=2, + param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Normal(0., 0.001)), + act=None, + bias_attr=False) + conv = fluid.layers.batch_norm(input=conv, act='relu', momentum=BN_MOMENTUM) + + out = fluid.layers.conv2d( + input=conv, + num_filters=self.k, + filter_size=1, + stride=1, + padding=0, + act=None, + param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Normal(0., 0.001))) + + if self.test_mode: + return out + else: + loss = self.calc_loss(out, target, target_weight) + return loss, out + + def conv_bn_layer(self, input, num_filters, filter_size, stride=1, groups=1, act=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Normal(0., 0.001)), + act=None, + bias_attr=False) + return fluid.layers.batch_norm(input=conv, act=act, momentum=BN_MOMENTUM) + + def shortcut(self, input, ch_out, stride): + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1: + return self.conv_bn_layer(input, ch_out, 1, stride) + else: + return input + + def calc_loss(self, heatmap, target, target_weight): + _, c, h, w = heatmap.shape + x = fluid.layers.reshape(heatmap, (-1, self.k, h * w)) + y = fluid.layers.reshape(target, (-1, self.k, h * w)) + w = fluid.layers.reshape(target_weight, (-1, self.k)) + + x = fluid.layers.split(x, num_or_sections=self.k, dim=1) + y = fluid.layers.split(y, num_or_sections=self.k, dim=1) + w = fluid.layers.split(w, num_or_sections=self.k, dim=1) + + _list = [] + for idx in range(self.k): + _tmp = fluid.layers.scale(x=x[idx] - y[idx], scale=1.) 
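+ # The remaining steps square this residual, average over the h * w heatmap locations, scale by the per-joint target weight w[idx], and average over joints and batch, so the value returned below is 0.5 * mean(w_k * MSE_k): a target-weighted heatmap MSE.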
+ _tmp = _tmp * _tmp + _tmp = fluid.layers.reduce_mean(_tmp, dim=2) + _list.append(_tmp * w[idx]) + + _loss = fluid.layers.concat(_list, axis=0) + _loss = fluid.layers.reduce_mean(_loss) + return 0.5 * _loss + + def bottleneck_block(self, input, num_filters, stride): + conv0 = self.conv_bn_layer(input=input, num_filters=num_filters, filter_size=1, act='relu') + conv1 = self.conv_bn_layer(input=conv0, num_filters=num_filters, filter_size=3, stride=stride, act='relu') + conv2 = self.conv_bn_layer(input=conv1, num_filters=num_filters * 4, filter_size=1, act=None) + + short = self.shortcut(input, num_filters * 4, stride) + + return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') + + +def ResNet50(): + model = ResNet(layers=50) + return model + + +def ResNet101(): + model = ResNet(layers=101) + return model + + +def ResNet152(): + model = ResNet(layers=152) + return model diff --git a/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/processor.py b/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..ee3f4871d2b8b9d0e847de24389f052b4df22984 --- /dev/null +++ b/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/processor.py @@ -0,0 +1,145 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import base64 +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np + +__all__ = ['base64_to_cv2', 'postprocess'] + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def get_max_preds(batch_heatmaps): + """ + Get predictions from score maps. + + Args: + batch_heatmaps (numpy.ndarray): output of the network, with shape [N, C, H, W] + """ + assert isinstance(batch_heatmaps, np.ndarray), \ + 'batch_heatmaps should be numpy.ndarray' + assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim' + + batch_size = batch_heatmaps.shape[0] + num_joints = batch_heatmaps.shape[1] + width = batch_heatmaps.shape[3] + heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1)) + idx = np.argmax(heatmaps_reshaped, 2) + maxvals = np.amax(heatmaps_reshaped, 2) + maxvals = maxvals.reshape((batch_size, num_joints, 1)) + idx = idx.reshape((batch_size, num_joints, 1)) + preds = np.tile(idx, (1, 1, 2)).astype(np.float32) + preds[:, :, 0] = (preds[:, :, 0]) % width + preds[:, :, 1] = np.floor((preds[:, :, 1]) / width) + pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2)) + pred_mask = pred_mask.astype(np.float32) + preds *= pred_mask + return preds, maxvals + + +def predict_results(batch_heatmaps): + batch_size, num_joints, heatmap_height, heatmap_width = batch_heatmaps.shape + preds, maxvals = get_max_preds(batch_heatmaps) + return preds[0] * 4, num_joints + + +def postprocess(out_heatmaps, org_im, org_im_shape, org_im_path, output_dir, visualization): + """ + Postprocess the network output, one image at a time. + + Args: + out_heatmaps (numpy.ndarray): output of network. + org_im (numpy.ndarray): original image. + org_im_shape (list): shape of the original image. + org_im_path (str): path of the original image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + + Returns: + res (dict): Output of postprocess.
Keys contain 'path' and 'data'; the corresponding values are: + path (str): the path of original image. + data (OrderedDict): The key points of human pose. + """ + res = dict() + res['path'] = org_im_path + res['data'] = OrderedDict() + preds, num_joints = predict_results(out_heatmaps) + scale_horizon = org_im_shape[1] * 1.0 / 384 + scale_vertical = org_im_shape[0] * 1.0 / 384 + preds = np.multiply(preds, (scale_horizon, scale_vertical)).astype(int) + if visualization: + icolor = (255, 137, 0) + ocolor = (138, 255, 0) + rendered_im = org_im.copy() + for j in range(num_joints): + x, y = preds[j] + cv2.circle(rendered_im, (x, y), 3, icolor, -1, 16) + cv2.circle(rendered_im, (x, y), 6, ocolor, 1, 16) + check_dir(output_dir) + save_im_name = get_save_image_name(org_im, org_im_path, output_dir) + cv2.imwrite(save_im_name, rendered_im) + print('image saved in {}'.format(save_im_name)) + + # articulation + preds = list(map(lambda pred: [int(_) for _ in pred], preds)) + res['data']['left_ankle'] = list(preds[0]) + res['data']['left_knee'] = list(preds[1]) + res['data']['left_hip'] = list(preds[2]) + res['data']['right_hip'] = list(preds[3]) + res['data']['right_knee'] = list(preds[4]) + res['data']['right_ankle'] = list(preds[5]) + res['data']['pelvis'] = list(preds[6]) + res['data']['thorax'] = list(preds[7]) + res['data']['upper_neck'] = list(preds[8]) + res['data']['head_top'] = list(preds[9]) + res['data']['right_wrist'] = list(preds[10]) + res['data']['right_elbow'] = list(preds[11]) + res['data']['right_shoulder'] = list(preds[12]) + res['data']['left_shoulder'] = list(preds[13]) + res['data']['left_elbow'] = list(preds[14]) + res['data']['left_wrist'] = list(preds[15]) + + return res + + +def check_dir(dir_path): + """ + Create directory to save processed image. + + Args: + dir_path (str): directory path to save images. + """ + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im, org_im_path, output_dir): + """ + Get save image name from source image path. + """ + # name prefix of original image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + # extension + ext = '.jpg' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join(output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + + return save_im_path diff --git a/modules/image/keypoint_detection/openpose_body_estimation/module.py b/modules/image/keypoint_detection/openpose_body_estimation/module.py new file mode 100644 index 0000000000000000000000000000000000000000..bcc2d85daa1346bfc9bb0bbe054eb23b98ce2aa8 --- /dev/null +++ b/modules/image/keypoint_detection/openpose_body_estimation/module.py @@ -0,0 +1,214 @@ +# coding:utf-8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
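+# Architecture note: this is a two-branch, multi-stage network in the spirit of OpenPose. Branch L1 regresses 38-channel Part Affinity Fields and branch L2 regresses 19-channel keypoint heatmaps; every refinement stage (2-6) consumes the 128-channel stage-1 backbone feature concatenated with the previous stage's two outputs, hence the 185-channel inputs configured below. +# A minimal usage sketch (assuming the module is installed and discoverable by PaddleHub under this name): +# import paddlehub as hub +# model = hub.Module(name='openpose_body_estimation') +# candidate, subset = model.predict(img_path='demo.jpg')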
+ +import os +import copy +from collections import OrderedDict + +import cv2 +import paddle +import paddle.nn as nn +import numpy as np +from paddlehub.module.module import moduleinfo +import paddlehub.process.transforms as T + +import openpose_body_estimation.processor as P + + +@moduleinfo( + name="openpose_body_estimation", + type="CV/image_editing", + author="paddlepaddle", + author_email="", + summary="Openpose_body_estimation is a body pose estimation model based on Realtime Multi-Person 2D Pose \ + Estimation using Part Affinity Fields.", + version="1.0.0") +class BodyPoseModel(nn.Layer): + """ + BodyPoseModel + + Args: + load_checkpoint(str): Checkpoint save path, default is None. + visualization (bool): Whether to save the estimation result. Default is True. + """ + + def __init__(self, load_checkpoint: str = None, visualization: bool = True): + super(BodyPoseModel, self).__init__() + + self.resize_func = T.ResizeScaling() + self.norm_func = T.Normalize(std=[1, 1, 1]) + self.pad_func = P.PadDownRight() + self.remove_pad = P.RemovePadding() + self.get_peak = P.GetPeak() + self.get_connection = P.Connection() + self.get_candidate = P.Candidate() + self.draw_pose = P.DrawPose() + self.visualization = visualization + + no_relu_layers = ['conv5_5_CPM_L1', 'conv5_5_CPM_L2', 'Mconv7_stage2_L1', \ + 'Mconv7_stage2_L2', 'Mconv7_stage3_L1', 'Mconv7_stage3_L2', \ + 'Mconv7_stage4_L1', 'Mconv7_stage4_L2', 'Mconv7_stage5_L1', \ + 'Mconv7_stage5_L2', 'Mconv7_stage6_L1', 'Mconv7_stage6_L1'] + blocks = {} + block0 = OrderedDict([('conv1_1', [3, 64, 3, 1, 1]), ('conv1_2', [64, 64, 3, 1, 1]), ('pool1_stage1', [2, 2, + 0]), + ('conv2_1', [64, 128, 3, 1, 1]), ('conv2_2', [128, 128, 3, 1, 1]), + ('pool2_stage1', [2, 2, 0]), ('conv3_1', [128, 256, 3, 1, 1]), + ('conv3_2', [256, 256, 3, 1, 1]), ('conv3_3', [256, 256, 3, 1, 1]), + ('conv3_4', [256, 256, 3, 1, 1]), ('pool3_stage1', [2, 2, 0]), + ('conv4_1', [256, 512, 3, 1, 1]), ('conv4_2', [512, 512, 3, 1, 1]), + ('conv4_3_CPM', [512, 256, 3, 1, 1]), ('conv4_4_CPM', [256, 128, 3, 1, 1])]) + + block1_1 = OrderedDict([('conv5_1_CPM_L1', [128, 128, 3, 1, 1]), ('conv5_2_CPM_L1', [128, 128, 3, 1, 1]), + ('conv5_3_CPM_L1', [128, 128, 3, 1, 1]), ('conv5_4_CPM_L1', [128, 512, 1, 1, 0]), + ('conv5_5_CPM_L1', [512, 38, 1, 1, 0])]) + + block1_2 = OrderedDict([('conv5_1_CPM_L2', [128, 128, 3, 1, 1]), ('conv5_2_CPM_L2', [128, 128, 3, 1, 1]), + ('conv5_3_CPM_L2', [128, 128, 3, 1, 1]), ('conv5_4_CPM_L2', [128, 512, 1, 1, 0]), + ('conv5_5_CPM_L2', [512, 19, 1, 1, 0])]) + blocks['block1_1'] = block1_1 + blocks['block1_2'] = block1_2 + + self.model0 = self.make_layers(block0, no_relu_layers) + + for i in range(2, 7): + blocks['block%d_1' % i] = OrderedDict([('Mconv1_stage%d_L1' % i, [185, 128, 7, 1, 3]), + ('Mconv2_stage%d_L1' % i, [128, 128, 7, 1, 3]), + ('Mconv3_stage%d_L1' % i, [128, 128, 7, 1, 3]), + ('Mconv4_stage%d_L1' % i, [128, 128, 7, 1, 3]), + ('Mconv5_stage%d_L1' % i, [128, 128, 7, 1, 3]), + ('Mconv6_stage%d_L1' % i, [128, 128, 1, 1, 0]), + ('Mconv7_stage%d_L1' % i, [128, 38, 1, 1, 0])]) + + blocks['block%d_2' % i] = OrderedDict([('Mconv1_stage%d_L2' % i, [185, 128, 7, 1, 3]), + ('Mconv2_stage%d_L2' % i, [128, 128, 7, 1, 3]), + ('Mconv3_stage%d_L2' % i, [128, 128, 7, 1, 3]), + ('Mconv4_stage%d_L2' % i, [128, 128, 7, 1, 3]), + ('Mconv5_stage%d_L2' % i, [128, 128, 7, 1, 3]), + ('Mconv6_stage%d_L2' % i, [128, 128, 1, 1, 0]), + ('Mconv7_stage%d_L2' % i, [128, 19, 1, 1, 0])]) + + for k in blocks.keys(): + blocks[k] = self.make_layers(blocks[k], no_relu_layers) + + 
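+ # Each block spec above is consumed by make_layers below: conv entries are [in_channels, out_channels, kernel_size, stride, padding], 'pool*' entries are [kernel_size, stride, padding], and any layer named in no_relu_layers is emitted without a trailing ReLU.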
self.model1_1 = blocks['block1_1'] + self.model2_1 = blocks['block2_1'] + self.model3_1 = blocks['block3_1'] + self.model4_1 = blocks['block4_1'] + self.model5_1 = blocks['block5_1'] + self.model6_1 = blocks['block6_1'] + + self.model1_2 = blocks['block1_2'] + self.model2_2 = blocks['block2_2'] + self.model3_2 = blocks['block3_2'] + self.model4_2 = blocks['block4_2'] + self.model5_2 = blocks['block5_2'] + self.model6_2 = blocks['block6_2'] + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'openpose_body.pdparams') + if not os.path.exists(checkpoint): + os.system('wget https://paddlehub.bj.bcebos.com/dygraph/pose/openpose_body.pdparams -O ' + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def make_layers(self, block: dict, no_relu_layers: list): + layers = [] + for layer_name, v in block.items(): + if 'pool' in layer_name: + layer = nn.MaxPool2d(kernel_size=v[0], stride=v[1], padding=v[2]) + layers.append((layer_name, layer)) + else: + conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride=v[3], padding=v[4]) + layers.append((layer_name, conv2d)) + if layer_name not in no_relu_layers: + layers.append(('relu_' + layer_name, nn.ReLU())) + layers = tuple(layers) + return nn.Sequential(*layers) + + def transform(self, orgimg: np.ndarray, scale_search: float = 0.5): + process = self.resize_func(orgimg, scale_search) + imageToTest_padded, pad = self.pad_func(process) + process = self.norm_func(imageToTest_padded) + process = np.ascontiguousarray(np.transpose(process[:, :, :, np.newaxis], (3, 2, 0, 1))).astype("float32") + + return process, imageToTest_padded, pad + + def forward(self, x: paddle.Tensor): + + out1 = self.model0(x) + + out1_1 = self.model1_1(out1) + out1_2 = self.model1_2(out1) + out2 = paddle.concat([out1_1, out1_2, out1], 1) + + out2_1 = self.model2_1(out2) + out2_2 = self.model2_2(out2) + out3 = paddle.concat([out2_1, out2_2, out1], 1) + + out3_1 = self.model3_1(out3) + out3_2 = self.model3_2(out3) + out4 = paddle.concat([out3_1, out3_2, out1], 1) + + out4_1 = self.model4_1(out4) + out4_2 = self.model4_2(out4) + out5 = paddle.concat([out4_1, out4_2, out1], 1) + + out5_1 = self.model5_1(out5) + out5_2 = self.model5_2(out5) + out6 = paddle.concat([out5_1, out5_2, out1], 1) + + out6_1 = self.model6_1(out6) + out6_2 = self.model6_2(out6) + + return out6_1, out6_2 + + def predict(self, img_path: str, save_path: str = "result"): + self.eval() + orgImg = cv2.imread(img_path) + data, imageToTest_padded, pad = self.transform(orgImg) + Mconv7_stage6_L1, Mconv7_stage6_L2 = self.forward(paddle.to_tensor(data)) + Mconv7_stage6_L1 = Mconv7_stage6_L1.numpy() + Mconv7_stage6_L2 = Mconv7_stage6_L2.numpy() + + heatmap_avg = self.remove_pad(Mconv7_stage6_L2, imageToTest_padded, orgImg, pad) + paf_avg = self.remove_pad(Mconv7_stage6_L1, imageToTest_padded, orgImg, pad) + + all_peaks = self.get_peak(heatmap_avg) + connection_all, special_k = self.get_connection(all_peaks, paf_avg, orgImg) + candidate, subset = self.get_candidate(all_peaks, connection_all, special_k) + + if self.visualization: + canvas = copy.deepcopy(orgImg) + canvas = self.draw_pose(canvas, candidate, subset) + if not os.path.exists(save_path): + os.mkdir(save_path) + save_path = os.path.join(save_path, img_path.rsplit("/", 1)[-1]) + cv2.imwrite(save_path, canvas) + 
return candidate, subset + + +if __name__ == "__main__": + + paddle.disable_static() + model = BodyPoseModel() + model.eval() + out1, out2 = model.predict("demo.jpg") diff --git a/modules/image/keypoint_detection/openpose_body_estimation/processor.py b/modules/image/keypoint_detection/openpose_body_estimation/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..172052c0199ed5aa74df55a610b749cd72007703 --- /dev/null +++ b/modules/image/keypoint_detection/openpose_body_estimation/processor.py @@ -0,0 +1,311 @@ +import math + +import cv2 +import numpy as np +from scipy.ndimage.filters import gaussian_filter + + +class PadDownRight: + """ + Get padding images. + + Args: + stride(int): Stride for calculate pad value for edges. + padValue(int): Initialization for new area. + """ + + def __init__(self, stride: int = 8, padValue: int = 128): + self.stride = stride + self.padValue = padValue + + def __call__(self, img: np.ndarray): + h, w = img.shape[0:2] + pad = 4 * [0] + pad[2] = 0 if (h % self.stride == 0) else self.stride - (h % self.stride) # down + pad[3] = 0 if (w % self.stride == 0) else self.stride - (w % self.stride) # right + + img_padded = img + pad_up = np.tile(img_padded[0:1, :, :] * 0 + self.padValue, (pad[0], 1, 1)) + img_padded = np.concatenate((pad_up, img_padded), axis=0) + pad_left = np.tile(img_padded[:, 0:1, :] * 0 + self.padValue, (1, pad[1], 1)) + img_padded = np.concatenate((pad_left, img_padded), axis=1) + pad_down = np.tile(img_padded[-2:-1, :, :] * 0 + self.padValue, (pad[2], 1, 1)) + img_padded = np.concatenate((img_padded, pad_down), axis=0) + pad_right = np.tile(img_padded[:, -2:-1, :] * 0 + self.padValue, (1, pad[3], 1)) + img_padded = np.concatenate((img_padded, pad_right), axis=1) + + return img_padded, pad + + +class RemovePadding: + """ + Remove the padding values. + + Args: + stride(int): Scales for resizing the images. + + """ + + def __init__(self, stride: int = 8): + self.stride = stride + + def __call__(self, data: np.ndarray, imageToTest_padded: np.ndarray, oriImg: np.ndarray, pad: list) -> np.ndarray: + heatmap = np.transpose(np.squeeze(data), (1, 2, 0)) + heatmap = cv2.resize(heatmap, (0, 0), fx=self.stride, fy=self.stride, interpolation=cv2.INTER_CUBIC) + heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :] + heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC) + + return heatmap + + +class GetPeak: + """ + Get peak values and coordinate from input. + + Args: + thresh(float): Threshold value for selecting peak value, default is 0.1. 
+ """ + + def __init__(self, thresh=0.1): + self.thresh = thresh + + def __call__(self, heatmap: np.ndarray): + all_peaks = [] + peak_counter = 0 + for part in range(18): + map_ori = heatmap[:, :, part] + one_heatmap = gaussian_filter(map_ori, sigma=3) + + map_left = np.zeros(one_heatmap.shape) + map_left[1:, :] = one_heatmap[:-1, :] + map_right = np.zeros(one_heatmap.shape) + map_right[:-1, :] = one_heatmap[1:, :] + map_up = np.zeros(one_heatmap.shape) + map_up[:, 1:] = one_heatmap[:, :-1] + map_down = np.zeros(one_heatmap.shape) + map_down[:, :-1] = one_heatmap[:, 1:] + + peaks_binary = np.logical_and.reduce( + (one_heatmap >= map_left, one_heatmap >= map_right, one_heatmap >= map_up, one_heatmap >= map_down, + one_heatmap > self.thresh)) + peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0])) # note reverse + peaks_with_score = [x + (map_ori[x[1], x[0]], ) for x in peaks] + peak_id = range(peak_counter, peak_counter + len(peaks)) + peaks_with_score_and_id = [peaks_with_score[i] + (peak_id[i], ) for i in range(len(peak_id))] + + all_peaks.append(peaks_with_score_and_id) + peak_counter += len(peaks) + + return all_peaks + + +class Connection: + """ + Get connection for selected estimation points. + + Args: + mapIdx(list): Part Affinity Fields map index, default is None. + limbSeq(list): Peak candidate map index, default is None. + + """ + + def __init__(self, mapIdx: list = None, limbSeq: list = None): + if mapIdx and limbSeq: + self.mapIdx = mapIdx + self.limbSeq = limbSeq + else: + self.mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22], \ + [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52], \ + [55, 56], [37, 38], [45, 46]] + + self.limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \ + [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \ + [1, 16], [16, 18], [3, 17], [6, 18]] + self.caculate_vector = CalculateVector() + + def __call__(self, all_peaks: list, paf_avg: np.ndarray, orgimg: np.ndarray): + connection_all = [] + special_k = [] + for k in range(len(self.mapIdx)): + score_mid = paf_avg[:, :, [x - 19 for x in self.mapIdx[k]]] + candA = all_peaks[self.limbSeq[k][0] - 1] + candB = all_peaks[self.limbSeq[k][1] - 1] + nA = len(candA) + nB = len(candB) + if nA != 0 and nB != 0: + connection_candidate = self.caculate_vector(candA, candB, nA, nB, score_mid, orgimg) + connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True) + connection = np.zeros((0, 5)) + for c in range(len(connection_candidate)): + i, j, s = connection_candidate[c][0:3] + if i not in connection[:, 3] and j not in connection[:, 4]: + connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]]) + if len(connection) >= min(nA, nB): + break + + connection_all.append(connection) + else: + special_k.append(k) + connection_all.append([]) + + return connection_all, special_k + + +class CalculateVector: + """ + Vector decomposition and normalization, refer Realtime Multi-Person 2D Pose Estimation using Part Affinity Fields + for more details. + + Args: + thresh(float): Threshold value for selecting candidate vector, default is 0.05. 
+ """ + + def __init__(self, thresh: float = 0.05): + self.thresh = thresh + + def __call__(self, candA: list, candB: list, nA: int, nB: int, score_mid: np.ndarray, oriImg: np.ndarray): + connection_candidate = [] + for i in range(nA): + for j in range(nB): + vec = np.subtract(candB[j][:2], candA[i][:2]) + norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1]) + 1e-5 + vec = np.divide(vec, norm) + + startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=10), \ + np.linspace(candA[i][1], candB[j][1], num=10))) + + vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] \ + for I in range(len(startend))]) + vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] \ + for I in range(len(startend))]) + + score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1]) + score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(0.5 * oriImg.shape[0] / norm - 1, 0) + criterion1 = len(np.nonzero(score_midpts > self.thresh)[0]) > 0.8 * len(score_midpts) + criterion2 = score_with_dist_prior > 0 + if criterion1 and criterion2: + connection_candidate.append( + [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]]) + return connection_candidate + + +class DrawPose: + """ + Draw Pose estimation results on canvas. + + Args: + stickwidth(int): Angle value to draw approximate ellipse curve, default is 4. + + """ + + def __init__(self, stickwidth: int = 4): + self.stickwidth = stickwidth + + self.limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], [10, 11], [2, 12], [12, 13], + [13, 14], [2, 1], [1, 15], [15, 17], [1, 16], [16, 18], [3, 17], [6, 18]] + + self.colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], + [170, 255, 0], [85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255], + [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], [170, 0, 255], [255, 0, 255], + [255, 0, 170], [255, 0, 85]] + + def __call__(self, canvas: np.ndarray, candidate: np.ndarray, subset: np.ndarray): + for i in range(18): + for n in range(len(subset)): + index = int(subset[n][i]) + if index == -1: + continue + x, y = candidate[index][0:2] + cv2.circle(canvas, (int(x), int(y)), 4, self.colors[i], thickness=-1) + for i in range(17): + for n in range(len(subset)): + index = subset[n][np.array(self.limbSeq[i]) - 1] + if -1 in index: + continue + cur_canvas = canvas.copy() + Y = candidate[index.astype(int), 0] + X = candidate[index.astype(int), 1] + mX = np.mean(X) + mY = np.mean(Y) + length = ((X[0] - X[1])**2 + (Y[0] - Y[1])**2)**0.5 + angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1])) + polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), self.stickwidth), \ + int(angle), 0, 360, 1) + cv2.fillConvexPoly(cur_canvas, polygon, self.colors[i]) + canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0) + return canvas + + +class Candidate: + """ + Select candidate for body pose estimation. + + Args: + mapIdx(list): Part Affinity Fields map index, default is None. + limbSeq(list): Peak candidate map index, default is None. 
+ """ + + def __init__(self, mapIdx: list = None, limbSeq: list = None): + if mapIdx and limbSeq: + self.mapIdx = mapIdx + self.limbSeq = limbSeq + else: + self.mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22], \ + [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52], \ + [55, 56], [37, 38], [45, 46]] + self.limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \ + [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \ + [1, 16], [16, 18], [3, 17], [6, 18]] + + def __call__(self, all_peaks: list, connection_all: list, special_k: list): + subset = -1 * np.ones((0, 20)) + candidate = np.array([item for sublist in all_peaks for item in sublist]) + for k in range(len(self.mapIdx)): + if k not in special_k: + partAs = connection_all[k][:, 0] + partBs = connection_all[k][:, 1] + indexA, indexB = np.array(self.limbSeq[k]) - 1 + + for i in range(len(connection_all[k])): # = 1:size(temp,1) + found = 0 + subset_idx = [-1, -1] + for j in range(len(subset)): # 1:size(subset,1): + if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]: + subset_idx[found] = j + found += 1 + + if found == 1: + j = subset_idx[0] + if subset[j][indexB] != partBs[i]: + subset[j][indexB] = partBs[i] + subset[j][-1] += 1 + subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2] + elif found == 2: # if found 2 and disjoint, merge them + j1, j2 = subset_idx + membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2] + if len(np.nonzero(membership == 2)[0]) == 0: # merge + subset[j1][:-2] += (subset[j2][:-2] + 1) + subset[j1][-2:] += subset[j2][-2:] + subset[j1][-2] += connection_all[k][i][2] + subset = np.delete(subset, j2, 0) + else: # as like found == 1 + subset[j1][indexB] = partBs[i] + subset[j1][-1] += 1 + subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2] + + # if find no partA in the subset, create a new subset + elif not found and k < 17: + row = -1 * np.ones(20) + row[indexA] = partAs[i] + row[indexB] = partBs[i] + row[-1] = 2 + row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2] + subset = np.vstack([subset, row]) + # delete some rows of subset which has few parts occur + deleteIdx = [] + for i in range(len(subset)): + if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4: + deleteIdx.append(i) + subset = np.delete(subset, deleteIdx, axis=0) + + return candidate, subset diff --git a/modules/image/keypoint_detection/openpose_hands_estimation/module.py b/modules/image/keypoint_detection/openpose_hands_estimation/module.py new file mode 100644 index 0000000000000000000000000000000000000000..4429597d90d32ae330f82b3063ee842fe15fea9a --- /dev/null +++ b/modules/image/keypoint_detection/openpose_hands_estimation/module.py @@ -0,0 +1,206 @@ +# coding:utf-8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
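+
+# NOTE: this module composes two networks: openpose_body_estimation predicts
+# body keypoints first, HandDetect (processor.py) derives square hand crops
+# from the wrist/elbow/shoulder geometry, and the network below regresses
+# 21 hand keypoints inside each crop.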
+ +import os +import copy +from collections import OrderedDict + +import cv2 +import paddle +import numpy as np +import paddle.nn as nn +import paddlehub as hub +from skimage.measure import label +from scipy.ndimage.filters import gaussian_filter +from paddlehub.module.module import moduleinfo +from paddlehub.process.functional import npmax +import paddlehub.process.transforms as T + +import openpose_hands_estimation.processor as P + + +@moduleinfo( + name="openpose_hands_estimation", + type="CV/image_editing", + author="paddlepaddle", + author_email="", + summary="Openpose_hands_estimation is a hand pose estimation model based on Hand Keypoint Detection in \ + Single Images using Multiview Bootstrapping.", + version="1.0.0") +class HandPoseModel(nn.Layer): + """ + HandPoseModel + + Args: + load_checkpoint(str): Checkpoint save path, default is None. + visualization (bool): Whether to save the estimation result. Default is True. + """ + + def __init__(self, load_checkpoint: str = None, visualization: bool = True): + super(HandPoseModel, self).__init__() + + self.visualization = visualization + self.resize_func = T.ResizeScaling() + self.norm_func = T.Normalize(std=[1, 1, 1]) + self.hand_detect = P.HandDetect() + self.pad_func = P.PadDownRight() + self.remove_pad = P.RemovePadding() + self.draw_pose = P.DrawPose() + self.draw_hand = P.DrawHandPose() + no_relu_layers = ['conv6_2_CPM', 'Mconv7_stage2', 'Mconv7_stage3', \ + 'Mconv7_stage4', 'Mconv7_stage5', 'Mconv7_stage6'] + + block1_0 = OrderedDict([('conv1_1', [3, 64, 3, 1, 1]), ('conv1_2', [64, 64, 3, 1, 1]), + ('pool1_stage1', [2, 2, 0]), ('conv2_1', [64, 128, 3, 1, 1]), + ('conv2_2', [128, 128, 3, 1, 1]), ('pool2_stage1', [2, 2, 0]), + ('conv3_1', [128, 256, 3, 1, 1]), ('conv3_2', [256, 256, 3, 1, 1]), + ('conv3_3', [256, 256, 3, 1, 1]), ('conv3_4', [256, 256, 3, 1, 1]), + ('pool3_stage1', [2, 2, 0]), ('conv4_1', [256, 512, 3, 1, 1]), + ('conv4_2', [512, 512, 3, 1, 1]), ('conv4_3', [512, 512, 3, 1, 1]), + ('conv4_4', [512, 512, 3, 1, 1]), ('conv5_1', [512, 512, 3, 1, 1]), + ('conv5_2', [512, 512, 3, 1, 1]), ('conv5_3_CPM', [512, 128, 3, 1, 1])]) + + block1_1 = OrderedDict([('conv6_1_CPM', [128, 512, 1, 1, 0]), ('conv6_2_CPM', [512, 22, 1, 1, 0])]) + + blocks = {} + blocks['block1_0'] = block1_0 + blocks['block1_1'] = block1_1 + + for i in range(2, 7): + blocks['block%d' % i] = OrderedDict([('Mconv1_stage%d' % i, [150, 128, 7, 1, 3]), + ('Mconv2_stage%d' % i, [128, 128, 7, 1, 3]), + ('Mconv3_stage%d' % i, [128, 128, 7, 1, 3]), + ('Mconv4_stage%d' % i, [128, 128, 7, 1, 3]), + ('Mconv5_stage%d' % i, [128, 128, 7, 1, 3]), + ('Mconv6_stage%d' % i, [128, 128, 1, 1, 0]), + ('Mconv7_stage%d' % i, [128, 22, 1, 1, 0])]) + + for k in blocks.keys(): + blocks[k] = self.make_layers(blocks[k], no_relu_layers) + + self.model1_0 = blocks['block1_0'] + self.model1_1 = blocks['block1_1'] + self.model2 = blocks['block2'] + self.model3 = blocks['block3'] + self.model4 = blocks['block4'] + self.model5 = blocks['block5'] + self.model6 = blocks['block6'] + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'openpose_hand.pdparams') + if not os.path.exists(checkpoint): + os.system('wget https://paddlehub.bj.bcebos.com/dygraph/pose/openpose_hand.pdparams -O ' + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def make_layers(self, 
block: dict, no_relu_layers: list): + layers = [] + for layer_name, v in block.items(): + if 'pool' in layer_name: + layer = nn.MaxPool2d(kernel_size=v[0], stride=v[1], padding=v[2]) + layers.append((layer_name, layer)) + else: + conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride=v[3], padding=v[4]) + layers.append((layer_name, conv2d)) + if layer_name not in no_relu_layers: + layers.append(('relu_' + layer_name, nn.ReLU())) + layers = tuple(layers) + return nn.Sequential(*layers) + + def forward(self, x: paddle.Tensor): + out1_0 = self.model1_0(x) + out1_1 = self.model1_1(out1_0) + concat_stage2 = paddle.concat([out1_1, out1_0], 1) + out_stage2 = self.model2(concat_stage2) + concat_stage3 = paddle.concat([out_stage2, out1_0], 1) + out_stage3 = self.model3(concat_stage3) + concat_stage4 = paddle.concat([out_stage3, out1_0], 1) + out_stage4 = self.model4(concat_stage4) + concat_stage5 = paddle.concat([out_stage4, out1_0], 1) + out_stage5 = self.model5(concat_stage5) + concat_stage6 = paddle.concat([out_stage5, out1_0], 1) + out_stage6 = self.model6(concat_stage6) + return out_stage6 + + def hand_estimation(self, handimg: np.ndarray, scale_search: list): + heatmap_avg = np.zeros((handimg.shape[0], handimg.shape[1], 22)) + + for scale in scale_search: + process = self.resize_func(handimg, scale) + imageToTest_padded, pad = self.pad_func(process) + process = self.norm_func(imageToTest_padded) + process = np.ascontiguousarray(np.transpose(process[:, :, :, np.newaxis], (3, 2, 0, 1))).astype("float32") + data = self.forward(paddle.to_tensor(process)) + data = data.numpy() + heatmap = self.remove_pad(data, imageToTest_padded, handimg, pad) + heatmap_avg += heatmap / len(scale_search) + + all_peaks = [] + for part in range(21): + map_ori = heatmap_avg[:, :, part] + one_heatmap = gaussian_filter(map_ori, sigma=3) + binary = np.ascontiguousarray(one_heatmap > 0.05, dtype=np.uint8) + if np.sum(binary) == 0: + all_peaks.append([0, 0]) + continue + label_img, label_numbers = label(binary, return_num=True, connectivity=binary.ndim) + max_index = np.argmax([np.sum(map_ori[label_img == i]) for i in range(1, label_numbers + 1)]) + 1 + label_img[label_img != max_index] = 0 + map_ori[label_img == 0] = 0 + + y, x = npmax(map_ori) + all_peaks.append([x, y]) + + return np.array(all_peaks) + + def predict(self, img_path: str, save_path: str = 'result', scale: list = [0.5, 1.0, 1.5, 2.0]): + self.eval() + self.body_model = hub.Module(name='openpose_body_estimation') + self.body_model.eval() + org_img = cv2.imread(img_path) + + candidate, subset = self.body_model.predict(img_path) + hands_list = self.hand_detect(candidate, subset, org_img) + + all_hand_peaks = [] + + for x, y, w, is_left in hands_list: + peaks = self.hand_estimation(org_img[y:y + w, x:x + w, :], scale) + peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + x) + peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + y) + all_hand_peaks.append(peaks) + + if self.visualization: + canvas = copy.deepcopy(org_img) + canvas = self.draw_pose(canvas, candidate, subset) + canvas = self.draw_hand(canvas, all_hand_peaks) + if not os.path.exists(save_path): + os.mkdir(save_path) + save_path = os.path.join(save_path, img_path.rsplit("/", 1)[-1]) + cv2.imwrite(save_path, canvas) + return all_hand_peaks + + +if __name__ == "__main__": + paddle.disable_static() + model = HandPoseModel() + model.eval() + out1 = model.predict("detect_hand4.jpg") diff --git 
a/modules/image/keypoint_detection/openpose_hands_estimation/processor.py b/modules/image/keypoint_detection/openpose_hands_estimation/processor.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea16fc625433b29e3c6d01c1e4a2159731ce7b69
--- /dev/null
+++ b/modules/image/keypoint_detection/openpose_hands_estimation/processor.py
@@ -0,0 +1,212 @@
+import math
+
+import cv2
+import numpy as np
+import matplotlib
+matplotlib.use('Agg')  # select the headless Agg backend before pyplot is imported
+from matplotlib import pyplot as plt
+from matplotlib.figure import Figure
+from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
+
+
+class HandDetect:
+    """
+    Detect hand pose information from body pose estimation result.
+
+    Args:
+        ratioWristElbow(float): Ratio used to shift the wrist point towards the hand, default is 0.33.
+    """
+
+    def __init__(self, ratioWristElbow: float = 0.33):
+        self.ratioWristElbow = ratioWristElbow
+
+    def __call__(self, candidate: np.ndarray, subset: np.ndarray, oriImg: np.ndarray):
+        detect_result = []
+        image_height, image_width = oriImg.shape[0:2]
+        for person in subset.astype(int):
+            has_left = np.sum(person[[5, 6, 7]] == -1) == 0
+            has_right = np.sum(person[[2, 3, 4]] == -1) == 0
+            if not (has_left or has_right):
+                continue
+            hands = []
+            # left hand
+            if has_left:
+                left_shoulder_index, left_elbow_index, left_wrist_index = person[[5, 6, 7]]
+                x1, y1 = candidate[left_shoulder_index][:2]
+                x2, y2 = candidate[left_elbow_index][:2]
+                x3, y3 = candidate[left_wrist_index][:2]
+                hands.append([x1, y1, x2, y2, x3, y3, True])
+            # right hand
+            if has_right:
+                right_shoulder_index, right_elbow_index, right_wrist_index = person[[2, 3, 4]]
+                x1, y1 = candidate[right_shoulder_index][:2]
+                x2, y2 = candidate[right_elbow_index][:2]
+                x3, y3 = candidate[right_wrist_index][:2]
+                hands.append([x1, y1, x2, y2, x3, y3, False])
+
+            for x1, y1, x2, y2, x3, y3, is_left in hands:
+                # extrapolate past the wrist along the elbow->wrist direction
+                x = x3 + self.ratioWristElbow * (x3 - x2)
+                y = y3 + self.ratioWristElbow * (y3 - y2)
+                distanceWristElbow = math.sqrt((x3 - x2)**2 + (y3 - y2)**2)
+                distanceElbowShoulder = math.sqrt((x2 - x1)**2 + (y2 - y1)**2)
+                width = 1.5 * max(distanceWristElbow, 0.9 * distanceElbowShoulder)
+                # shift from the center to the top-left corner of the square crop
+                x -= width / 2
+                y -= width / 2
+                # clip the crop to the image bounds
+                if x < 0: x = 0
+                if y < 0: y = 0
+                width1 = width
+                width2 = width
+                if x + width > image_width: width1 = image_width - x
+                if y + width > image_height: width2 = image_height - y
+                width = min(width1, width2)
+                # discard crops too small to contain a usable hand
+                if width >= 20:
+                    detect_result.append([int(x), int(y), int(width), is_left])
+
+        return detect_result
+
+
+class PadDownRight:
+    """
+    Get padding images.
+
+    Args:
+        stride(int): Stride for calculate pad value for edges.
+        padValue(int): Initialization for new area.
+ """ + + def __init__(self, stride: int = 8, padValue: int = 128): + self.stride = stride + self.padValue = padValue + + def __call__(self, img: np.ndarray): + h, w = img.shape[0:2] + pad = 4 * [0] + pad[2] = 0 if (h % self.stride == 0) else self.stride - (h % self.stride) # down + pad[3] = 0 if (w % self.stride == 0) else self.stride - (w % self.stride) # right + + img_padded = img + pad_up = np.tile(img_padded[0:1, :, :] * 0 + self.padValue, (pad[0], 1, 1)) + img_padded = np.concatenate((pad_up, img_padded), axis=0) + pad_left = np.tile(img_padded[:, 0:1, :] * 0 + self.padValue, (1, pad[1], 1)) + img_padded = np.concatenate((pad_left, img_padded), axis=1) + pad_down = np.tile(img_padded[-2:-1, :, :] * 0 + self.padValue, (pad[2], 1, 1)) + img_padded = np.concatenate((img_padded, pad_down), axis=0) + pad_right = np.tile(img_padded[:, -2:-1, :] * 0 + self.padValue, (1, pad[3], 1)) + img_padded = np.concatenate((img_padded, pad_right), axis=1) + + return img_padded, pad + + +class RemovePadding: + """ + Remove the padding values. + + Args: + stride(int): Scales for resizing the images. + + """ + + def __init__(self, stride: int = 8): + self.stride = stride + + def __call__(self, data: np.ndarray, imageToTest_padded: np.ndarray, oriImg: np.ndarray, pad: list) -> np.ndarray: + heatmap = np.transpose(np.squeeze(data), (1, 2, 0)) + heatmap = cv2.resize(heatmap, (0, 0), fx=self.stride, fy=self.stride, interpolation=cv2.INTER_CUBIC) + heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :] + heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC) + + return heatmap + + +class DrawPose: + """ + Draw Pose estimation results on canvas. + + Args: + stickwidth(int): Angle value to draw approximate ellipse curve, default is 4. + + """ + + def __init__(self, stickwidth: int = 4): + self.stickwidth = stickwidth + + self.limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], [10, 11], [2, 12], [12, 13], + [13, 14], [2, 1], [1, 15], [15, 17], [1, 16], [16, 18], [3, 17], [6, 18]] + + self.colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], + [170, 255, 0], [85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255], + [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], [170, 0, 255], [255, 0, 255], + [255, 0, 170], [255, 0, 85]] + + def __call__(self, canvas: np.ndarray, candidate: np.ndarray, subset: np.ndarray): + for i in range(18): + for n in range(len(subset)): + index = int(subset[n][i]) + if index == -1: + continue + x, y = candidate[index][0:2] + cv2.circle(canvas, (int(x), int(y)), 4, self.colors[i], thickness=-1) + for i in range(17): + for n in range(len(subset)): + index = subset[n][np.array(self.limbSeq[i]) - 1] + if -1 in index: + continue + cur_canvas = canvas.copy() + Y = candidate[index.astype(int), 0] + X = candidate[index.astype(int), 1] + mX = np.mean(X) + mY = np.mean(Y) + length = ((X[0] - X[1])**2 + (Y[0] - Y[1])**2)**0.5 + angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1])) + polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), self.stickwidth), int(angle), 0, 360, + 1) + cv2.fillConvexPoly(cur_canvas, polygon, self.colors[i]) + canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0) + return canvas + + +class DrawHandPose: + """ + Draw hand pose estimation results on canvas. + Args: + show_number(bool): Whether to show estimation ids in canvas, default is False. 
+
+    """
+
+    def __init__(self, show_number: bool = False):
+        self.edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], \
+                      [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
+        self.show_number = show_number
+
+    def __call__(self, canvas: np.ndarray, all_hand_peaks: list):
+        fig = Figure(figsize=plt.figaspect(canvas))
+
+        fig.subplots_adjust(0, 0, 1, 1)
+        fig.subplots_adjust(bottom=0, top=1, left=0, right=1)
+        bg = FigureCanvas(fig)
+        ax = fig.subplots()
+        ax.axis('off')
+        ax.imshow(canvas)
+
+        width, height = ax.figure.get_size_inches() * ax.figure.get_dpi()
+
+        for peaks in all_hand_peaks:
+            for ie, e in enumerate(self.edges):
+                if np.sum(np.all(peaks[e], axis=1) == 0) == 0:
+                    x1, y1 = peaks[e[0]]
+                    x2, y2 = peaks[e[1]]
+                    ax.plot([x1, x2], [y1, y2],
+                            color=matplotlib.colors.hsv_to_rgb([ie / float(len(self.edges)), 1.0, 1.0]))
+
+            for i, keypoint in enumerate(peaks):
+                x, y = keypoint
+                ax.plot(x, y, 'r.')
+                if self.show_number:
+                    ax.text(x, y, str(i))
+        bg.draw()
+        canvas = np.frombuffer(bg.tostring_rgb(), dtype='uint8').reshape(int(height), int(width), 3)
+        return canvas
diff --git a/hub_module/modules/image/object_detection/README.md b/modules/image/object_detection/README.md
similarity index 100%
rename from hub_module/modules/image/object_detection/README.md
rename to modules/image/object_detection/README.md
diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README.md b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README.md
similarity index 100%
rename from hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README.md
rename to modules/image/object_detection/faster_rcnn_resnet50_coco2017/README.md
diff --git a/hub_module/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/__init__.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/__init__.py
similarity index 100%
rename from hub_module/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/__init__.py
rename to modules/image/object_detection/faster_rcnn_resnet50_coco2017/__init__.py
diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/bbox_assigner.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/bbox_assigner.py
similarity index 100%
rename from hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/bbox_assigner.py
rename to modules/image/object_detection/faster_rcnn_resnet50_coco2017/bbox_assigner.py
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/bbox_head.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/bbox_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..f84f65f7b3fccca0e8fe6d8c3806fb5c4ce66e39
--- /dev/null
+++ b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/bbox_head.py
@@ -0,0 +1,241 @@
+# coding=utf-8
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from collections import OrderedDict
+
+from paddle import fluid
+from paddle.fluid.param_attr import ParamAttr
+from paddle.fluid.initializer import Normal, Xavier
+from paddle.fluid.regularizer import L2Decay
+from paddle.fluid.initializer import MSRA
+
+
+class MultiClassNMS(object):
+    # __op__ = fluid.layers.multiclass_nms
+    def __init__(self,
+                 score_threshold=.05,
+                 nms_top_k=-1,
+                 keep_top_k=100,
+                 nms_threshold=.5,
+                 normalized=False,
+                 nms_eta=1.0,
+                 background_label=0):
+        super(MultiClassNMS, self).__init__()
+        self.score_threshold = score_threshold
+        self.nms_top_k = nms_top_k
+        self.keep_top_k = keep_top_k
+        self.nms_threshold = nms_threshold
+        self.normalized = normalized
+        self.nms_eta = nms_eta
+        self.background_label = background_label
+
+
+class SmoothL1Loss(object):
+    '''
+    Smooth L1 loss
+    Args:
+        sigma (float): hyper param in smooth l1 loss
+    '''
+
+    def __init__(self, sigma=1.0):
+        super(SmoothL1Loss, self).__init__()
+        self.sigma = sigma
+
+    def __call__(self, x, y, inside_weight=None, outside_weight=None):
+        return fluid.layers.smooth_l1(
+            x, y, inside_weight=inside_weight, outside_weight=outside_weight, sigma=self.sigma)
+
+
+class BoxCoder(object):
+    def __init__(self, prior_box_var=[0.1, 0.1, 0.2, 0.2], code_type='decode_center_size', box_normalized=False,
+                 axis=1):
+        super(BoxCoder, self).__init__()
+        self.prior_box_var = prior_box_var
+        self.code_type = code_type
+        self.box_normalized = box_normalized
+        self.axis = axis
+
+
+class TwoFCHead(object):
+    """
+    RCNN head with two Fully Connected layers
+    Args:
+        mlp_dim (int): num of filters for the fc layers
+    """
+
+    def __init__(self, mlp_dim=1024):
+        super(TwoFCHead, self).__init__()
+        self.mlp_dim = mlp_dim
+
+    def __call__(self, roi_feat):
+        fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3]
+
+        fc6 = fluid.layers.fc(
+            input=roi_feat,
+            size=self.mlp_dim,
+            act='relu',
+            name='fc6',
+            param_attr=ParamAttr(name='fc6_w', initializer=Xavier(fan_out=fan)),
+            bias_attr=ParamAttr(name='fc6_b', learning_rate=2., regularizer=L2Decay(0.)))
+        head_feat = fluid.layers.fc(
+            input=fc6,
+            size=self.mlp_dim,
+            act='relu',
+            name='fc7',
+            param_attr=ParamAttr(name='fc7_w', initializer=Xavier()),
+            bias_attr=ParamAttr(name='fc7_b', learning_rate=2., regularizer=L2Decay(0.)))
+
+        return head_feat
+
+
+class BBoxHead(object):
+    """
+    RCNN bbox head
+
+    Args:
+        head (object): the head module instance, e.g., `ResNetC5`, `TwoFCHead`
+        box_coder (object): `BoxCoder` instance
+        nms (object): `MultiClassNMS` instance
+        num_classes: number of output classes
+    """
+    __inject__ = ['head', 'box_coder', 'nms', 'bbox_loss']
+    __shared__ = ['num_classes']
+
+    def __init__(self, head, box_coder=BoxCoder(), nms=MultiClassNMS(), bbox_loss=SmoothL1Loss(), num_classes=81):
+        super(BBoxHead, self).__init__()
+        self.head = head
+        self.num_classes = num_classes
+        self.box_coder = box_coder
+        self.nms = nms
+        self.bbox_loss = bbox_loss
+        self.head_feat = None
+
+    def get_head_feat(self, input=None):
+        """
+        Get the bbox head feature map.
+        """
+
+        if input is not None:
+            feat = self.head(input)
+            if isinstance(feat, OrderedDict):
+                feat = list(feat.values())[0]
+            self.head_feat = feat
+        return self.head_feat
+
+    def _get_output(self, roi_feat):
+        """
+        Get bbox head output.
+
+        Args:
+            roi_feat (Variable): RoI feature from RoIExtractor.
+
+        Returns:
+            cls_score(Variable): Classification scores from the bbox head with
+                shape [P, num_classes], where P is the number of RoIs.
+            bbox_pred(Variable): Per-class box regression deltas with shape
+                [P, 4 * num_classes].
+ """ + head_feat = self.get_head_feat(roi_feat) + # when ResNetC5 output a single feature map + if not isinstance(self.head, TwoFCHead): + head_feat = fluid.layers.pool2d(head_feat, pool_type='avg', global_pooling=True) + cls_score = fluid.layers.fc( + input=head_feat, + size=self.num_classes, + act=None, + name='cls_score', + param_attr=ParamAttr(name='cls_score_w', initializer=Normal(loc=0.0, scale=0.01)), + bias_attr=ParamAttr(name='cls_score_b', learning_rate=2., regularizer=L2Decay(0.))) + bbox_pred = fluid.layers.fc( + input=head_feat, + size=4 * self.num_classes, + act=None, + name='bbox_pred', + param_attr=ParamAttr(name='bbox_pred_w', initializer=Normal(loc=0.0, scale=0.001)), + bias_attr=ParamAttr(name='bbox_pred_b', learning_rate=2., regularizer=L2Decay(0.))) + return cls_score, bbox_pred + + def get_loss(self, roi_feat, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights): + """ + Get bbox_head loss. + + Args: + roi_feat (Variable): RoI feature from RoIExtractor. + labels_int32(Variable): Class label of a RoI with shape [P, 1]. + P is the number of RoI. + bbox_targets(Variable): Box label of a RoI with shape + [P, 4 * class_nums]. + bbox_inside_weights(Variable): Indicates whether a box should + contribute to loss. Same shape as bbox_targets. + bbox_outside_weights(Variable): Indicates whether a box should + contribute to loss. Same shape as bbox_targets. + + Return: + Type: Dict + loss_cls(Variable): bbox_head loss. + loss_bbox(Variable): bbox_head loss. + """ + + cls_score, bbox_pred = self._get_output(roi_feat) + + labels_int64 = fluid.layers.cast(x=labels_int32, dtype='int64') + labels_int64.stop_gradient = True + loss_cls = fluid.layers.softmax_with_cross_entropy( + logits=cls_score, label=labels_int64, numeric_stable_mode=True) + loss_cls = fluid.layers.reduce_mean(loss_cls) + loss_bbox = self.bbox_loss( + x=bbox_pred, y=bbox_targets, inside_weight=bbox_inside_weights, outside_weight=bbox_outside_weights) + loss_bbox = fluid.layers.reduce_mean(loss_bbox) + return {'loss_cls': loss_cls, 'loss_bbox': loss_bbox} + + def get_prediction(self, roi_feat, rois, im_info, im_shape, return_box_score=False): + """ + Get prediction bounding box in test stage. + + Args: + roi_feat (Variable): RoI feature from RoIExtractor. + rois (Variable): Output of generate_proposals in rpn head. + im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the + number of input images, each element consists of im_height, + im_width, im_scale. + im_shape (Variable): Actual shape of original image with shape + [B, 3]. B is the number of images, each element consists of + original_height, original_width, 1 + + Returns: + pred_result(Variable): Prediction result with shape [N, 6]. Each + row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]. + N is the total number of prediction. 
+ """ + cls_score, bbox_pred = self._get_output(roi_feat) + + im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3]) + im_scale = fluid.layers.sequence_expand(im_scale, rois) + boxes = rois / im_scale + cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False) + bbox_pred = fluid.layers.reshape(bbox_pred, (-1, self.num_classes, 4)) + # self.box_coder + decoded_box = fluid.layers.box_coder( + prior_box=boxes, + target_box=bbox_pred, + prior_box_var=self.box_coder.prior_box_var, + code_type=self.box_coder.code_type, + box_normalized=self.box_coder.box_normalized, + axis=self.box_coder.axis) + cliped_box = fluid.layers.box_clip(input=decoded_box, im_info=im_shape) + if return_box_score: + return {'bbox': cliped_box, 'score': cls_prob} + # self.nms + pred_result = fluid.layers.multiclass_nms( + bboxes=cliped_box, + scores=cls_prob, + score_threshold=self.nms.score_threshold, + nms_top_k=self.nms.nms_top_k, + keep_top_k=self.nms.keep_top_k, + nms_threshold=self.nms.nms_threshold, + normalized=self.nms.normalized, + nms_eta=self.nms.nms_eta, + background_label=self.nms.background_label) + return pred_result diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/data_feed.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..fbc7cf9b00905142cdaa38b530f55ee6804f1463 --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/data_feed.py @@ -0,0 +1,104 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import os +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image, ImageEnhance +from paddle import fluid + +__all__ = ['test_reader'] + + +def test_reader(paths=None, images=None): + """ + data generator + + Args: + paths (list[str]): paths to images. + images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] + + Yield: + res (dict): key contains 'image' and 'im_info', the corresponding values is: + image (numpy.ndarray): the image to be fed into network + im_info (numpy.ndarray): the info about the preprocessed. + """ + img_list = list() + if paths: + for img_path in paths: + assert os.path.isfile(img_path), "The {} isn't a valid file path.".format(img_path) + img = cv2.imread(img_path).astype('float32') + img_list.append(img) + if images is not None: + for img in images: + img_list.append(img) + + for im in img_list: + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + im = im.astype(np.float32, copy=False) + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + mean = np.array(mean)[np.newaxis, np.newaxis, :] + std = np.array(std)[np.newaxis, np.newaxis, :] + im = im / 255.0 + im -= mean + im /= std + + target_size = 800 + max_size = 1333 + + shape = im.shape + # im_shape holds the original shape of image. + im_shape = np.array([shape[0], shape[1], 1.0]).astype('float32') + im_size_min = np.min(shape[0:2]) + im_size_max = np.max(shape[0:2]) + im_scale = float(target_size) / float(im_size_min) + if np.round(im_scale * im_size_max) > max_size: + im_scale = float(max_size) / float(im_size_max) + + resize_w = np.round(im_scale * float(shape[1])) + resize_h = np.round(im_scale * float(shape[0])) + # im_info holds the resize info of image. 
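+        # the short side is scaled to target_size (800) unless that would push
+        # the long side past max_size (1333), in which case the long side caps
+        # the scale factor instead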
+ im_info = np.array([resize_h, resize_w, im_scale]).astype('float32') + + im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR) + + # HWC --> CHW + im = np.swapaxes(im, 1, 2) + im = np.swapaxes(im, 1, 0) + yield {'image': im, 'im_info': im_info, 'im_shape': im_shape} + + +def padding_minibatch(batch_data, coarsest_stride=0, use_padded_im_info=True): + max_shape_org = np.array([data['image'].shape for data in batch_data]).max(axis=0) + if coarsest_stride > 0: + max_shape = np.zeros((3)).astype('int32') + max_shape[1] = int(np.ceil(max_shape_org[1] / coarsest_stride) * coarsest_stride) + max_shape[2] = int(np.ceil(max_shape_org[2] / coarsest_stride) * coarsest_stride) + else: + max_shape = max_shape_org.astype('int32') + + padding_image = list() + padding_info = list() + padding_shape = list() + + for data in batch_data: + im_c, im_h, im_w = data['image'].shape + # image + padding_im = np.zeros((im_c, max_shape[1], max_shape[2]), dtype=np.float32) + padding_im[:, 0:im_h, 0:im_w] = data['image'] + padding_image.append(padding_im) + # im_info + data['im_info'][0] = max_shape[1] if use_padded_im_info else max_shape_org[1] + data['im_info'][1] = max_shape[2] if use_padded_im_info else max_shape_org[2] + padding_info.append(data['im_info']) + padding_shape.append(data['im_shape']) + + padding_image = np.array(padding_image).astype('float32') + padding_info = np.array(padding_info).astype('float32') + padding_shape = np.array(padding_shape).astype('float32') + return padding_image, padding_info, padding_shape diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/label_file.txt b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/label_file.txt similarity index 100% rename from hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/label_file.txt rename to modules/image/object_detection/faster_rcnn_resnet50_coco2017/label_file.txt diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/module.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/module.py new file mode 100644 index 0000000000000000000000000000000000000000..ef3e7bf326209ac9efb9d9a099230694c16f37d8 --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/module.py @@ -0,0 +1,384 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import ast +import argparse +from collections import OrderedDict +from functools import partial +from math import ceil + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.module.module import moduleinfo, runnable, serving +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.io.parser import txt_parser +from paddlehub.common.paddle_helper import add_vars_prefix + +from faster_rcnn_resnet50_coco2017.processor import load_label_info, postprocess, base64_to_cv2 +from faster_rcnn_resnet50_coco2017.data_feed import test_reader, padding_minibatch +from faster_rcnn_resnet50_coco2017.resnet import ResNet, ResNetC5 +from faster_rcnn_resnet50_coco2017.rpn_head import AnchorGenerator, RPNTargetAssign, GenerateProposals, RPNHead +from faster_rcnn_resnet50_coco2017.bbox_head import MultiClassNMS, BBoxHead, SmoothL1Loss +from faster_rcnn_resnet50_coco2017.bbox_assigner import BBoxAssigner +from faster_rcnn_resnet50_coco2017.roi_extractor import RoIAlign + + +@moduleinfo( + 
name="faster_rcnn_resnet50_coco2017", + version="1.1.0", + type="cv/object_detection", + summary="Baidu's Faster R-CNN model for object detection with backbone ResNet50, trained with dataset COCO2017", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class FasterRCNNResNet50(hub.Module): + def _initialize(self): + # default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 800, 1333] + self.default_pretrained_model_path = os.path.join(self.directory, "faster_rcnn_resnet50_model") + self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def context(self, num_classes=81, trainable=True, pretrained=True, phase='train'): + """ + Distill the Head Features, so as to perform transfer learning. + + Args: + num_classes (int): number of categories + trainable (bool): whether to set parameters trainable. + pretrained (bool): whether to load default pretrained model. + phase (str): optional choices are 'train' and 'predict'. + + Returns: + inputs (dict): the input variables. + outputs (dict): the output variables. + context_prog (Program): the program to execute transfer learning. 
+ """ + context_prog = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(context_prog, startup_program): + with fluid.unique_name.guard(): + image = fluid.layers.data(name='image', shape=[-1, 3, -1, -1], dtype='float32') + # backbone + backbone = ResNet(norm_type='affine_channel', depth=50, feature_maps=4, freeze_at=2) + body_feats = backbone(image) + + # var_prefix + var_prefix = '@HUB_{}@'.format(self.name) + im_info = fluid.layers.data(name='im_info', shape=[3], dtype='float32', lod_level=0) + im_shape = fluid.layers.data(name='im_shape', shape=[3], dtype='float32', lod_level=0) + body_feat_names = list(body_feats.keys()) + # rpn_head: RPNHead + rpn_head = self.rpn_head() + rois = rpn_head.get_proposals(body_feats, im_info, mode=phase) + # train + if phase == 'train': + gt_bbox = fluid.layers.data(name='gt_bbox', shape=[4], dtype='float32', lod_level=1) + is_crowd = fluid.layers.data(name='is_crowd', shape=[1], dtype='int32', lod_level=1) + gt_class = fluid.layers.data(name='gt_class', shape=[1], dtype='int32', lod_level=1) + rpn_loss = rpn_head.get_loss(im_info, gt_bbox, is_crowd) + # bbox_assigner: BBoxAssigner + bbox_assigner = self.bbox_assigner(num_classes) + outs = fluid.layers.generate_proposal_labels( + rpn_rois=rois, + gt_classes=gt_class, + is_crowd=is_crowd, + gt_boxes=gt_bbox, + im_info=im_info, + batch_size_per_im=bbox_assigner.batch_size_per_im, + fg_fraction=bbox_assigner.fg_fraction, + fg_thresh=bbox_assigner.fg_thresh, + bg_thresh_hi=bbox_assigner.bg_thresh_hi, + bg_thresh_lo=bbox_assigner.bg_thresh_lo, + bbox_reg_weights=bbox_assigner.bbox_reg_weights, + class_nums=bbox_assigner.class_nums, + use_random=bbox_assigner.use_random) + rois = outs[0] + + body_feat = body_feats[body_feat_names[-1]] + # roi_extractor: RoIAlign + roi_extractor = self.roi_extractor() + roi_feat = fluid.layers.roi_align( + input=body_feat, + rois=rois, + pooled_height=roi_extractor.pooled_height, + pooled_width=roi_extractor.pooled_width, + spatial_scale=roi_extractor.spatial_scale, + sampling_ratio=roi_extractor.sampling_ratio) + # head_feat + bbox_head = self.bbox_head(num_classes) + head_feat = bbox_head.head(roi_feat) + if isinstance(head_feat, OrderedDict): + head_feat = list(head_feat.values())[0] + if phase == 'train': + inputs = { + 'image': var_prefix + image.name, + 'im_info': var_prefix + im_info.name, + 'im_shape': var_prefix + im_shape.name, + 'gt_class': var_prefix + gt_class.name, + 'gt_bbox': var_prefix + gt_bbox.name, + 'is_crowd': var_prefix + is_crowd.name + } + outputs = { + 'head_features': var_prefix + head_feat.name, + 'rpn_cls_loss': var_prefix + rpn_loss['rpn_cls_loss'].name, + 'rpn_reg_loss': var_prefix + rpn_loss['rpn_reg_loss'].name, + 'generate_proposal_labels': [var_prefix + var.name for var in outs] + } + elif phase == 'predict': + pred = bbox_head.get_prediction(roi_feat, rois, im_info, im_shape) + inputs = { + 'image': var_prefix + image.name, + 'im_info': var_prefix + im_info.name, + 'im_shape': var_prefix + im_shape.name + } + outputs = { + 'head_features': var_prefix + head_feat.name, + 'rois': var_prefix + rois.name, + 'bbox_out': var_prefix + pred.name + } + add_vars_prefix(context_prog, var_prefix) + add_vars_prefix(startup_program, var_prefix) + + global_vars = context_prog.global_block().vars + inputs = {key: global_vars[value] for key, value in inputs.items()} + outputs = { + key: global_vars[value] if not isinstance(value, list) else [global_vars[var] for var in value] + for key, value in outputs.items() + } + + for param in 
context_prog.global_block().iter_parameters():
+                    param.trainable = trainable
+
+                place = fluid.CPUPlace()
+                exe = fluid.Executor(place)
+                exe.run(startup_program)
+                if pretrained:
+
+                    def _if_exist(var):
+                        if num_classes != 81:
+                            if 'bbox_pred' in var.name or 'cls_score' in var.name:
+                                return False
+                        return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name))
+
+                    fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist)
+                return inputs, outputs, context_prog
+
+    def rpn_head(self):
+        return RPNHead(
+            anchor_generator=AnchorGenerator(
+                anchor_sizes=[32, 64, 128, 256, 512],
+                aspect_ratios=[0.5, 1.0, 2.0],
+                stride=[16.0, 16.0],
+                variance=[1.0, 1.0, 1.0, 1.0]),
+            rpn_target_assign=RPNTargetAssign(
+                rpn_batch_size_per_im=256,
+                rpn_fg_fraction=0.5,
+                rpn_negative_overlap=0.3,
+                rpn_positive_overlap=0.7,
+                rpn_straddle_thresh=0.0),
+            # standard two-stage budgets: 12000/2000 proposals before/after NMS
+            # at train time, 6000/1000 at test time
+            train_proposal=GenerateProposals(min_size=0.0, nms_thresh=0.7, pre_nms_top_n=12000, post_nms_top_n=2000),
+            test_proposal=GenerateProposals(min_size=0.0, nms_thresh=0.7, pre_nms_top_n=6000, post_nms_top_n=1000))
+
+    def roi_extractor(self):
+        return RoIAlign(resolution=14, sampling_ratio=0, spatial_scale=0.0625)
+
+    def bbox_head(self, num_classes):
+        return BBoxHead(
+            head=ResNetC5(depth=50, norm_type='affine_channel'),
+            nms=MultiClassNMS(keep_top_k=100, nms_threshold=0.5, score_threshold=0.05),
+            bbox_loss=SmoothL1Loss(),
+            num_classes=num_classes)
+
+    def bbox_assigner(self, num_classes):
+        return BBoxAssigner(
+            batch_size_per_im=512,
+            bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
+            bg_thresh_hi=0.5,
+            bg_thresh_lo=0.0,
+            fg_fraction=0.25,
+            fg_thresh=0.5,
+            class_nums=num_classes)
+
+    def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True):
+        if combined:
+            model_filename = "__model__" if not model_filename else model_filename
+            params_filename = "__params__" if not params_filename else params_filename
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+
+        program, feeded_var_names, target_vars = fluid.io.load_inference_model(
+            dirname=self.default_pretrained_model_path, executor=exe)
+
+        fluid.io.save_inference_model(
+            dirname=dirname,
+            main_program=program,
+            executor=exe,
+            feeded_var_names=feeded_var_names,
+            target_vars=target_vars,
+            model_filename=model_filename,
+            params_filename=params_filename)
+
+    def object_detection(self,
+                         paths=None,
+                         images=None,
+                         data=None,
+                         use_gpu=False,
+                         batch_size=1,
+                         output_dir='detection_result',
+                         score_thresh=0.5,
+                         visualization=True):
+        """API of Object Detection.
+
+        Args:
+            paths (list[str]): The paths of images.
+            images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
+            batch_size (int): batch size.
+            use_gpu (bool): Whether to use gpu.
+            output_dir (str): The path to store output images.
+            visualization (bool): Whether to save image or not.
+            score_thresh (float): threshold for object detection.
+
+        Returns:
+            res (list[dict]): The result of COCO2017 object detection; keys include 'data' and 'save_path', where:
+                data (dict): the result of object detection, whose keys include 'left', 'top', 'right', 'bottom', 'label' and 'confidence', where:
+                    left (float): The X coordinate of the upper left corner of the bounding box;
+                    top (float): The Y coordinate of the upper left corner of the bounding box;
+                    right (float): The X coordinate of the lower right corner of the bounding box;
+                    bottom (float): The Y coordinate of the lower right corner of the bounding box;
+                    label (str): The label of the detection result;
+                    confidence (float): The confidence of the detection result.
+                save_path (str, optional): The path to save output images.
+        """
+        if use_gpu:
+            try:
+                _places = os.environ["CUDA_VISIBLE_DEVICES"]
+                int(_places[0])
+            except:
+                raise RuntimeError(
+                    "Environment variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
+                )
+        paths = paths if paths else list()
+        if data and 'image' in data:
+            paths += data['image']
+
+        all_images = list()
+        for yield_return in test_reader(paths, images):
+            all_images.append(yield_return)
+
+        images_num = len(all_images)
+        loop_num = ceil(images_num / batch_size)
+        res = []
+        for iter_id in range(loop_num):
+            batch_data = []
+            handle_id = iter_id * batch_size
+            for image_id in range(batch_size):
+                try:
+                    batch_data.append(all_images[handle_id + image_id])
+                except:
+                    pass
+
+            padding_image, padding_info, padding_shape = padding_minibatch(batch_data)
+            padding_image_tensor = PaddleTensor(padding_image.copy())
+            padding_info_tensor = PaddleTensor(padding_info.copy())
+            padding_shape_tensor = PaddleTensor(padding_shape.copy())
+            feed_list = [padding_image_tensor, padding_info_tensor, padding_shape_tensor]
+            if use_gpu:
+                data_out = self.gpu_predictor.run(feed_list)
+            else:
+                data_out = self.cpu_predictor.run(feed_list)
+            output = postprocess(
+                paths=paths,
+                images=images,
+                data_out=data_out,
+                score_thresh=score_thresh,
+                label_names=self.label_names,
+                output_dir=output_dir,
+                handle_id=handle_id,
+                visualization=visualization)
+            res += output
+        return res
+
+    def add_module_config_arg(self):
+        """
+        Add the command config options.
+        """
+        self.arg_config_group.add_argument(
+            '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not")
+
+        self.arg_config_group.add_argument('--batch_size', type=int, default=1, help="batch size for prediction")
+
+    def add_module_input_arg(self):
+        """
+        Add the command input options.
+        """
+        self.arg_input_group.add_argument('--input_path', type=str, default=None, help="input data")
+
+        self.arg_input_group.add_argument('--input_file', type=str, default=None, help="file containing input data")
+
+    def check_input_data(self, args):
+        input_data = []
+        if args.input_path:
+            input_data = [args.input_path]
+        elif args.input_file:
+            if not os.path.exists(args.input_file):
+                raise RuntimeError("File %s does not exist." % args.input_file)
+            else:
+                input_data = txt_parser.parse(args.input_file, use_strip=True)
+        return input_data
+
+    @serving
+    def serving_method(self, images, **kwargs):
+        """
+        Run as a service.
+        """
+        images_decode = [base64_to_cv2(image) for image in images]
+        results = self.object_detection(images=images_decode, **kwargs)
+        return results
+
+    @runnable
+    def run_cmd(self, argvs):
+        self.parser = argparse.ArgumentParser(
+            description="Run the {}".format(self.name),
+            prog="hub run {}".format(self.name),
+            usage='%(prog)s',
+            add_help=True)
+        self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required")
+        self.arg_config_group = self.parser.add_argument_group(
+            title="Config options", description="Run configuration for controlling module behavior, not required.")
+        self.add_module_config_arg()
+
+        self.add_module_input_arg()
+        args = self.parser.parse_args(argvs)
+        input_data = self.check_input_data(args)
+        if len(input_data) == 0:
+            self.parser.print_help()
+            exit(1)
+        else:
+            for image_path in input_data:
+                if not os.path.exists(image_path):
+                    raise RuntimeError("File %s does not exist." % image_path)
+        return self.object_detection(paths=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size)
diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/name_adapter.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/name_adapter.py
similarity index 100%
rename from hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/name_adapter.py
rename to modules/image/object_detection/faster_rcnn_resnet50_coco2017/name_adapter.py
diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/nonlocal_helper.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/nonlocal_helper.py
new file mode 100644
index 0000000000000000000000000000000000000000..839df4caf744280001f033d8ef6a3d560277368e
--- /dev/null
+++ b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/nonlocal_helper.py
@@ -0,0 +1,151 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import paddle.fluid as fluid
+from paddle.fluid import ParamAttr
+
+nonlocal_params = {
+    "use_zero_init_conv": False,
+    "conv_init_std": 0.01,
+    "no_bias": True,
+    "use_maxpool": False,
+    "use_softmax": True,
+    "use_bn": False,
+    "use_scale": True,  # vital for the model performance!!!
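+    # use_scale divides the theta-phi affinity logits by sqrt(dim_inner) before
+    # the softmax, analogous to scaled dot-product attention.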
+    "use_affine": False,
+    "bn_momentum": 0.9,
+    "bn_epsilon": 1.0000001e-5,
+    "bn_init_gamma": 0.9,
+    "weight_decay_bn": 1.e-4,
+}
+
+
+def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner, max_pool_stride=2):
+    cur = input
+    # theta, phi and g are 1x1-conv embeddings of the input feature map
+    theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \
+                                filter_size = [1, 1], stride = [1, 1], \
+                                padding = [0, 0], \
+                                param_attr=ParamAttr(name = prefix + '_theta' + "_w", \
+                                    initializer = fluid.initializer.Normal(loc = 0.0,
+                                        scale = nonlocal_params["conv_init_std"])), \
+                                bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \
+                                    initializer = fluid.initializer.Constant(value = 0.)) \
+                                    if not nonlocal_params["no_bias"] else False, \
+                                name = prefix + '_theta')
+    theta_shape = theta.shape
+    theta_shape_op = fluid.layers.shape(theta)
+    theta_shape_op.stop_gradient = True
+
+    if nonlocal_params["use_maxpool"]:
+        max_pool = fluid.layers.pool2d(input = cur, \
+                                       pool_size = [max_pool_stride, max_pool_stride], \
+                                       pool_type = 'max', \
+                                       pool_stride = [max_pool_stride, max_pool_stride], \
+                                       pool_padding = [0, 0], \
+                                       name = prefix + '_pool')
+    else:
+        max_pool = cur
+
+    phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \
+                              filter_size = [1, 1], stride = [1, 1], \
+                              padding = [0, 0], \
+                              param_attr = ParamAttr(name = prefix + '_phi' + "_w", \
+                                  initializer = fluid.initializer.Normal(loc = 0.0,
+                                      scale = nonlocal_params["conv_init_std"])), \
+                              bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \
+                                  initializer = fluid.initializer.Constant(value = 0.)) \
+                                  if (nonlocal_params["no_bias"] == 0) else False, \
+                              name = prefix + '_phi')
+    phi_shape = phi.shape
+
+    g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \
+                            filter_size = [1, 1], stride = [1, 1], \
+                            padding = [0, 0], \
+                            param_attr = ParamAttr(name = prefix + '_g' + "_w", \
+                                initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \
+                            bias_attr = ParamAttr(name = prefix + '_g' + "_b", \
+                                initializer = fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) else False, \
+                            name = prefix + '_g')
+    g_shape = g.shape
+    # we have to use explicit batch size (to support arbitrary spacetime size)
+    # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784)
+    theta = fluid.layers.reshape(theta, shape=(0, 0, -1))
+    theta = fluid.layers.transpose(theta, [0, 2, 1])
+    phi = fluid.layers.reshape(phi, [0, 0, -1])
+    theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity')
+    g = fluid.layers.reshape(g, [0, 0, -1])
+
+    if nonlocal_params["use_softmax"]:
+        if nonlocal_params["use_scale"]:
+            theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5)
+        else:
+            theta_phi_sc = theta_phi
+        p = fluid.layers.softmax(theta_phi_sc, name=prefix + '_affinity' + '_prob')
+    else:
+        # only the softmax-normalized affinity from the paper is supported
+        raise NotImplementedError("space_nonlocal requires nonlocal_params['use_softmax'] to be True.")
+
+    # note g's axis[2] corresponds to p's axis[2]
+    # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1)
+    p = fluid.layers.transpose(p, [0, 2, 1])
+    t = fluid.layers.matmul(g, p, name=prefix + '_y')
+
+    # reshape back
+    # e.g. 
(8, 1024, 784) => (8, 1024, 4, 14, 14) + t_shape = t.shape + t_re = fluid.layers.reshape(t, shape=list(theta_shape), actual_shape=theta_shape_op) + blob_out = t_re + blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \ + filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \ + param_attr = ParamAttr(name = prefix + '_out' + "_w", \ + initializer = fluid.initializer.Constant(value = 0.) \ + if nonlocal_params["use_zero_init_conv"] \ + else fluid.initializer.Normal(loc = 0.0, + scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_out' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) \ + if (nonlocal_params["no_bias"] == 0) else False, \ + name = prefix + '_out') + blob_out_shape = blob_out.shape + + if nonlocal_params["use_bn"]: + bn_name = prefix + "_bn" + blob_out = fluid.layers.batch_norm(blob_out, \ + # is_test = test_mode, \ + momentum = nonlocal_params["bn_momentum"], \ + epsilon = nonlocal_params["bn_epsilon"], \ + name = bn_name, \ + param_attr = ParamAttr(name = bn_name + "_s", \ + initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \ + regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ + bias_attr = ParamAttr(name = bn_name + "_b", \ + regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ + moving_mean_name = bn_name + "_rm", \ + moving_variance_name = bn_name + "_riv") # add bn + + if nonlocal_params["use_affine"]: + affine_scale = fluid.layers.create_parameter(\ + shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ + attr=ParamAttr(name=prefix + '_affine' + '_s'), \ + default_initializer = fluid.initializer.Constant(value = 1.)) + affine_bias = fluid.layers.create_parameter(\ + shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ + attr=ParamAttr(name=prefix + '_affine' + '_b'), \ + default_initializer = fluid.initializer.Constant(value = 0.)) + blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \ + bias = affine_bias, name = prefix + '_affine') # add affine + + return blob_out + + +def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner): + ''' + add_space_nonlocal: + Non-local Neural Networks: see https://arxiv.org/abs/1711.07971 + ''' + conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner) + output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum') + return output diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/processor.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..077b7be32afcf127531028d25593872af7c109b0 --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/processor.py @@ -0,0 +1,159 @@ +# coding=utf-8 +import base64 +import os + +import cv2 +import numpy as np +from PIL import Image, ImageDraw + +__all__ = [ + 'base64_to_cv2', + 'load_label_info', + 'postprocess', +] + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def get_save_image_name(img, output_dir, image_path): + """Get save image name from source image path. 
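+
+    The extension is inferred from the source file name when it has one;
+    otherwise it falls back to the PIL image format (PNG / JPEG / BMP) or,
+    failing that, the image mode (RGB / L -> .jpg, RGBA / P -> .png).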
+ """ + if not os.path.exists(output_dir): + os.makedirs(output_dir) + image_name = os.path.split(image_path)[-1] + name, ext = os.path.splitext(image_name) + + if ext == '': + if img.format == 'PNG': + ext = '.png' + elif img.format == 'JPEG': + ext = '.jpg' + elif img.format == 'BMP': + ext = '.bmp' + else: + if img.mode == "RGB" or img.mode == "L": + ext = ".jpg" + elif img.mode == "RGBA" or img.mode == "P": + ext = '.png' + + return os.path.join(output_dir, "{}".format(name)) + ext + + +def draw_bounding_box_on_image(image_path, data_list, save_dir): + image = Image.open(image_path) + draw = ImageDraw.Draw(image) + for data in data_list: + left, right, top, bottom = data['left'], data['right'], data['top'], data['bottom'] + + # draw bbox + draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=2, fill='red') + + # draw label + if image.mode == 'RGB': + text = data['label'] + ": %.2f%%" % (100 * data['confidence']) + textsize_width, textsize_height = draw.textsize(text=text) + draw.rectangle( + xy=(left, top - (textsize_height + 5), left + textsize_width + 10, top), fill=(255, 255, 255)) + draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) + + save_name = get_save_image_name(image, save_dir, image_path) + if os.path.exists(save_name): + os.remove(save_name) + + image.save(save_name) + return save_name + + +def clip_bbox(bbox, img_width, img_height): + xmin = max(min(bbox[0], img_width), 0.) + ymin = max(min(bbox[1], img_height), 0.) + xmax = max(min(bbox[2], img_width), 0.) + ymax = max(min(bbox[3], img_height), 0.) + return float(xmin), float(ymin), float(xmax), float(ymax) + + +def load_label_info(file_path): + with open(file_path, 'r') as fr: + text = fr.readlines() + label_names = [] + for info in text: + label_names.append(info.strip()) + return label_names + + +def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True): + """ + postprocess the lod_tensor produced by fluid.Executor.run + + Args: + paths (list[str]): the path of images. + images (list(numpy.ndarray)): list of images, shape of each is [H, W, C]. + data_out (lod_tensor): data produced by executor.run. + score_thresh (float): the low limit of bounding box. + label_names (list[str]): label names. + output_dir (str): output directory. + handle_id (int): The number of images that have been handled. + visualization (bool): whether to save as images. + + Returns: + res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: + data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: + left (float): The X coordinate of the upper left corner of the bounding box; + top (float): The Y coordinate of the upper left corner of the bounding box; + right (float): The X coordinate of the lower right corner of the bounding box; + bottom (float): The Y coordinate of the lower right corner of the bounding box; + label (str): The label of detection result; + confidence (float): The confidence of detection result. + save_path (str): The path to save output images. 
+ """ + lod_tensor = data_out[0] + lod = lod_tensor.lod[0] + results = lod_tensor.as_ndarray() + + if handle_id < len(paths): + unhandled_paths = paths[handle_id:] + unhandled_paths_num = len(unhandled_paths) + else: + unhandled_paths_num = 0 + + output = [] + for index in range(len(lod) - 1): + output_i = {'data': []} + if index < unhandled_paths_num: + org_img_path = unhandled_paths[index] + org_img = Image.open(org_img_path) + output_i['path'] = org_img_path + else: + org_img = images[index - unhandled_paths_num] + org_img = org_img.astype(np.uint8) + org_img = Image.fromarray(org_img[:, :, ::-1]) + if visualization: + org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format((handle_id + index))) + org_img.save(org_img_path) + org_img_height = org_img.height + org_img_width = org_img.width + result_i = results[lod[index]:lod[index + 1]] + for row in result_i: + if len(row) != 6: + continue + if row[1] < score_thresh: + continue + category_id = int(row[0]) + confidence = row[1] + bbox = row[2:] + dt = {} + dt['label'] = label_names[category_id] + dt['confidence'] = float(confidence) + dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height) + output_i['data'].append(dt) + + output.append(output_i) + if visualization: + output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir) + + return output diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/resnet.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..77a3f7f4c7b16c3f9c65c46fc93eb394befa5110 --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/resnet.py @@ -0,0 +1,364 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +from collections import OrderedDict +from numbers import Integral + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.framework import Variable +from paddle.fluid.regularizer import L2Decay +from paddle.fluid.initializer import Constant + +from .nonlocal_helper import add_space_nonlocal +from .name_adapter import NameAdapter + +__all__ = ['ResNet', 'ResNetC5'] + + +class ResNet(object): + """ + Residual Network, see https://arxiv.org/abs/1512.03385 + Args: + depth (int): ResNet depth, should be 34, 50. 
+ freeze_at (int): freeze the backbone at which stage + norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel' + freeze_norm (bool): freeze normalization layers + norm_decay (float): weight decay for normalization layer weights + variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently + feature_maps (list): index of stages whose feature maps are returned + dcn_v2_stages (list): index of stages who select deformable conv v2 + nonlocal_stages (list): index of stages who select nonlocal networks + """ + __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name'] + + def __init__(self, + depth=50, + freeze_at=0, + norm_type='sync_bn', + freeze_norm=False, + norm_decay=0., + variant='b', + feature_maps=[3, 4, 5], + dcn_v2_stages=[], + weight_prefix_name='', + nonlocal_stages=[], + get_prediction=False, + class_dim=1000): + super(ResNet, self).__init__() + + if isinstance(feature_maps, Integral): + feature_maps = [feature_maps] + + assert depth in [34, 50], \ + "depth {} not in [34, 50]" + assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant" + assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" + assert len(feature_maps) > 0, "need one or more feature maps" + assert norm_type in ['bn', 'sync_bn', 'affine_channel'] + assert not (len(nonlocal_stages)>0 and depth<50), \ + "non-local is not supported for resnet18 or resnet34" + + self.depth = depth + self.freeze_at = freeze_at + self.norm_type = norm_type + self.norm_decay = norm_decay + self.freeze_norm = freeze_norm + self.variant = variant + self._model_type = 'ResNet' + self.feature_maps = feature_maps + self.dcn_v2_stages = dcn_v2_stages + self.depth_cfg = { + 34: ([3, 4, 6, 3], self.basicblock), + 50: ([3, 4, 6, 3], self.bottleneck), + } + self.stage_filters = [64, 128, 256, 512] + self._c1_out_chan_num = 64 + self.na = NameAdapter(self) + self.prefix_name = weight_prefix_name + + self.nonlocal_stages = nonlocal_stages + self.nonlocal_mod_cfg = { + 50: 2, + 101: 5, + 152: 8, + 200: 12, + } + self.get_prediction = get_prediction + self.class_dim = class_dim + + def _conv_offset(self, input, filter_size, stride, padding, act=None, name=None): + out_channel = filter_size * filter_size * 3 + out = fluid.layers.conv2d( + input, + num_filters=out_channel, + filter_size=filter_size, + stride=stride, + padding=padding, + param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"), + bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"), + act=act, + name=name) + return out + + def _conv_norm(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None, dcn_v2=False): + _name = self.prefix_name + name if self.prefix_name != '' else name + if not dcn_v2: + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=_name + "_weights"), + bias_attr=False, + name=_name + '.conv2d.output.1') + else: + # select deformable conv" + offset_mask = self._conv_offset( + input=input, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + act=None, + name=_name + "_conv_offset") + offset_channel = filter_size**2 * 2 + mask_channel = filter_size**2 + offset, mask = fluid.layers.split(input=offset_mask, num_or_sections=[offset_channel, mask_channel], dim=1) + mask = fluid.layers.sigmoid(mask) + conv = fluid.layers.deformable_conv( + input=input, + offset=offset, + mask=mask, + num_filters=num_filters, + 
filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + deformable_groups=1, + im2col_step=1, + param_attr=ParamAttr(name=_name + "_weights"), + bias_attr=False, + name=_name + ".conv2d.output.1") + + bn_name = self.na.fix_conv_norm_name(name) + bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name + + norm_lr = 0. if self.freeze_norm else 1. + norm_decay = self.norm_decay + pattr = ParamAttr(name=bn_name + '_scale', learning_rate=norm_lr, regularizer=L2Decay(norm_decay)) + battr = ParamAttr(name=bn_name + '_offset', learning_rate=norm_lr, regularizer=L2Decay(norm_decay)) + + if self.norm_type in ['bn', 'sync_bn']: + global_stats = True if self.freeze_norm else False + out = fluid.layers.batch_norm( + input=conv, + act=act, + name=bn_name + '.output.1', + param_attr=pattr, + bias_attr=battr, + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance', + use_global_stats=global_stats) + scale = fluid.framework._get_var(pattr.name) + bias = fluid.framework._get_var(battr.name) + elif self.norm_type == 'affine_channel': + scale = fluid.layers.create_parameter( + shape=[conv.shape[1]], dtype=conv.dtype, attr=pattr, default_initializer=fluid.initializer.Constant(1.)) + bias = fluid.layers.create_parameter( + shape=[conv.shape[1]], dtype=conv.dtype, attr=battr, default_initializer=fluid.initializer.Constant(0.)) + out = fluid.layers.affine_channel(x=conv, scale=scale, bias=bias, act=act) + if self.freeze_norm: + scale.stop_gradient = True + bias.stop_gradient = True + return out + + def _shortcut(self, input, ch_out, stride, is_first, name): + max_pooling_in_short_cut = self.variant == 'd' + ch_in = input.shape[1] + # the naming rule is same as pretrained weight + name = self.na.fix_shortcut_name(name) + std_senet = getattr(self, 'std_senet', False) + if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first): + if std_senet: + if is_first: + return self._conv_norm(input, ch_out, 1, stride, name=name) + else: + return self._conv_norm(input, ch_out, 3, stride, name=name) + if max_pooling_in_short_cut and not is_first: + input = fluid.layers.pool2d( + input=input, pool_size=2, pool_stride=2, pool_padding=0, ceil_mode=True, pool_type='avg') + return self._conv_norm(input, ch_out, 1, 1, name=name) + return self._conv_norm(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck(self, input, num_filters, stride, is_first, name, dcn_v2=False): + if self.variant == 'a': + stride1, stride2 = stride, 1 + else: + stride1, stride2 = 1, stride + + # ResNeXt + groups = getattr(self, 'groups', 1) + group_width = getattr(self, 'group_width', -1) + if groups == 1: + expand = 4 + elif (groups * group_width) == 256: + expand = 1 + else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d + num_filters = num_filters // 2 + expand = 2 + + conv_name1, conv_name2, conv_name3, \ + shortcut_name = self.na.fix_bottleneck_name(name) + std_senet = getattr(self, 'std_senet', False) + if std_senet: + conv_def = [[int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1], + [num_filters, 3, stride2, 'relu', groups, conv_name2], + [num_filters * expand, 1, 1, None, 1, conv_name3]] + else: + conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], + [num_filters, 3, stride2, 'relu', groups, conv_name2], + [num_filters * expand, 1, 1, None, 1, conv_name3]] + + residual = input + for i, (c, k, s, act, g, _name) in enumerate(conv_def): + residual = self._conv_norm( + input=residual, + num_filters=c, + 
filter_size=k, + stride=s, + act=act, + groups=g, + name=_name, + dcn_v2=(i == 1 and dcn_v2)) + short = self._shortcut(input, num_filters * expand, stride, is_first=is_first, name=shortcut_name) + # Squeeze-and-Excitation + if callable(getattr(self, '_squeeze_excitation', None)): + residual = self._squeeze_excitation(input=residual, num_channels=num_filters, name='fc' + name) + return fluid.layers.elementwise_add(x=short, y=residual, act='relu', name=name + ".add.output.5") + + def basicblock(self, input, num_filters, stride, is_first, name, dcn_v2=False): + assert dcn_v2 is False, "Not implemented yet." + conv0 = self._conv_norm( + input=input, num_filters=num_filters, filter_size=3, act='relu', stride=stride, name=name + "_branch2a") + conv1 = self._conv_norm(input=conv0, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") + short = self._shortcut(input, num_filters, stride, is_first, name=name + "_branch1") + return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') + + def layer_warp(self, input, stage_num): + """ + Args: + input (Variable): input variable. + stage_num (int): the stage number, should be 2, 3, 4, 5 + + Returns: + The last variable in endpoint-th stage. + """ + assert stage_num in [2, 3, 4, 5] + + stages, block_func = self.depth_cfg[self.depth] + count = stages[stage_num - 2] + + ch_out = self.stage_filters[stage_num - 2] + is_first = False if stage_num != 2 else True + dcn_v2 = True if stage_num in self.dcn_v2_stages else False + + nonlocal_mod = 1000 + if stage_num in self.nonlocal_stages: + nonlocal_mod = self.nonlocal_mod_cfg[self.depth] if stage_num == 4 else 2 + + # Make the layer name and parameter name consistent + # with ImageNet pre-trained model + conv = input + for i in range(count): + conv_name = self.na.fix_layer_warp_name(stage_num, count, i) + if self.depth < 50: + is_first = True if i == 0 and stage_num == 2 else False + conv = block_func( + input=conv, + num_filters=ch_out, + stride=2 if i == 0 and stage_num != 2 else 1, + is_first=is_first, + name=conv_name, + dcn_v2=dcn_v2) + + # add non local model + dim_in = conv.shape[1] + nonlocal_name = "nonlocal_conv{}".format(stage_num) + if i % nonlocal_mod == nonlocal_mod - 1: + conv = add_space_nonlocal(conv, dim_in, dim_in, nonlocal_name + '_{}'.format(i), int(dim_in / 2)) + return conv + + def c1_stage(self, input): + out_chan = self._c1_out_chan_num + + conv1_name = self.na.fix_c1_stage_name() + + if self.variant in ['c', 'd']: + conv_def = [ + [out_chan // 2, 3, 2, "conv1_1"], + [out_chan // 2, 3, 1, "conv1_2"], + [out_chan, 3, 1, "conv1_3"], + ] + else: + conv_def = [[out_chan, 7, 2, conv1_name]] + + for (c, k, s, _name) in conv_def: + input = self._conv_norm(input=input, num_filters=c, filter_size=k, stride=s, act='relu', name=_name) + + output = fluid.layers.pool2d(input=input, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + return output + + def __call__(self, input): + assert isinstance(input, Variable) + assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ + "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) + + res_endpoints = [] + + res = input + feature_maps = self.feature_maps + severed_head = getattr(self, 'severed_head', False) + if not severed_head: + res = self.c1_stage(res) + feature_maps = range(2, max(self.feature_maps) + 1) + + for i in feature_maps: + res = self.layer_warp(res, i) + if i in self.feature_maps: + res_endpoints.append(res) + if self.freeze_at >= i: + res.stop_gradient = True + if self.get_prediction: + pool 
= fluid.layers.pool2d(input=res, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + + out = fluid.layers.fc( + input=pool, + size=self.class_dim, + param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv))) + out = fluid.layers.softmax(out) + return out + return OrderedDict( + [('res{}_sum'.format(self.feature_maps[idx]), feat) for idx, feat in enumerate(res_endpoints)]) + + +class ResNetC5(ResNet): + def __init__(self, + depth=50, + freeze_at=2, + norm_type='affine_channel', + freeze_norm=True, + norm_decay=0., + variant='b', + feature_maps=[5], + weight_prefix_name=''): + super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm, norm_decay, variant, feature_maps) + self.severed_head = True diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/roi_extractor.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/roi_extractor.py similarity index 100% rename from hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/roi_extractor.py rename to modules/image/object_detection/faster_rcnn_resnet50_coco2017/roi_extractor.py diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/rpn_head.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/rpn_head.py new file mode 100644 index 0000000000000000000000000000000000000000..a25e81b402ae2089d718ee5287af2b23caa3bafa --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/rpn_head.py @@ -0,0 +1,260 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.initializer import Normal +from paddle.fluid.regularizer import L2Decay + +__all__ = ['AnchorGenerator', 'RPNTargetAssign', 'GenerateProposals', 'RPNHead'] + + +class AnchorGenerator(object): + # __op__ = fluid.layers.anchor_generator + def __init__(self, + stride=[16.0, 16.0], + anchor_sizes=[32, 64, 128, 256, 512], + aspect_ratios=[0.5, 1., 2.], + variance=[1., 1., 1., 1.]): + super(AnchorGenerator, self).__init__() + self.anchor_sizes = anchor_sizes + self.aspect_ratios = aspect_ratios + self.variance = variance + self.stride = stride + + +class RPNTargetAssign(object): + # __op__ = fluid.layers.rpn_target_assign + def __init__(self, + rpn_batch_size_per_im=256, + rpn_straddle_thresh=0., + rpn_fg_fraction=0.5, + rpn_positive_overlap=0.7, + rpn_negative_overlap=0.3, + use_random=True): + super(RPNTargetAssign, self).__init__() + self.rpn_batch_size_per_im = rpn_batch_size_per_im + self.rpn_straddle_thresh = rpn_straddle_thresh + self.rpn_fg_fraction = rpn_fg_fraction + self.rpn_positive_overlap = rpn_positive_overlap + self.rpn_negative_overlap = rpn_negative_overlap + self.use_random = use_random + + +class GenerateProposals(object): + # __op__ = fluid.layers.generate_proposals + def __init__(self, pre_nms_top_n=6000, post_nms_top_n=1000, nms_thresh=.5, min_size=.1, eta=1.): + super(GenerateProposals, self).__init__() + self.pre_nms_top_n = pre_nms_top_n + self.post_nms_top_n = post_nms_top_n + self.nms_thresh = nms_thresh + self.min_size = min_size + self.eta = eta + + +class RPNHead(object): + """ + RPN Head + + Args: + anchor_generator (object): `AnchorGenerator` instance + rpn_target_assign (object): `RPNTargetAssign` instance + train_proposal (object): `GenerateProposals` instance for training + test_proposal (object): 
`GenerateProposals` instance for testing + num_classes (int): number of classes in rpn output + """ + __inject__ = ['anchor_generator', 'rpn_target_assign', 'train_proposal', 'test_proposal'] + + def __init__(self, anchor_generator, rpn_target_assign, train_proposal, test_proposal, num_classes=1): + super(RPNHead, self).__init__() + self.anchor_generator = anchor_generator + self.rpn_target_assign = rpn_target_assign + self.train_proposal = train_proposal + self.test_proposal = test_proposal + self.num_classes = num_classes + + def _get_output(self, input): + """ + Get anchor and RPN head output. + + Args: + input(Variable): feature map from backbone with shape of [N, C, H, W] + + Returns: + rpn_cls_score(Variable): Output of rpn head with shape of [N, num_anchors, H, W]. + rpn_bbox_pred(Variable): Output of rpn head with shape of [N, num_anchors * 4, H, W]. + """ + dim_out = input.shape[1] + rpn_conv = fluid.layers.conv2d( + input=input, + num_filters=dim_out, + filter_size=3, + stride=1, + padding=1, + act='relu', + name='conv_rpn', + param_attr=ParamAttr(name="conv_rpn_w", initializer=Normal(loc=0., scale=0.01)), + bias_attr=ParamAttr(name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.))) + # Generate anchors self.anchor_generator + self.anchor, self.anchor_var = fluid.layers.anchor_generator( + input=rpn_conv, + anchor_sizes=self.anchor_generator.anchor_sizes, + aspect_ratios=self.anchor_generator.aspect_ratios, + variance=self.anchor_generator.variance, + stride=self.anchor_generator.stride) + + num_anchor = self.anchor.shape[2] + # Proposal classification scores + self.rpn_cls_score = fluid.layers.conv2d( + rpn_conv, + num_filters=num_anchor * self.num_classes, + filter_size=1, + stride=1, + padding=0, + act=None, + name='rpn_cls_score', + param_attr=ParamAttr(name="rpn_cls_logits_w", initializer=Normal(loc=0., scale=0.01)), + bias_attr=ParamAttr(name="rpn_cls_logits_b", learning_rate=2., regularizer=L2Decay(0.))) + # Proposal bbox regression deltas + self.rpn_bbox_pred = fluid.layers.conv2d( + rpn_conv, + num_filters=4 * num_anchor, + filter_size=1, + stride=1, + padding=0, + act=None, + name='rpn_bbox_pred', + param_attr=ParamAttr(name="rpn_bbox_pred_w", initializer=Normal(loc=0., scale=0.01)), + bias_attr=ParamAttr(name="rpn_bbox_pred_b", learning_rate=2., regularizer=L2Decay(0.))) + return self.rpn_cls_score, self.rpn_bbox_pred + + def get_proposals(self, body_feats, im_info, mode='train'): + """ + Get proposals according to the output of backbone. + + Args: + body_feats (dict): The dictionary of feature maps from backbone. + im_info(Variable): The information of image with shape [N, 3] with + shape (height, width, scale). + body_feat_names(list): A list of names of feature maps from + backbone. + + Returns: + rpn_rois(Variable): Output proposals with shape of (rois_num, 4). + """ + # In RPN Heads, only the last feature map of backbone is used. + # And body_feat_names[-1] represents the last level name of backbone. 
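+        # The flow below: the shared 3x3 conv output feeds two sibling 1x1 convs
+        # (objectness scores and box deltas); generate_proposals then decodes the
+        # deltas against the anchors and applies NMS using the train/test
+        # GenerateProposals settings.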
+        body_feat = list(body_feats.values())[-1]
+        rpn_cls_score, rpn_bbox_pred = self._get_output(body_feat)
+
+        if self.num_classes == 1:
+            rpn_cls_prob = fluid.layers.sigmoid(rpn_cls_score, name='rpn_cls_prob')
+        else:
+            rpn_cls_score = fluid.layers.transpose(rpn_cls_score, perm=[0, 2, 3, 1])
+            rpn_cls_score = fluid.layers.reshape(rpn_cls_score, shape=(0, 0, 0, -1, self.num_classes))
+            rpn_cls_prob_tmp = fluid.layers.softmax(rpn_cls_score, use_cudnn=False, name='rpn_cls_prob')
+            rpn_cls_prob_slice = fluid.layers.slice(rpn_cls_prob_tmp, axes=[4], starts=[1], ends=[self.num_classes])
+            rpn_cls_prob, _ = fluid.layers.topk(rpn_cls_prob_slice, 1)
+            rpn_cls_prob = fluid.layers.reshape(rpn_cls_prob, shape=(0, 0, 0, -1))
+            rpn_cls_prob = fluid.layers.transpose(rpn_cls_prob, perm=[0, 3, 1, 2])
+        prop_op = self.train_proposal if mode == 'train' else self.test_proposal
+        # prop_op
+        rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals(
+            scores=rpn_cls_prob,
+            bbox_deltas=rpn_bbox_pred,
+            im_info=im_info,
+            anchors=self.anchor,
+            variances=self.anchor_var,
+            pre_nms_top_n=prop_op.pre_nms_top_n,
+            post_nms_top_n=prop_op.post_nms_top_n,
+            nms_thresh=prop_op.nms_thresh,
+            min_size=prop_op.min_size,
+            eta=prop_op.eta)
+        return rpn_rois
+
+    def _transform_input(self, rpn_cls_score, rpn_bbox_pred, anchor, anchor_var):
+        rpn_cls_score = fluid.layers.transpose(rpn_cls_score, perm=[0, 2, 3, 1])
+        rpn_bbox_pred = fluid.layers.transpose(rpn_bbox_pred, perm=[0, 2, 3, 1])
+        anchor = fluid.layers.reshape(anchor, shape=(-1, 4))
+        anchor_var = fluid.layers.reshape(anchor_var, shape=(-1, 4))
+        rpn_cls_score = fluid.layers.reshape(x=rpn_cls_score, shape=(0, -1, self.num_classes))
+        rpn_bbox_pred = fluid.layers.reshape(x=rpn_bbox_pred, shape=(0, -1, 4))
+        return rpn_cls_score, rpn_bbox_pred, anchor, anchor_var
+
+    def _get_loss_input(self):
+        for attr in ['rpn_cls_score', 'rpn_bbox_pred', 'anchor', 'anchor_var']:
+            if not getattr(self, attr, None):
+                raise ValueError("self.{} should not be None; call RPNHead.get_proposals first".format(attr))
+        return self._transform_input(self.rpn_cls_score, self.rpn_bbox_pred, self.anchor, self.anchor_var)
+
+    def get_loss(self, im_info, gt_box, is_crowd, gt_label=None):
+        """
+        Sample anchors and calculate RPN loss.
+
+        Args:
+            im_info(Variable): The information of image with shape [N, 3];
+                each row consists of (height, width, scale).
+            gt_box(Variable): The ground-truth bounding boxes with shape [M, 4].
+                M is the number of ground-truth boxes.
+            is_crowd(Variable): Indicates whether a ground-truth box is crowd
+                or not, with shape [M, 1]. M is the number of ground-truth boxes.
+
+        Returns:
+            Type: dict
+                rpn_cls_loss(Variable): RPN classification loss.
+                rpn_bbox_loss(Variable): RPN bounding box regression loss.
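+
+            The regression term is summed over the sampled anchors and divided
+            by the total number of score targets, keeping it on a scale
+            comparable to the mean-reduced classification term.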
+ + """ + rpn_cls, rpn_bbox, anchor, anchor_var = self._get_loss_input() + if self.num_classes == 1: + # self.rpn_target_assign + score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ + fluid.layers.rpn_target_assign( + bbox_pred=rpn_bbox, + cls_logits=rpn_cls, + anchor_box=anchor, + anchor_var=anchor_var, + gt_boxes=gt_box, + is_crowd=is_crowd, + im_info=im_info, + rpn_batch_size_per_im=self.rpn_target_assign.rpn_batch_size_per_im, + rpn_straddle_thresh=self.rpn_target_assign.rpn_straddle_thresh, + rpn_fg_fraction=self.rpn_target_assign.rpn_fg_fraction, + rpn_positive_overlap=self.rpn_target_assign.rpn_positive_overlap, + rpn_negative_overlap=self.rpn_target_assign.rpn_negative_overlap, + use_random=self.rpn_target_assign.use_random) + score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32') + score_tgt.stop_gradient = True + rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits(x=score_pred, label=score_tgt) + else: + score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ + self.rpn_target_assign( + bbox_pred=rpn_bbox, + cls_logits=rpn_cls, + anchor_box=anchor, + anchor_var=anchor_var, + gt_boxes=gt_box, + gt_labels=gt_label, + is_crowd=is_crowd, + num_classes=self.num_classes, + im_info=im_info) + labels_int64 = fluid.layers.cast(x=score_tgt, dtype='int64') + labels_int64.stop_gradient = True + rpn_cls_loss = fluid.layers.softmax_with_cross_entropy( + logits=score_pred, label=labels_int64, numeric_stable_mode=True) + + rpn_cls_loss = fluid.layers.reduce_mean(rpn_cls_loss, name='loss_rpn_cls') + + loc_tgt = fluid.layers.cast(x=loc_tgt, dtype='float32') + loc_tgt.stop_gradient = True + rpn_reg_loss = fluid.layers.smooth_l1( + x=loc_pred, y=loc_tgt, sigma=3.0, inside_weight=bbox_weight, outside_weight=bbox_weight) + rpn_reg_loss = fluid.layers.reduce_sum(rpn_reg_loss, name='loss_rpn_bbox') + score_shape = fluid.layers.shape(score_tgt) + score_shape = fluid.layers.cast(x=score_shape, dtype='float32') + norm = fluid.layers.reduce_prod(score_shape) + norm.stop_gradient = True + rpn_reg_loss = rpn_reg_loss / norm + return {'rpn_cls_loss': rpn_cls_loss, 'rpn_reg_loss': rpn_reg_loss} diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md similarity index 100% rename from hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md rename to modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/__init__.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/__init__.py similarity index 100% rename from hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/__init__.py rename to modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/__init__.py diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_assigner.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_assigner.py similarity index 100% rename from hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_assigner.py rename to modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_assigner.py diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_head.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_head.py new file mode 100644 index 
0000000000000000000000000000000000000000..9d2b506753aeadbd9c0e5af78dc80c61e1253394 --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_head.py @@ -0,0 +1,242 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import OrderedDict + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.initializer import Normal, Xavier +from paddle.fluid.regularizer import L2Decay +from paddle.fluid.initializer import MSRA + + +class MultiClassNMS(object): + # __op__ = fluid.layers.multiclass_nms + def __init__(self, + score_threshold=.05, + nms_top_k=-1, + keep_top_k=100, + nms_threshold=.5, + normalized=False, + nms_eta=1.0, + background_label=0): + super(MultiClassNMS, self).__init__() + self.score_threshold = score_threshold + self.nms_top_k = nms_top_k + self.keep_top_k = keep_top_k + self.nms_threshold = nms_threshold + self.normalized = normalized + self.nms_eta = nms_eta + self.background_label = background_label + + +class SmoothL1Loss(object): + ''' + Smooth L1 loss + Args: + sigma (float): hyper param in smooth l1 loss + ''' + + def __init__(self, sigma=1.0): + super(SmoothL1Loss, self).__init__() + self.sigma = sigma + + def __call__(self, x, y, inside_weight=None, outside_weight=None): + return fluid.layers.smooth_l1( + x, y, inside_weight=inside_weight, outside_weight=outside_weight, sigma=self.sigma) + + +class BoxCoder(object): + def __init__(self, prior_box_var=[0.1, 0.1, 0.2, 0.2], code_type='decode_center_size', box_normalized=False, + axis=1): + super(BoxCoder, self).__init__() + self.prior_box_var = prior_box_var + self.code_type = code_type + self.box_normalized = box_normalized + self.axis = axis + + +class TwoFCHead(object): + """ + RCNN head with two Fully Connected layers + + Args: + mlp_dim (int): num of filters for the fc layers + """ + + def __init__(self, mlp_dim=1024): + super(TwoFCHead, self).__init__() + self.mlp_dim = mlp_dim + + def __call__(self, roi_feat): + fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3] + + fc6 = fluid.layers.fc( + input=roi_feat, + size=self.mlp_dim, + act='relu', + name='fc6', + param_attr=ParamAttr(name='fc6_w', initializer=Xavier(fan_out=fan)), + bias_attr=ParamAttr(name='fc6_b', learning_rate=2., regularizer=L2Decay(0.))) + head_feat = fluid.layers.fc( + input=fc6, + size=self.mlp_dim, + act='relu', + name='fc7', + param_attr=ParamAttr(name='fc7_w', initializer=Xavier()), + bias_attr=ParamAttr(name='fc7_b', learning_rate=2., regularizer=L2Decay(0.))) + + return head_feat + + +class BBoxHead(object): + """ + RCNN bbox head + + Args: + head (object): the head module instance, e.g., `ResNetC5`, `TwoFCHead` + box_coder (object): `BoxCoder` instance + nms (object): `MultiClassNMS` instance + num_classes: number of output classes + """ + __inject__ = ['head', 'box_coder', 'nms', 'bbox_loss'] + __shared__ = ['num_classes'] + + def __init__(self, head, box_coder=BoxCoder(), nms=MultiClassNMS(), bbox_loss=SmoothL1Loss(), num_classes=81): + super(BBoxHead, self).__init__() + self.head = head + self.num_classes = num_classes + self.box_coder = box_coder + self.nms = nms + self.bbox_loss = bbox_loss + self.head_feat = None + + def get_head_feat(self, input=None): + """ + Get the bbox head feature map. 
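+
+        Args:
+            input (Variable, optional): feature map to run the head on. When
+                None, the feature cached by a previous call is returned.
+
+        Returns:
+            head_feat (Variable): the bbox head feature map.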
+ """ + + if input is not None: + feat = self.head(input) + if isinstance(feat, OrderedDict): + feat = list(feat.values())[0] + self.head_feat = feat + return self.head_feat + + def _get_output(self, roi_feat): + """ + Get bbox head output. + + Args: + roi_feat (Variable): RoI feature from RoIExtractor. + + Returns: + cls_score(Variable): Output of rpn head with shape of + [N, num_anchors, H, W]. + bbox_pred(Variable): Output of rpn head with shape of + [N, num_anchors * 4, H, W]. + """ + head_feat = self.get_head_feat(roi_feat) + # when ResNetC5 output a single feature map + if not isinstance(self.head, TwoFCHead): + head_feat = fluid.layers.pool2d(head_feat, pool_type='avg', global_pooling=True) + cls_score = fluid.layers.fc( + input=head_feat, + size=self.num_classes, + act=None, + name='cls_score', + param_attr=ParamAttr(name='cls_score_w', initializer=Normal(loc=0.0, scale=0.01)), + bias_attr=ParamAttr(name='cls_score_b', learning_rate=2., regularizer=L2Decay(0.))) + bbox_pred = fluid.layers.fc( + input=head_feat, + size=4 * self.num_classes, + act=None, + name='bbox_pred', + param_attr=ParamAttr(name='bbox_pred_w', initializer=Normal(loc=0.0, scale=0.001)), + bias_attr=ParamAttr(name='bbox_pred_b', learning_rate=2., regularizer=L2Decay(0.))) + return cls_score, bbox_pred + + def get_loss(self, roi_feat, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights): + """ + Get bbox_head loss. + + Args: + roi_feat (Variable): RoI feature from RoIExtractor. + labels_int32(Variable): Class label of a RoI with shape [P, 1]. + P is the number of RoI. + bbox_targets(Variable): Box label of a RoI with shape + [P, 4 * class_nums]. + bbox_inside_weights(Variable): Indicates whether a box should + contribute to loss. Same shape as bbox_targets. + bbox_outside_weights(Variable): Indicates whether a box should + contribute to loss. Same shape as bbox_targets. + + Return: + Type: Dict + loss_cls(Variable): bbox_head loss. + loss_bbox(Variable): bbox_head loss. + """ + + cls_score, bbox_pred = self._get_output(roi_feat) + + labels_int64 = fluid.layers.cast(x=labels_int32, dtype='int64') + labels_int64.stop_gradient = True + loss_cls = fluid.layers.softmax_with_cross_entropy( + logits=cls_score, label=labels_int64, numeric_stable_mode=True) + loss_cls = fluid.layers.reduce_mean(loss_cls) + loss_bbox = self.bbox_loss( + x=bbox_pred, y=bbox_targets, inside_weight=bbox_inside_weights, outside_weight=bbox_outside_weights) + loss_bbox = fluid.layers.reduce_mean(loss_bbox) + return {'loss_cls': loss_cls, 'loss_bbox': loss_bbox} + + def get_prediction(self, roi_feat, rois, im_info, im_shape, return_box_score=False): + """ + Get prediction bounding box in test stage. + + Args: + roi_feat (Variable): RoI feature from RoIExtractor. + rois (Variable): Output of generate_proposals in rpn head. + im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the + number of input images, each element consists of im_height, + im_width, im_scale. + im_shape (Variable): Actual shape of original image with shape + [B, 3]. B is the number of images, each element consists of + original_height, original_width, 1 + + Returns: + pred_result(Variable): Prediction result with shape [N, 6]. Each + row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]. + N is the total number of prediction. 
+ """ + cls_score, bbox_pred = self._get_output(roi_feat) + + im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3]) + im_scale = fluid.layers.sequence_expand(im_scale, rois) + boxes = rois / im_scale + cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False) + bbox_pred = fluid.layers.reshape(bbox_pred, (-1, self.num_classes, 4)) + # self.box_coder + decoded_box = fluid.layers.box_coder( + prior_box=boxes, + target_box=bbox_pred, + prior_box_var=self.box_coder.prior_box_var, + code_type=self.box_coder.code_type, + box_normalized=self.box_coder.box_normalized, + axis=self.box_coder.axis) + cliped_box = fluid.layers.box_clip(input=decoded_box, im_info=im_shape) + if return_box_score: + return {'bbox': cliped_box, 'score': cls_prob} + # self.nms + pred_result = fluid.layers.multiclass_nms( + bboxes=cliped_box, + scores=cls_prob, + score_threshold=self.nms.score_threshold, + nms_top_k=self.nms.nms_top_k, + keep_top_k=self.nms.keep_top_k, + nms_threshold=self.nms.nms_threshold, + normalized=self.nms.normalized, + nms_eta=self.nms.nms_eta, + background_label=self.nms.background_label) + return pred_result diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..05006e5182fc119a78630234fcdd07356cdc6042 --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py @@ -0,0 +1,105 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import os +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image, ImageEnhance +from paddle import fluid + +__all__ = ['test_reader'] + + +def test_reader(paths=None, images=None): + """ + data generator + + Args: + paths (list[str]): paths to images. + images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] + + Yield: + res (dict): key contains 'image', 'im_info', 'im_shape', the corresponding values is: + image (numpy.ndarray): the image to be fed into network + im_info (numpy.ndarray): the info about the preprocessed. + im_shape (numpy.ndarray): the shape of image. + """ + img_list = list() + if paths: + for img_path in paths: + assert os.path.isfile(img_path), "The {} isn't a valid file path.".format(img_path) + img = cv2.imread(img_path).astype('float32') + img_list.append(img) + if images is not None: + for img in images: + img_list.append(img) + + for im in img_list: + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + im = im.astype(np.float32, copy=False) + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + mean = np.array(mean)[np.newaxis, np.newaxis, :] + std = np.array(std)[np.newaxis, np.newaxis, :] + im = im / 255.0 + im -= mean + im /= std + + target_size = 800 + max_size = 1333 + + shape = im.shape + # im_shape holds the original shape of image. + im_shape = np.array([shape[0], shape[1], 1.0]).astype('float32') + im_size_min = np.min(shape[0:2]) + im_size_max = np.max(shape[0:2]) + im_scale = float(target_size) / float(im_size_min) + if np.round(im_scale * im_size_max) > max_size: + im_scale = float(max_size) / float(im_size_max) + + resize_w = np.round(im_scale * float(shape[1])) + resize_h = np.round(im_scale * float(shape[0])) + # im_info holds the resize info of image. 
+ im_info = np.array([resize_h, resize_w, im_scale]).astype('float32') + + im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR) + + # HWC --> CHW + im = np.swapaxes(im, 1, 2) + im = np.swapaxes(im, 1, 0) + yield {'image': im, 'im_info': im_info, 'im_shape': im_shape} + + +def padding_minibatch(batch_data, coarsest_stride=0, use_padded_im_info=True): + max_shape_org = np.array([data['image'].shape for data in batch_data]).max(axis=0) + if coarsest_stride > 0: + max_shape = np.zeros((3)).astype('int32') + max_shape[1] = int(np.ceil(max_shape_org[1] / coarsest_stride) * coarsest_stride) + max_shape[2] = int(np.ceil(max_shape_org[2] / coarsest_stride) * coarsest_stride) + else: + max_shape = max_shape_org.astype('int32') + + padding_image = list() + padding_info = list() + padding_shape = list() + + for data in batch_data: + im_c, im_h, im_w = data['image'].shape + # image + padding_im = np.zeros((im_c, max_shape[1], max_shape[2]), dtype=np.float32) + padding_im[:, 0:im_h, 0:im_w] = data['image'] + padding_image.append(padding_im) + # im_info + data['im_info'][0] = max_shape[1] if use_padded_im_info else max_shape_org[1] + data['im_info'][1] = max_shape[2] if use_padded_im_info else max_shape_org[2] + padding_info.append(data['im_info']) + padding_shape.append(data['im_shape']) + + padding_image = np.array(padding_image).astype('float32') + padding_info = np.array(padding_info).astype('float32') + padding_shape = np.array(padding_shape).astype('float32') + return padding_image, padding_info, padding_shape diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/fpn.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..c7020b271c1a6d8d5ad91275e9b87c834fa08b1c --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/fpn.py @@ -0,0 +1,251 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
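+# This file defines ConvNorm, a conv2d followed by a configurable normalization
+# (bn / sync_bn / gn / affine_channel), and FPN, the top-down feature pyramid
+# attached to the ResNet backbone of this module.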
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy +from collections import OrderedDict + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.initializer import Xavier +from paddle.fluid.regularizer import L2Decay + +__all__ = ['ConvNorm', 'FPN'] + + +def ConvNorm(input, + num_filters, + filter_size, + stride=1, + groups=1, + norm_decay=0., + norm_type='affine_channel', + norm_groups=32, + dilation=1, + lr_scale=1, + freeze_norm=False, + act=None, + norm_name=None, + initializer=None, + name=None): + fan = num_filters + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=((filter_size - 1) // 2) * dilation, + dilation=dilation, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights", initializer=initializer, learning_rate=lr_scale), + bias_attr=False, + name=name + '.conv2d.output.1') + + norm_lr = 0. if freeze_norm else 1. + pattr = ParamAttr(name=norm_name + '_scale', learning_rate=norm_lr * lr_scale, regularizer=L2Decay(norm_decay)) + battr = ParamAttr(name=norm_name + '_offset', learning_rate=norm_lr * lr_scale, regularizer=L2Decay(norm_decay)) + + if norm_type in ['bn', 'sync_bn']: + global_stats = True if freeze_norm else False + out = fluid.layers.batch_norm( + input=conv, + act=act, + name=norm_name + '.output.1', + param_attr=pattr, + bias_attr=battr, + moving_mean_name=norm_name + '_mean', + moving_variance_name=norm_name + '_variance', + use_global_stats=global_stats) + scale = fluid.framework._get_var(pattr.name) + bias = fluid.framework._get_var(battr.name) + elif norm_type == 'gn': + out = fluid.layers.group_norm( + input=conv, act=act, name=norm_name + '.output.1', groups=norm_groups, param_attr=pattr, bias_attr=battr) + scale = fluid.framework._get_var(pattr.name) + bias = fluid.framework._get_var(battr.name) + elif norm_type == 'affine_channel': + scale = fluid.layers.create_parameter( + shape=[conv.shape[1]], dtype=conv.dtype, attr=pattr, default_initializer=fluid.initializer.Constant(1.)) + bias = fluid.layers.create_parameter( + shape=[conv.shape[1]], dtype=conv.dtype, attr=battr, default_initializer=fluid.initializer.Constant(0.)) + out = fluid.layers.affine_channel(x=conv, scale=scale, bias=bias, act=act) + if freeze_norm: + scale.stop_gradient = True + bias.stop_gradient = True + return out + + +class FPN(object): + """ + Feature Pyramid Network, see https://arxiv.org/abs/1612.03144 + + Args: + num_chan (int): number of feature channels + min_level (int): lowest level of the backbone feature map to use + max_level (int): highest level of the backbone feature map to use + spatial_scale (list): feature map scaling factor + has_extra_convs (bool): whether has extral convolutions in higher levels + norm_type (str|None): normalization type, 'bn'/'sync_bn'/'affine_channel' + """ + __shared__ = ['norm_type', 'freeze_norm'] + + def __init__(self, + num_chan=256, + min_level=2, + max_level=6, + spatial_scale=[1. / 32., 1. / 16., 1. / 8., 1. 
/ 4.], + has_extra_convs=False, + norm_type=None, + freeze_norm=False): + self.freeze_norm = freeze_norm + self.num_chan = num_chan + self.min_level = min_level + self.max_level = max_level + self.spatial_scale = spatial_scale + self.has_extra_convs = has_extra_convs + self.norm_type = norm_type + + def _add_topdown_lateral(self, body_name, body_input, upper_output): + lateral_name = 'fpn_inner_' + body_name + '_lateral' + topdown_name = 'fpn_topdown_' + body_name + fan = body_input.shape[1] + if self.norm_type: + initializer = Xavier(fan_out=fan) + lateral = ConvNorm( + body_input, + self.num_chan, + 1, + initializer=initializer, + norm_type=self.norm_type, + freeze_norm=self.freeze_norm, + name=lateral_name, + norm_name=lateral_name) + else: + lateral = fluid.layers.conv2d( + body_input, + self.num_chan, + 1, + param_attr=ParamAttr(name=lateral_name + "_w", initializer=Xavier(fan_out=fan)), + bias_attr=ParamAttr(name=lateral_name + "_b", learning_rate=2., regularizer=L2Decay(0.)), + name=lateral_name) + topdown = fluid.layers.resize_nearest(upper_output, scale=2., name=topdown_name) + return lateral + topdown + + def get_output(self, body_dict): + """ + Add FPN onto backbone. + + Args: + body_dict(OrderedDict): Dictionary of variables and each element is the + output of backbone. + + Return: + fpn_dict(OrderedDict): A dictionary represents the output of FPN with + their name. + spatial_scale(list): A list of multiplicative spatial scale factor. + """ + spatial_scale = copy.deepcopy(self.spatial_scale) + body_name_list = list(body_dict.keys())[::-1] + num_backbone_stages = len(body_name_list) + self.fpn_inner_output = [[] for _ in range(num_backbone_stages)] + fpn_inner_name = 'fpn_inner_' + body_name_list[0] + body_input = body_dict[body_name_list[0]] + fan = body_input.shape[1] + if self.norm_type: + initializer = Xavier(fan_out=fan) + self.fpn_inner_output[0] = ConvNorm( + body_input, + self.num_chan, + 1, + initializer=initializer, + norm_type=self.norm_type, + freeze_norm=self.freeze_norm, + name=fpn_inner_name, + norm_name=fpn_inner_name) + else: + self.fpn_inner_output[0] = fluid.layers.conv2d( + body_input, + self.num_chan, + 1, + param_attr=ParamAttr(name=fpn_inner_name + "_w", initializer=Xavier(fan_out=fan)), + bias_attr=ParamAttr(name=fpn_inner_name + "_b", learning_rate=2., regularizer=L2Decay(0.)), + name=fpn_inner_name) + for i in range(1, num_backbone_stages): + body_name = body_name_list[i] + body_input = body_dict[body_name] + top_output = self.fpn_inner_output[i - 1] + fpn_inner_single = self._add_topdown_lateral(body_name, body_input, top_output) + self.fpn_inner_output[i] = fpn_inner_single + fpn_dict = {} + fpn_name_list = [] + for i in range(num_backbone_stages): + fpn_name = 'fpn_' + body_name_list[i] + fan = self.fpn_inner_output[i].shape[1] * 3 * 3 + if self.norm_type: + initializer = Xavier(fan_out=fan) + fpn_output = ConvNorm( + self.fpn_inner_output[i], + self.num_chan, + 3, + initializer=initializer, + norm_type=self.norm_type, + freeze_norm=self.freeze_norm, + name=fpn_name, + norm_name=fpn_name) + else: + fpn_output = fluid.layers.conv2d( + self.fpn_inner_output[i], + self.num_chan, + filter_size=3, + padding=1, + param_attr=ParamAttr(name=fpn_name + "_w", initializer=Xavier(fan_out=fan)), + bias_attr=ParamAttr(name=fpn_name + "_b", learning_rate=2., regularizer=L2Decay(0.)), + name=fpn_name) + fpn_dict[fpn_name] = fpn_output + fpn_name_list.append(fpn_name) + if not self.has_extra_convs and self.max_level - self.min_level == len(spatial_scale): + 
body_top_name = fpn_name_list[0] + body_top_extension = fluid.layers.pool2d( + fpn_dict[body_top_name], 1, 'max', pool_stride=2, name=body_top_name + '_subsampled_2x') + fpn_dict[body_top_name + '_subsampled_2x'] = body_top_extension + fpn_name_list.insert(0, body_top_name + '_subsampled_2x') + spatial_scale.insert(0, spatial_scale[0] * 0.5) + # Coarser FPN levels introduced for RetinaNet + highest_backbone_level = self.min_level + len(spatial_scale) - 1 + if self.has_extra_convs and self.max_level > highest_backbone_level: + fpn_blob = body_dict[body_name_list[0]] + for i in range(highest_backbone_level + 1, self.max_level + 1): + fpn_blob_in = fpn_blob + fpn_name = 'fpn_' + str(i) + if i > highest_backbone_level + 1: + fpn_blob_in = fluid.layers.relu(fpn_blob) + fan = fpn_blob_in.shape[1] * 3 * 3 + fpn_blob = fluid.layers.conv2d( + input=fpn_blob_in, + num_filters=self.num_chan, + filter_size=3, + stride=2, + padding=1, + param_attr=ParamAttr(name=fpn_name + "_w", initializer=Xavier(fan_out=fan)), + bias_attr=ParamAttr(name=fpn_name + "_b", learning_rate=2., regularizer=L2Decay(0.)), + name=fpn_name) + fpn_dict[fpn_name] = fpn_blob + fpn_name_list.insert(0, fpn_name) + spatial_scale.insert(0, spatial_scale[0] * 0.5) + res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list]) + return res_dict, spatial_scale diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/label_file.txt b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/label_file.txt similarity index 100% rename from hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/label_file.txt rename to modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/label_file.txt diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py new file mode 100644 index 0000000000000000000000000000000000000000..a12d4a5d02d4e2bd7feb807264eb266423be428c --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py @@ -0,0 +1,390 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import ast +import argparse +from collections import OrderedDict +from functools import partial +from math import ceil + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.module.module import moduleinfo, runnable, serving +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.io.parser import txt_parser +from paddlehub.common.paddle_helper import add_vars_prefix + +from faster_rcnn_resnet50_fpn_coco2017.processor import load_label_info, postprocess, base64_to_cv2 +from faster_rcnn_resnet50_fpn_coco2017.data_feed import test_reader, padding_minibatch +from faster_rcnn_resnet50_fpn_coco2017.fpn import FPN +from faster_rcnn_resnet50_fpn_coco2017.resnet import ResNet +from faster_rcnn_resnet50_fpn_coco2017.rpn_head import AnchorGenerator, RPNTargetAssign, GenerateProposals, FPNRPNHead +from faster_rcnn_resnet50_fpn_coco2017.bbox_head import MultiClassNMS, BBoxHead, TwoFCHead +from faster_rcnn_resnet50_fpn_coco2017.bbox_assigner import BBoxAssigner +from faster_rcnn_resnet50_fpn_coco2017.roi_extractor import FPNRoIAlign + + +@moduleinfo( + name="faster_rcnn_resnet50_fpn_coco2017", + version="1.0.0", + type="cv/object_detection", + summary= + "Baidu's Faster-RCNN model for object 
detection, whose backbone is ResNet50, processed with Feature Pyramid Networks", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class FasterRCNNResNet50RPN(hub.Module): + def _initialize(self): + # default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 800, 1333] + self.default_pretrained_model_path = os.path.join(self.directory, "faster_rcnn_resnet50_fpn_model") + self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def context(self, num_classes=81, trainable=True, pretrained=True, phase='train'): + """ + Distill the Head Features, so as to perform transfer learning. + + Args: + num_classes (int): number of categories + trainable (bool): whether to set parameters trainable. + pretrained (bool): whether to load default pretrained model. + phase (str): optional choices are 'train' and 'predict'. + + Returns: + inputs (dict): the input variables. + outputs (dict): the output variables. + context_prog (Program): the program to execute transfer learning. + """ + context_prog = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(context_prog, startup_program): + with fluid.unique_name.guard(): + image = fluid.layers.data(name='image', shape=[-1, 3, -1, -1], dtype='float32') + # backbone + backbone = ResNet(norm_type='affine_channel', depth=50, feature_maps=[2, 3, 4, 5], freeze_at=2) + body_feats = backbone(image) + # fpn + fpn = FPN(max_level=6, min_level=2, num_chan=256, spatial_scale=[0.03125, 0.0625, 0.125, 0.25]) + var_prefix = '@HUB_{}@'.format(self.name) + im_info = fluid.layers.data(name='im_info', shape=[3], dtype='float32', lod_level=0) + im_shape = fluid.layers.data(name='im_shape', shape=[3], dtype='float32', lod_level=0) + body_feat_names = list(body_feats.keys()) + body_feats, spatial_scale = fpn.get_output(body_feats) + # rpn_head: RPNHead + rpn_head = self.rpn_head() + rois = rpn_head.get_proposals(body_feats, im_info, mode=phase) + # train + if phase == 'train': + gt_bbox = fluid.layers.data(name='gt_bbox', shape=[4], dtype='float32', lod_level=1) + is_crowd = fluid.layers.data(name='is_crowd', shape=[1], dtype='int32', lod_level=1) + gt_class = fluid.layers.data(name='gt_class', shape=[1], dtype='int32', lod_level=1) + rpn_loss = rpn_head.get_loss(im_info, gt_bbox, is_crowd) + # bbox_assigner: BBoxAssigner + bbox_assigner = self.bbox_assigner(num_classes) + outs = fluid.layers.generate_proposal_labels( + rpn_rois=rois, + gt_classes=gt_class, + is_crowd=is_crowd, + gt_boxes=gt_bbox, + im_info=im_info, + batch_size_per_im=bbox_assigner.batch_size_per_im, + fg_fraction=bbox_assigner.fg_fraction, + fg_thresh=bbox_assigner.fg_thresh, + bg_thresh_hi=bbox_assigner.bg_thresh_hi, + bg_thresh_lo=bbox_assigner.bg_thresh_lo, + bbox_reg_weights=bbox_assigner.bbox_reg_weights, + class_nums=bbox_assigner.class_nums, + use_random=bbox_assigner.use_random) + 
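+                    # fluid.layers.generate_proposal_labels returns, in order:
+                    # sampled rois, labels_int32, bbox_targets, bbox_inside_weights
+                    # and bbox_outside_weights; only the rois feed the RoI extractor
+                    # below, while all five are exposed via 'generate_proposal_labels'.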
rois = outs[0] + + roi_extractor = self.roi_extractor() + roi_feat = roi_extractor(head_inputs=body_feats, rois=rois, spatial_scale=spatial_scale) + # head_feat + bbox_head = self.bbox_head(num_classes) + head_feat = bbox_head.head(roi_feat) + if isinstance(head_feat, OrderedDict): + head_feat = list(head_feat.values())[0] + if phase == 'train': + inputs = { + 'image': var_prefix + image.name, + 'im_info': var_prefix + im_info.name, + 'im_shape': var_prefix + im_shape.name, + 'gt_class': var_prefix + gt_class.name, + 'gt_bbox': var_prefix + gt_bbox.name, + 'is_crowd': var_prefix + is_crowd.name + } + outputs = { + 'head_features': var_prefix + head_feat.name, + 'rpn_cls_loss': var_prefix + rpn_loss['rpn_cls_loss'].name, + 'rpn_reg_loss': var_prefix + rpn_loss['rpn_reg_loss'].name, + 'generate_proposal_labels': [var_prefix + var.name for var in outs] + } + elif phase == 'predict': + pred = bbox_head.get_prediction(roi_feat, rois, im_info, im_shape) + inputs = { + 'image': var_prefix + image.name, + 'im_info': var_prefix + im_info.name, + 'im_shape': var_prefix + im_shape.name + } + outputs = { + 'head_features': var_prefix + head_feat.name, + 'rois': var_prefix + rois.name, + 'bbox_out': var_prefix + pred.name + } + add_vars_prefix(context_prog, var_prefix) + add_vars_prefix(startup_program, var_prefix) + + global_vars = context_prog.global_block().vars + inputs = {key: global_vars[value] for key, value in inputs.items()} + outputs = { + key: global_vars[value] if not isinstance(value, list) else [global_vars[var] for var in value] + for key, value in outputs.items() + } + + for param in context_prog.global_block().iter_parameters(): + param.trainable = trainable + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(startup_program) + if pretrained: + + def _if_exist(var): + if num_classes != 81: + if 'bbox_pred' in var.name or 'cls_score' in var.name: + return False + return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist) + return inputs, outputs, context_prog + + def rpn_head(self): + return FPNRPNHead( + anchor_generator=AnchorGenerator( + anchor_sizes=[32, 64, 128, 256, 512], + aspect_ratios=[0.5, 1.0, 2.0], + stride=[16.0, 16.0], + variance=[1.0, 1.0, 1.0, 1.0]), + rpn_target_assign=RPNTargetAssign( + rpn_batch_size_per_im=256, + rpn_fg_fraction=0.5, + rpn_negative_overlap=0.3, + rpn_positive_overlap=0.7, + rpn_straddle_thresh=0.0), + train_proposal=GenerateProposals(min_size=0.0, nms_thresh=0.7, post_nms_top_n=2000, pre_nms_top_n=2000), + test_proposal=GenerateProposals(min_size=0.0, nms_thresh=0.7, post_nms_top_n=1000, pre_nms_top_n=1000), + anchor_start_size=32, + num_chan=256, + min_level=2, + max_level=6) + + def roi_extractor(self): + return FPNRoIAlign( + canconical_level=4, canonical_size=224, max_level=5, min_level=2, box_resolution=7, sampling_ratio=2) + + def bbox_head(self, num_classes): + return BBoxHead( + head=TwoFCHead(mlp_dim=1024), + nms=MultiClassNMS(keep_top_k=100, nms_threshold=0.5, score_threshold=0.05), + num_classes=num_classes) + + def bbox_assigner(self, num_classes): + return BBoxAssigner( + batch_size_per_im=512, + bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], + bg_thresh_hi=0.5, + bg_thresh_lo=0.0, + fg_fraction=0.25, + fg_thresh=0.5, + class_nums=num_classes) + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else 
model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + def object_detection(self, + paths=None, + images=None, + data=None, + use_gpu=False, + batch_size=1, + output_dir='detection_result', + score_thresh=0.5, + visualization=True): + """API of Object Detection. + + Args: + paths (list[str]): The paths of images. + images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. + batch_size (int): batch size. + use_gpu (bool): Whether to use GPU. + output_dir (str): The path to store output images. + visualization (bool): Whether to save the output images or not. + score_thresh (float): threshold for object detection. + + Returns: + res (list[dict]): The result of COCO 2017 detection, keys include 'data', 'save_path', the corresponding value is: + data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: + left (float): The X coordinate of the upper left corner of the bounding box; + top (float): The Y coordinate of the upper left corner of the bounding box; + right (float): The X coordinate of the lower right corner of the bounding box; + bottom (float): The Y coordinate of the lower right corner of the bounding box; + label (str): The label of the detection result; + confidence (float): The confidence of the detection result. + save_path (str, optional): The path to save output images. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
+ ) + + paths = paths if paths else list() + if data and 'image' in data: + paths += data['image'] + + all_images = list() + for yield_data in test_reader(paths, images): + all_images.append(yield_data) + + images_num = len(all_images) + loop_num = ceil(images_num / batch_size) + res = [] + + for iter_id in range(loop_num): + batch_data = [] + handle_id = iter_id * batch_size + + for image_id in range(batch_size): + try: + batch_data.append(all_images[handle_id + image_id]) + except: + pass + + padding_image, padding_info, padding_shape = padding_minibatch( + batch_data, coarsest_stride=32, use_padded_im_info=True) + padding_image_tensor = PaddleTensor(padding_image.copy()) + padding_info_tensor = PaddleTensor(padding_info.copy()) + padding_shape_tensor = PaddleTensor(padding_shape.copy()) + feed_list = [padding_image_tensor, padding_info_tensor, padding_shape_tensor] + + if use_gpu: + data_out = self.gpu_predictor.run(feed_list) + else: + data_out = self.cpu_predictor.run(feed_list) + + output = postprocess( + paths=paths, + images=images, + data_out=data_out, + score_thresh=score_thresh, + label_names=self.label_names, + output_dir=output_dir, + handle_id=handle_id, + visualization=visualization) + res += output + + return res + + def add_module_config_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether to use GPU or not") + + self.arg_config_group.add_argument('--batch_size', type=int, default=1, help="batch size for prediction") + + def add_module_input_arg(self): + """ + Add the command input options + """ + self.arg_input_group.add_argument('--input_path', type=str, default=None, help="input data") + + self.arg_input_group.add_argument('--input_file', type=str, default=None, help="file containing input data") + + def check_input_data(self, args): + input_data = [] + if args.input_path: + input_data = [args.input_path] + elif args.input_file: + if not os.path.exists(args.input_file): + raise RuntimeError("File %s does not exist." % args.input_file) + else: + input_data = txt_parser.parse(args.input_file, use_strip=True) + return input_data + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.object_detection(images=images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + self.parser = argparse.ArgumentParser( + description="Run the {}".format(self.name), + prog="hub run {}".format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + input_data = self.check_input_data(args) + if len(input_data) == 0: + self.parser.print_help() + exit(1) + else: + for image_path in input_data: + if not os.path.exists(image_path): + raise RuntimeError("File %s does not exist."
% image_path) + return self.object_detection(paths=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size) diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/name_adapter.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/name_adapter.py similarity index 100% rename from hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/name_adapter.py rename to modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/name_adapter.py diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/nonlocal_helper.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/nonlocal_helper.py new file mode 100644 index 0000000000000000000000000000000000000000..839df4caf744280001f033d8ef6a3d560277368e --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/nonlocal_helper.py @@ -0,0 +1,151 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import paddle.fluid as fluid +from paddle.fluid import ParamAttr + +nonlocal_params = { + "use_zero_init_conv": False, + "conv_init_std": 0.01, + "no_bias": True, + "use_maxpool": False, + "use_softmax": True, + "use_bn": False, + "use_scale": True, # vital for the model prformance!!! + "use_affine": False, + "bn_momentum": 0.9, + "bn_epsilon": 1.0000001e-5, + "bn_init_gamma": 0.9, + "weight_decay_bn": 1.e-4, +} + + +def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner, max_pool_stride=2): + cur = input + theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \ + filter_size = [1, 1], stride = [1, 1], \ + padding = [0, 0], \ + param_attr=ParamAttr(name = prefix + '_theta' + "_w", \ + initializer = fluid.initializer.Normal(loc = 0.0, + scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) \ + if not nonlocal_params["no_bias"] else False, \ + name = prefix + '_theta') + theta_shape = theta.shape + theta_shape_op = fluid.layers.shape(theta) + theta_shape_op.stop_gradient = True + + if nonlocal_params["use_maxpool"]: + max_pool = fluid.layers.pool2d(input = cur, \ + pool_size = [max_pool_stride, max_pool_stride], \ + pool_type = 'max', \ + pool_stride = [max_pool_stride, max_pool_stride], \ + pool_padding = [0, 0], \ + name = prefix + '_pool') + else: + max_pool = cur + + phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ + filter_size = [1, 1], stride = [1, 1], \ + padding = [0, 0], \ + param_attr = ParamAttr(name = prefix + '_phi' + "_w", \ + initializer = fluid.initializer.Normal(loc = 0.0, + scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) \ + if (nonlocal_params["no_bias"] == 0) else False, \ + name = prefix + '_phi') + phi_shape = phi.shape + + g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ + filter_size = [1, 1], stride = [1, 1], \ + padding = [0, 0], \ + param_attr = ParamAttr(name = prefix + '_g' + "_w", \ + initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_g' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) else False, \ + name = prefix + '_g') + g_shape = g.shape + # we have to use explicit batch size 
(to support arbitrary spacetime size) + # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784) + theta = fluid.layers.reshape(theta, shape=(0, 0, -1)) + theta = fluid.layers.transpose(theta, [0, 2, 1]) + phi = fluid.layers.reshape(phi, [0, 0, -1]) + theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity') + g = fluid.layers.reshape(g, [0, 0, -1]) + + if nonlocal_params["use_softmax"]: + if nonlocal_params["use_scale"]: + theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5) + else: + theta_phi_sc = theta_phi + p = fluid.layers.softmax(theta_phi_sc, name=prefix + '_affinity' + '_prob') + else: + # the non-softmax affinity of the reference implementation is unclear, so it is not supported here + raise NotImplementedError("space_nonlocal currently requires use_softmax=True") + + # note g's axis[2] corresponds to p's axis[2] + # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1) + p = fluid.layers.transpose(p, [0, 2, 1]) + t = fluid.layers.matmul(g, p, name=prefix + '_y') + + # reshape back + # e.g. (8, 1024, 784) => (8, 1024, 4, 14, 14) + t_shape = t.shape + t_re = fluid.layers.reshape(t, shape=list(theta_shape), actual_shape=theta_shape_op) + blob_out = t_re + blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \ + filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \ + param_attr = ParamAttr(name = prefix + '_out' + "_w", \ + initializer = fluid.initializer.Constant(value = 0.) \ + if nonlocal_params["use_zero_init_conv"] \ + else fluid.initializer.Normal(loc = 0.0, + scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_out' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) \ + if (nonlocal_params["no_bias"] == 0) else False, \ + name = prefix + '_out') + blob_out_shape = blob_out.shape + + if nonlocal_params["use_bn"]: + bn_name = prefix + "_bn" + blob_out = fluid.layers.batch_norm(blob_out, \ + # is_test = test_mode, \ + momentum = nonlocal_params["bn_momentum"], \ + epsilon = nonlocal_params["bn_epsilon"], \ + name = bn_name, \ + param_attr = ParamAttr(name = bn_name + "_s", \ + initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \ + regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ + bias_attr = ParamAttr(name = bn_name + "_b", \ + regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ + moving_mean_name = bn_name + "_rm", \ + moving_variance_name = bn_name + "_riv") # add bn + + if nonlocal_params["use_affine"]: + affine_scale = fluid.layers.create_parameter(\ + shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ + attr=ParamAttr(name=prefix + '_affine' + '_s'), \ + default_initializer = fluid.initializer.Constant(value = 1.)) + affine_bias = fluid.layers.create_parameter(\ + shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ + attr=ParamAttr(name=prefix + '_affine' + '_b'), \ + default_initializer = fluid.initializer.Constant(value = 0.)) + blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \ + bias = affine_bias, name = prefix + '_affine') # add affine + + return blob_out + + +def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner): + ''' + add_space_nonlocal: + Non-local Neural Networks: see https://arxiv.org/abs/1711.07971 + ''' + conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner) + output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum') + return output diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/processor.py
b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..077b7be32afcf127531028d25593872af7c109b0 --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/processor.py @@ -0,0 +1,159 @@ +# coding=utf-8 +import base64 +import os + +import cv2 +import numpy as np +from PIL import Image, ImageDraw + +__all__ = [ + 'base64_to_cv2', + 'load_label_info', + 'postprocess', +] + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def get_save_image_name(img, output_dir, image_path): + """Get save image name from source image path. + """ + if not os.path.exists(output_dir): + os.makedirs(output_dir) + image_name = os.path.split(image_path)[-1] + name, ext = os.path.splitext(image_name) + + if ext == '': + if img.format == 'PNG': + ext = '.png' + elif img.format == 'JPEG': + ext = '.jpg' + elif img.format == 'BMP': + ext = '.bmp' + else: + if img.mode == "RGB" or img.mode == "L": + ext = ".jpg" + elif img.mode == "RGBA" or img.mode == "P": + ext = '.png' + + return os.path.join(output_dir, "{}".format(name)) + ext + + +def draw_bounding_box_on_image(image_path, data_list, save_dir): + image = Image.open(image_path) + draw = ImageDraw.Draw(image) + for data in data_list: + left, right, top, bottom = data['left'], data['right'], data['top'], data['bottom'] + + # draw bbox + draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=2, fill='red') + + # draw label + if image.mode == 'RGB': + text = data['label'] + ": %.2f%%" % (100 * data['confidence']) + textsize_width, textsize_height = draw.textsize(text=text) + draw.rectangle( + xy=(left, top - (textsize_height + 5), left + textsize_width + 10, top), fill=(255, 255, 255)) + draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) + + save_name = get_save_image_name(image, save_dir, image_path) + if os.path.exists(save_name): + os.remove(save_name) + + image.save(save_name) + return save_name + + +def clip_bbox(bbox, img_width, img_height): + xmin = max(min(bbox[0], img_width), 0.) + ymin = max(min(bbox[1], img_height), 0.) + xmax = max(min(bbox[2], img_width), 0.) + ymax = max(min(bbox[3], img_height), 0.) + return float(xmin), float(ymin), float(xmax), float(ymax) + + +def load_label_info(file_path): + with open(file_path, 'r') as fr: + text = fr.readlines() + label_names = [] + for info in text: + label_names.append(info.strip()) + return label_names + + +def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True): + """ + postprocess the lod_tensor produced by fluid.Executor.run + + Args: + paths (list[str]): the paths of images. + images (list(numpy.ndarray)): list of images, shape of each is [H, W, C]. + data_out (lod_tensor): data produced by executor.run. + score_thresh (float): the minimum confidence for a bounding box to be kept. + label_names (list[str]): label names. + output_dir (str): output directory. + handle_id (int): The number of images that have been handled. + visualization (bool): whether to draw and save the results as images. + + Returns: + res (list[dict]): The result of object detection,
keys include 'data', 'save_path', the corresponding value is: + data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: + left (float): The X coordinate of the upper left corner of the bounding box; + top (float): The Y coordinate of the upper left corner of the bounding box; + right (float): The X coordinate of the lower right corner of the bounding box; + bottom (float): The Y coordinate of the lower right corner of the bounding box; + label (str): The label of detection result; + confidence (float): The confidence of detection result. + save_path (str): The path to save output images. + """ + lod_tensor = data_out[0] + lod = lod_tensor.lod[0] + results = lod_tensor.as_ndarray() + + if handle_id < len(paths): + unhandled_paths = paths[handle_id:] + unhandled_paths_num = len(unhandled_paths) + else: + unhandled_paths_num = 0 + + output = [] + for index in range(len(lod) - 1): + output_i = {'data': []} + if index < unhandled_paths_num: + org_img_path = unhandled_paths[index] + org_img = Image.open(org_img_path) + output_i['path'] = org_img_path + else: + org_img = images[index - unhandled_paths_num] + org_img = org_img.astype(np.uint8) + org_img = Image.fromarray(org_img[:, :, ::-1]) + if visualization: + org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format((handle_id + index))) + org_img.save(org_img_path) + org_img_height = org_img.height + org_img_width = org_img.width + result_i = results[lod[index]:lod[index + 1]] + for row in result_i: + if len(row) != 6: + continue + if row[1] < score_thresh: + continue + category_id = int(row[0]) + confidence = row[1] + bbox = row[2:] + dt = {} + dt['label'] = label_names[category_id] + dt['confidence'] = float(confidence) + dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height) + output_i['data'].append(dt) + + output.append(output_i) + if visualization: + output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir) + + return output diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/resnet.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..77a3f7f4c7b16c3f9c65c46fc93eb394befa5110 --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/resnet.py @@ -0,0 +1,364 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +from collections import OrderedDict +from numbers import Integral + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.framework import Variable +from paddle.fluid.regularizer import L2Decay +from paddle.fluid.initializer import Constant + +from .nonlocal_helper import add_space_nonlocal +from .name_adapter import NameAdapter + +__all__ = ['ResNet', 'ResNetC5'] + + +class ResNet(object): + """ + Residual Network, see https://arxiv.org/abs/1512.03385 + Args: + depth (int): ResNet depth, should be 34, 50. 
+ freeze_at (int): freeze the backbone up to which stage + norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel' + freeze_norm (bool): freeze normalization layers + norm_decay (float): weight decay for normalization layer weights + variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently + feature_maps (list): indices of stages whose feature maps are returned + dcn_v2_stages (list): indices of stages that use deformable conv v2 + nonlocal_stages (list): indices of stages that use non-local networks + """ + __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name'] + + def __init__(self, + depth=50, + freeze_at=0, + norm_type='sync_bn', + freeze_norm=False, + norm_decay=0., + variant='b', + feature_maps=[3, 4, 5], + dcn_v2_stages=[], + weight_prefix_name='', + nonlocal_stages=[], + get_prediction=False, + class_dim=1000): + super(ResNet, self).__init__() + + if isinstance(feature_maps, Integral): + feature_maps = [feature_maps] + + assert depth in [34, 50], \ + "depth {} not in [34, 50]".format(depth) + assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant" + assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" + assert len(feature_maps) > 0, "need one or more feature maps" + assert norm_type in ['bn', 'sync_bn', 'affine_channel'] + assert not (len(nonlocal_stages)>0 and depth<50), \ + "non-local is not supported for resnet18 or resnet34" + + self.depth = depth + self.freeze_at = freeze_at + self.norm_type = norm_type + self.norm_decay = norm_decay + self.freeze_norm = freeze_norm + self.variant = variant + self._model_type = 'ResNet' + self.feature_maps = feature_maps + self.dcn_v2_stages = dcn_v2_stages + self.depth_cfg = { + 34: ([3, 4, 6, 3], self.basicblock), + 50: ([3, 4, 6, 3], self.bottleneck), + } + self.stage_filters = [64, 128, 256, 512] + self._c1_out_chan_num = 64 + self.na = NameAdapter(self) + self.prefix_name = weight_prefix_name + + self.nonlocal_stages = nonlocal_stages + self.nonlocal_mod_cfg = { + 50: 2, + 101: 5, + 152: 8, + 200: 12, + } + self.get_prediction = get_prediction + self.class_dim = class_dim + + def _conv_offset(self, input, filter_size, stride, padding, act=None, name=None): + out_channel = filter_size * filter_size * 3 + out = fluid.layers.conv2d( + input, + num_filters=out_channel, + filter_size=filter_size, + stride=stride, + padding=padding, + param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"), + bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"), + act=act, + name=name) + return out + + def _conv_norm(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None, dcn_v2=False): + _name = self.prefix_name + name if self.prefix_name != '' else name + if not dcn_v2: + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=_name + "_weights"), + bias_attr=False, + name=_name + '.conv2d.output.1') + else: + # select deformable conv v2 + offset_mask = self._conv_offset( + input=input, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + act=None, + name=_name + "_conv_offset") + offset_channel = filter_size**2 * 2 + mask_channel = filter_size**2 + offset, mask = fluid.layers.split(input=offset_mask, num_or_sections=[offset_channel, mask_channel], dim=1) + mask = fluid.layers.sigmoid(mask) + conv = fluid.layers.deformable_conv( + input=input, + offset=offset, + mask=mask, + num_filters=num_filters, +
filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + deformable_groups=1, + im2col_step=1, + param_attr=ParamAttr(name=_name + "_weights"), + bias_attr=False, + name=_name + ".conv2d.output.1") + + bn_name = self.na.fix_conv_norm_name(name) + bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name + + norm_lr = 0. if self.freeze_norm else 1. + norm_decay = self.norm_decay + pattr = ParamAttr(name=bn_name + '_scale', learning_rate=norm_lr, regularizer=L2Decay(norm_decay)) + battr = ParamAttr(name=bn_name + '_offset', learning_rate=norm_lr, regularizer=L2Decay(norm_decay)) + + if self.norm_type in ['bn', 'sync_bn']: + global_stats = True if self.freeze_norm else False + out = fluid.layers.batch_norm( + input=conv, + act=act, + name=bn_name + '.output.1', + param_attr=pattr, + bias_attr=battr, + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance', + use_global_stats=global_stats) + scale = fluid.framework._get_var(pattr.name) + bias = fluid.framework._get_var(battr.name) + elif self.norm_type == 'affine_channel': + scale = fluid.layers.create_parameter( + shape=[conv.shape[1]], dtype=conv.dtype, attr=pattr, default_initializer=fluid.initializer.Constant(1.)) + bias = fluid.layers.create_parameter( + shape=[conv.shape[1]], dtype=conv.dtype, attr=battr, default_initializer=fluid.initializer.Constant(0.)) + out = fluid.layers.affine_channel(x=conv, scale=scale, bias=bias, act=act) + if self.freeze_norm: + scale.stop_gradient = True + bias.stop_gradient = True + return out + + def _shortcut(self, input, ch_out, stride, is_first, name): + max_pooling_in_short_cut = self.variant == 'd' + ch_in = input.shape[1] + # the naming rule is same as pretrained weight + name = self.na.fix_shortcut_name(name) + std_senet = getattr(self, 'std_senet', False) + if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first): + if std_senet: + if is_first: + return self._conv_norm(input, ch_out, 1, stride, name=name) + else: + return self._conv_norm(input, ch_out, 3, stride, name=name) + if max_pooling_in_short_cut and not is_first: + input = fluid.layers.pool2d( + input=input, pool_size=2, pool_stride=2, pool_padding=0, ceil_mode=True, pool_type='avg') + return self._conv_norm(input, ch_out, 1, 1, name=name) + return self._conv_norm(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck(self, input, num_filters, stride, is_first, name, dcn_v2=False): + if self.variant == 'a': + stride1, stride2 = stride, 1 + else: + stride1, stride2 = 1, stride + + # ResNeXt + groups = getattr(self, 'groups', 1) + group_width = getattr(self, 'group_width', -1) + if groups == 1: + expand = 4 + elif (groups * group_width) == 256: + expand = 1 + else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d + num_filters = num_filters // 2 + expand = 2 + + conv_name1, conv_name2, conv_name3, \ + shortcut_name = self.na.fix_bottleneck_name(name) + std_senet = getattr(self, 'std_senet', False) + if std_senet: + conv_def = [[int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1], + [num_filters, 3, stride2, 'relu', groups, conv_name2], + [num_filters * expand, 1, 1, None, 1, conv_name3]] + else: + conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], + [num_filters, 3, stride2, 'relu', groups, conv_name2], + [num_filters * expand, 1, 1, None, 1, conv_name3]] + + residual = input + for i, (c, k, s, act, g, _name) in enumerate(conv_def): + residual = self._conv_norm( + input=residual, + num_filters=c, + 
filter_size=k, + stride=s, + act=act, + groups=g, + name=_name, + dcn_v2=(i == 1 and dcn_v2)) + short = self._shortcut(input, num_filters * expand, stride, is_first=is_first, name=shortcut_name) + # Squeeze-and-Excitation + if callable(getattr(self, '_squeeze_excitation', None)): + residual = self._squeeze_excitation(input=residual, num_channels=num_filters, name='fc' + name) + return fluid.layers.elementwise_add(x=short, y=residual, act='relu', name=name + ".add.output.5") + + def basicblock(self, input, num_filters, stride, is_first, name, dcn_v2=False): + assert dcn_v2 is False, "Not implemented yet." + conv0 = self._conv_norm( + input=input, num_filters=num_filters, filter_size=3, act='relu', stride=stride, name=name + "_branch2a") + conv1 = self._conv_norm(input=conv0, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") + short = self._shortcut(input, num_filters, stride, is_first, name=name + "_branch1") + return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') + + def layer_warp(self, input, stage_num): + """ + Args: + input (Variable): input variable. + stage_num (int): the stage number, should be 2, 3, 4, 5 + + Returns: + The last variable in endpoint-th stage. + """ + assert stage_num in [2, 3, 4, 5] + + stages, block_func = self.depth_cfg[self.depth] + count = stages[stage_num - 2] + + ch_out = self.stage_filters[stage_num - 2] + is_first = False if stage_num != 2 else True + dcn_v2 = True if stage_num in self.dcn_v2_stages else False + + nonlocal_mod = 1000 + if stage_num in self.nonlocal_stages: + nonlocal_mod = self.nonlocal_mod_cfg[self.depth] if stage_num == 4 else 2 + + # Make the layer name and parameter name consistent + # with ImageNet pre-trained model + conv = input + for i in range(count): + conv_name = self.na.fix_layer_warp_name(stage_num, count, i) + if self.depth < 50: + is_first = True if i == 0 and stage_num == 2 else False + conv = block_func( + input=conv, + num_filters=ch_out, + stride=2 if i == 0 and stage_num != 2 else 1, + is_first=is_first, + name=conv_name, + dcn_v2=dcn_v2) + + # add non local model + dim_in = conv.shape[1] + nonlocal_name = "nonlocal_conv{}".format(stage_num) + if i % nonlocal_mod == nonlocal_mod - 1: + conv = add_space_nonlocal(conv, dim_in, dim_in, nonlocal_name + '_{}'.format(i), int(dim_in / 2)) + return conv + + def c1_stage(self, input): + out_chan = self._c1_out_chan_num + + conv1_name = self.na.fix_c1_stage_name() + + if self.variant in ['c', 'd']: + conv_def = [ + [out_chan // 2, 3, 2, "conv1_1"], + [out_chan // 2, 3, 1, "conv1_2"], + [out_chan, 3, 1, "conv1_3"], + ] + else: + conv_def = [[out_chan, 7, 2, conv1_name]] + + for (c, k, s, _name) in conv_def: + input = self._conv_norm(input=input, num_filters=c, filter_size=k, stride=s, act='relu', name=_name) + + output = fluid.layers.pool2d(input=input, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + return output + + def __call__(self, input): + assert isinstance(input, Variable) + assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ + "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) + + res_endpoints = [] + + res = input + feature_maps = self.feature_maps + severed_head = getattr(self, 'severed_head', False) + if not severed_head: + res = self.c1_stage(res) + feature_maps = range(2, max(self.feature_maps) + 1) + + for i in feature_maps: + res = self.layer_warp(res, i) + if i in self.feature_maps: + res_endpoints.append(res) + if self.freeze_at >= i: + res.stop_gradient = True + if self.get_prediction: + pool 
= fluid.layers.pool2d(input=res, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + + out = fluid.layers.fc( + input=pool, + size=self.class_dim, + param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv))) + out = fluid.layers.softmax(out) + return out + return OrderedDict( + [('res{}_sum'.format(self.feature_maps[idx]), feat) for idx, feat in enumerate(res_endpoints)]) + + +class ResNetC5(ResNet): + def __init__(self, + depth=50, + freeze_at=2, + norm_type='affine_channel', + freeze_norm=True, + norm_decay=0., + variant='b', + feature_maps=[5], + weight_prefix_name=''): + super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm, norm_decay, variant, feature_maps) + self.severed_head = True diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/roi_extractor.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/roi_extractor.py new file mode 100644 index 0000000000000000000000000000000000000000..c1b560d0957d12a948864ab5820fa8c7afb7dab6 --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/roi_extractor.py @@ -0,0 +1,76 @@ +# coding=utf-8 +import paddle.fluid as fluid + +__all__ = ['FPNRoIAlign'] + + +class FPNRoIAlign(object): + """ + RoI align pooling for FPN feature maps + Args: + sampling_ratio (int): number of sampling points + min_level (int): lowest level of FPN layer + max_level (int): highest level of FPN layer + canconical_level (int): the canonical FPN feature map level + canonical_size (int): the canonical FPN feature map size + box_resolution (int): box resolution + mask_resolution (int): mask roi resolution + """ + + def __init__(self, + sampling_ratio=0, + min_level=2, + max_level=5, + canconical_level=4, + canonical_size=224, + box_resolution=7, + mask_resolution=14): + super(FPNRoIAlign, self).__init__() + self.sampling_ratio = sampling_ratio + self.min_level = min_level + self.max_level = max_level + self.canconical_level = canconical_level + self.canonical_size = canonical_size + self.box_resolution = box_resolution + self.mask_resolution = mask_resolution + + def __call__(self, head_inputs, rois, spatial_scale, is_mask=False): + """ + Apply RoI align on several levels of feature maps to get RoI features. + Distribute RoIs to different levels by area and get a list of RoI + features by distributed RoIs and their corresponding feature maps.
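+ Following the FPN paper (https://arxiv.org/abs/1612.03144), each RoI of width w and height h is routed to level k = floor(canconical_level + log2(sqrt(w * h) / canonical_size)), clipped to [min_level, max_level]; this is what fluid.layers.distribute_fpn_proposals computes below.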
+ + Returns: + roi_feat(Variable): RoI features with shape of [M, C, R, R], + where M is the number of RoIs and R is RoI resolution + + """ + k_min = self.min_level + k_max = self.max_level + num_roi_lvls = k_max - k_min + 1 + name_list = list(head_inputs.keys()) + input_name_list = name_list[-num_roi_lvls:] + spatial_scale = spatial_scale[-num_roi_lvls:] + rois_dist, restore_index = fluid.layers.distribute_fpn_proposals(rois, k_min, k_max, self.canconical_level, + self.canonical_size) + # rois_dist is in ascending order + roi_out_list = [] + resolution = self.mask_resolution if is_mask else self.box_resolution + for lvl in range(num_roi_lvls): + name_index = num_roi_lvls - lvl - 1 + rois_input = rois_dist[lvl] + head_input = head_inputs[input_name_list[name_index]] + sc = spatial_scale[name_index] + roi_out = fluid.layers.roi_align( + input=head_input, + rois=rois_input, + pooled_height=resolution, + pooled_width=resolution, + spatial_scale=sc, + sampling_ratio=self.sampling_ratio) + roi_out_list.append(roi_out) + roi_feat_shuffle = fluid.layers.concat(roi_out_list) + roi_feat_ = fluid.layers.gather(roi_feat_shuffle, restore_index) + roi_feat = fluid.layers.lod_reset(roi_feat_, rois) + + return roi_feat diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/rpn_head.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/rpn_head.py new file mode 100644 index 0000000000000000000000000000000000000000..8dccb9dcb85a372a0fbc74027bb08fb6abd93bda --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/rpn_head.py @@ -0,0 +1,455 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.initializer import Normal +from paddle.fluid.regularizer import L2Decay + +__all__ = ['AnchorGenerator', 'RPNTargetAssign', 'GenerateProposals', 'RPNHead', 'FPNRPNHead'] + + +class AnchorGenerator(object): + # __op__ = fluid.layers.anchor_generator + def __init__(self, + stride=[16.0, 16.0], + anchor_sizes=[32, 64, 128, 256, 512], + aspect_ratios=[0.5, 1., 2.], + variance=[1., 1., 1., 1.]): + super(AnchorGenerator, self).__init__() + self.anchor_sizes = anchor_sizes + self.aspect_ratios = aspect_ratios + self.variance = variance + self.stride = stride + + +class RPNTargetAssign(object): + # __op__ = fluid.layers.rpn_target_assign + def __init__(self, + rpn_batch_size_per_im=256, + rpn_straddle_thresh=0., + rpn_fg_fraction=0.5, + rpn_positive_overlap=0.7, + rpn_negative_overlap=0.3, + use_random=True): + super(RPNTargetAssign, self).__init__() + self.rpn_batch_size_per_im = rpn_batch_size_per_im + self.rpn_straddle_thresh = rpn_straddle_thresh + self.rpn_fg_fraction = rpn_fg_fraction + self.rpn_positive_overlap = rpn_positive_overlap + self.rpn_negative_overlap = rpn_negative_overlap + self.use_random = use_random + + +class GenerateProposals(object): + # __op__ = fluid.layers.generate_proposals + def __init__(self, pre_nms_top_n=6000, post_nms_top_n=1000, nms_thresh=.5, min_size=.1, eta=1.): + super(GenerateProposals, self).__init__() + self.pre_nms_top_n = pre_nms_top_n + self.post_nms_top_n = post_nms_top_n + self.nms_thresh = nms_thresh + self.min_size = min_size + self.eta = eta + + +class RPNHead(object): + """ + RPN Head + + Args: + anchor_generator (object): `AnchorGenerator` instance + rpn_target_assign (object): `RPNTargetAssign` instance + train_proposal (object):
`GenerateProposals` instance for training + test_proposal (object): `GenerateProposals` instance for testing + num_classes (int): number of classes in rpn output + """ + __inject__ = ['anchor_generator', 'rpn_target_assign', 'train_proposal', 'test_proposal'] + + def __init__(self, anchor_generator, rpn_target_assign, train_proposal, test_proposal, num_classes=1): + super(RPNHead, self).__init__() + self.anchor_generator = anchor_generator + self.rpn_target_assign = rpn_target_assign + self.train_proposal = train_proposal + self.test_proposal = test_proposal + self.num_classes = num_classes + + def _get_output(self, input): + """ + Get anchor and RPN head output. + + Args: + input(Variable): feature map from backbone with shape of [N, C, H, W] + + Returns: + rpn_cls_score(Variable): Output of rpn head with shape of [N, num_anchors, H, W]. + rpn_bbox_pred(Variable): Output of rpn head with shape of [N, num_anchors * 4, H, W]. + """ + dim_out = input.shape[1] + rpn_conv = fluid.layers.conv2d( + input=input, + num_filters=dim_out, + filter_size=3, + stride=1, + padding=1, + act='relu', + name='conv_rpn', + param_attr=ParamAttr(name="conv_rpn_w", initializer=Normal(loc=0., scale=0.01)), + bias_attr=ParamAttr(name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.))) + # Generate anchors according to self.anchor_generator + self.anchor, self.anchor_var = fluid.layers.anchor_generator( + input=rpn_conv, + anchor_sizes=self.anchor_generator.anchor_sizes, + aspect_ratios=self.anchor_generator.aspect_ratios, + variance=self.anchor_generator.variance, + stride=self.anchor_generator.stride) + + num_anchor = self.anchor.shape[2] + # Proposal classification scores + self.rpn_cls_score = fluid.layers.conv2d( + rpn_conv, + num_filters=num_anchor * self.num_classes, + filter_size=1, + stride=1, + padding=0, + act=None, + name='rpn_cls_score', + param_attr=ParamAttr(name="rpn_cls_logits_w", initializer=Normal(loc=0., scale=0.01)), + bias_attr=ParamAttr(name="rpn_cls_logits_b", learning_rate=2., regularizer=L2Decay(0.))) + # Proposal bbox regression deltas + self.rpn_bbox_pred = fluid.layers.conv2d( + rpn_conv, + num_filters=4 * num_anchor, + filter_size=1, + stride=1, + padding=0, + act=None, + name='rpn_bbox_pred', + param_attr=ParamAttr(name="rpn_bbox_pred_w", initializer=Normal(loc=0., scale=0.01)), + bias_attr=ParamAttr(name="rpn_bbox_pred_b", learning_rate=2., regularizer=L2Decay(0.))) + return self.rpn_cls_score, self.rpn_bbox_pred + + def get_proposals(self, body_feats, im_info, mode='train'): + """ + Get proposals according to the output of backbone. + + Args: + body_feats (dict): The dictionary of feature maps from backbone. + im_info(Variable): The information of image with shape [N, 3], + each row being (height, width, scale). + + Returns: + rpn_rois(Variable): Output proposals with shape of (rois_num, 4). + """ + # In RPN heads, only the last feature map of the backbone is used, + # i.e. the last entry of body_feats, which corresponds to the coarsest level.
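+ # With the default num_classes=1 the objectness score is obtained with a per-anchor sigmoid; for num_classes > 1 the branch below applies a softmax over classes and keeps the highest foreground probability per anchor.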
+ body_feat = list(body_feats.values())[-1] + rpn_cls_score, rpn_bbox_pred = self._get_output(body_feat) + + if self.num_classes == 1: + rpn_cls_prob = fluid.layers.sigmoid(rpn_cls_score, name='rpn_cls_prob') + else: + rpn_cls_score = fluid.layers.transpose(rpn_cls_score, perm=[0, 2, 3, 1]) + rpn_cls_score = fluid.layers.reshape(rpn_cls_score, shape=(0, 0, 0, -1, self.num_classes)) + rpn_cls_prob_tmp = fluid.layers.softmax(rpn_cls_score, use_cudnn=False, name='rpn_cls_prob') + rpn_cls_prob_slice = fluid.layers.slice(rpn_cls_prob_tmp, axes=[4], starts=[1], ends=[self.num_classes]) + rpn_cls_prob, _ = fluid.layers.topk(rpn_cls_prob_slice, 1) + rpn_cls_prob = fluid.layers.reshape(rpn_cls_prob, shape=(0, 0, 0, -1)) + rpn_cls_prob = fluid.layers.transpose(rpn_cls_prob, perm=[0, 3, 1, 2]) + prop_op = self.train_proposal if mode == 'train' else self.test_proposal + # prop_op + rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals( + scores=rpn_cls_prob, + bbox_deltas=rpn_bbox_pred, + im_info=im_info, + anchors=self.anchor, + variances=self.anchor_var, + pre_nms_top_n=prop_op.pre_nms_top_n, + post_nms_top_n=prop_op.post_nms_top_n, + nms_thresh=prop_op.nms_thresh, + min_size=prop_op.min_size, + eta=prop_op.eta) + return rpn_rois + + def _transform_input(self, rpn_cls_score, rpn_bbox_pred, anchor, anchor_var): + rpn_cls_score = fluid.layers.transpose(rpn_cls_score, perm=[0, 2, 3, 1]) + rpn_bbox_pred = fluid.layers.transpose(rpn_bbox_pred, perm=[0, 2, 3, 1]) + anchor = fluid.layers.reshape(anchor, shape=(-1, 4)) + anchor_var = fluid.layers.reshape(anchor_var, shape=(-1, 4)) + rpn_cls_score = fluid.layers.reshape(x=rpn_cls_score, shape=(0, -1, self.num_classes)) + rpn_bbox_pred = fluid.layers.reshape(x=rpn_bbox_pred, shape=(0, -1, 4)) + return rpn_cls_score, rpn_bbox_pred, anchor, anchor_var + + def _get_loss_input(self): + for attr in ['rpn_cls_score', 'rpn_bbox_pred', 'anchor', 'anchor_var']: + if not getattr(self, attr, None): + raise ValueError("self.{} should not be None; call RPNHead.get_proposals first".format(attr)) + return self._transform_input(self.rpn_cls_score, self.rpn_bbox_pred, self.anchor, self.anchor_var) + + def get_loss(self, im_info, gt_box, is_crowd, gt_label=None): + """ + Sample anchors and calculate RPN loss. + + Args: + im_info(Variable): The information of image with shape [N, 3], + each row being (height, width, scale). + gt_box(Variable): The ground-truth bounding boxes with shape [M, 4]. + M is the number of ground-truth boxes. + is_crowd(Variable): Indicates whether a ground-truth box is crowd, + with shape [M, 1]. M is the number of ground-truth boxes. + + Returns: + Type: dict + rpn_cls_loss(Variable): RPN classification loss. + rpn_reg_loss(Variable): RPN bounding box regression loss.
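+ The regression loss is a smooth L1 loss with sigma=3.0, normalized by the total number of entries in the score target tensor.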
+ + """ + rpn_cls, rpn_bbox, anchor, anchor_var = self._get_loss_input() + if self.num_classes == 1: + # self.rpn_target_assign + score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ + fluid.layers.rpn_target_assign( + bbox_pred=rpn_bbox, + cls_logits=rpn_cls, + anchor_box=anchor, + anchor_var=anchor_var, + gt_boxes=gt_box, + is_crowd=is_crowd, + im_info=im_info, + rpn_batch_size_per_im=self.rpn_target_assign.rpn_batch_size_per_im, + rpn_straddle_thresh=self.rpn_target_assign.rpn_straddle_thresh, + rpn_fg_fraction=self.rpn_target_assign.rpn_fg_fraction, + rpn_positive_overlap=self.rpn_target_assign.rpn_positive_overlap, + rpn_negative_overlap=self.rpn_target_assign.rpn_negative_overlap, + use_random=self.rpn_target_assign.use_random) + score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32') + score_tgt.stop_gradient = True + rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits(x=score_pred, label=score_tgt) + else: + score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ + self.rpn_target_assign( + bbox_pred=rpn_bbox, + cls_logits=rpn_cls, + anchor_box=anchor, + anchor_var=anchor_var, + gt_boxes=gt_box, + gt_labels=gt_label, + is_crowd=is_crowd, + num_classes=self.num_classes, + im_info=im_info) + labels_int64 = fluid.layers.cast(x=score_tgt, dtype='int64') + labels_int64.stop_gradient = True + rpn_cls_loss = fluid.layers.softmax_with_cross_entropy( + logits=score_pred, label=labels_int64, numeric_stable_mode=True) + + rpn_cls_loss = fluid.layers.reduce_mean(rpn_cls_loss, name='loss_rpn_cls') + + loc_tgt = fluid.layers.cast(x=loc_tgt, dtype='float32') + loc_tgt.stop_gradient = True + rpn_reg_loss = fluid.layers.smooth_l1( + x=loc_pred, y=loc_tgt, sigma=3.0, inside_weight=bbox_weight, outside_weight=bbox_weight) + rpn_reg_loss = fluid.layers.reduce_sum(rpn_reg_loss, name='loss_rpn_bbox') + score_shape = fluid.layers.shape(score_tgt) + score_shape = fluid.layers.cast(x=score_shape, dtype='float32') + norm = fluid.layers.reduce_prod(score_shape) + norm.stop_gradient = True + rpn_reg_loss = rpn_reg_loss / norm + return {'rpn_cls_loss': rpn_cls_loss, 'rpn_reg_loss': rpn_reg_loss} + + +class FPNRPNHead(RPNHead): + """ + RPN Head that supports FPN input + + Args: + anchor_generator (object): `AnchorGenerator` instance + rpn_target_assign (object): `RPNTargetAssign` instance + train_proposal (object): `GenerateProposals` instance for training + test_proposal (object): `GenerateProposals` instance for testing + anchor_start_size (int): size of anchor at the first scale + num_chan (int): number of FPN output channels + min_level (int): lowest level of FPN output + max_level (int): highest level of FPN output + num_classes (int): number of classes in rpn output + """ + + def __init__(self, + anchor_generator, + rpn_target_assign, + train_proposal, + test_proposal, + anchor_start_size=32, + num_chan=256, + min_level=2, + max_level=6, + num_classes=1): + super(FPNRPNHead, self).__init__(anchor_generator, rpn_target_assign, train_proposal, test_proposal) + self.anchor_start_size = anchor_start_size + self.num_chan = num_chan + self.min_level = min_level + self.max_level = max_level + self.num_classes = num_classes + + self.fpn_rpn_list = [] + self.anchors_list = [] + self.anchor_var_list = [] + + def _get_output(self, input, feat_lvl): + """ + Get anchor and FPN RPN head output at one level. + + Args: + input(Variable): Body feature from backbone. + feat_lvl(int): Indicate the level of rpn output corresponding + to the level of feature map. 
+ + Return: + rpn_cls_score(Variable): Output of one level of fpn rpn head with + shape of [N, num_anchors, H, W]. + rpn_bbox_pred(Variable): Output of one level of fpn rpn head with + shape of [N, num_anchors * 4, H, W]. + """ + slvl = str(feat_lvl) + conv_name = 'conv_rpn_fpn' + slvl + cls_name = 'rpn_cls_logits_fpn' + slvl + bbox_name = 'rpn_bbox_pred_fpn' + slvl + conv_share_name = 'conv_rpn_fpn' + str(self.min_level) + cls_share_name = 'rpn_cls_logits_fpn' + str(self.min_level) + bbox_share_name = 'rpn_bbox_pred_fpn' + str(self.min_level) + + num_anchors = len(self.anchor_generator.aspect_ratios) + conv_rpn_fpn = fluid.layers.conv2d( + input=input, + num_filters=self.num_chan, + filter_size=3, + padding=1, + act='relu', + name=conv_name, + param_attr=ParamAttr(name=conv_share_name + '_w', initializer=Normal(loc=0., scale=0.01)), + bias_attr=ParamAttr(name=conv_share_name + '_b', learning_rate=2., regularizer=L2Decay(0.))) + + # self.anchor_generator + self.anchors, self.anchor_var = fluid.layers.anchor_generator( + input=conv_rpn_fpn, + anchor_sizes=(self.anchor_start_size * 2.**(feat_lvl - self.min_level), ), + stride=(2.**feat_lvl, 2.**feat_lvl), + aspect_ratios=self.anchor_generator.aspect_ratios, + variance=self.anchor_generator.variance) + + cls_num_filters = num_anchors * self.num_classes + self.rpn_cls_score = fluid.layers.conv2d( + input=conv_rpn_fpn, + num_filters=cls_num_filters, + filter_size=1, + act=None, + name=cls_name, + param_attr=ParamAttr(name=cls_share_name + '_w', initializer=Normal(loc=0., scale=0.01)), + bias_attr=ParamAttr(name=cls_share_name + '_b', learning_rate=2., regularizer=L2Decay(0.))) + self.rpn_bbox_pred = fluid.layers.conv2d( + input=conv_rpn_fpn, + num_filters=num_anchors * 4, + filter_size=1, + act=None, + name=bbox_name, + param_attr=ParamAttr(name=bbox_share_name + '_w', initializer=Normal(loc=0., scale=0.01)), + bias_attr=ParamAttr(name=bbox_share_name + '_b', learning_rate=2., regularizer=L2Decay(0.))) + return self.rpn_cls_score, self.rpn_bbox_pred + + def _get_single_proposals(self, body_feat, im_info, feat_lvl, mode='train'): + """ + Get proposals in one level according to the output of the fpn rpn head. + + Args: + body_feat(Variable): the feature map from backbone. + im_info(Variable): The information of image with shape [N, 3] with + format (height, width, scale). + feat_lvl(int): Indicate the level of proposals corresponding to + the feature maps. + + Returns: + rpn_rois_fpn(Variable): Output proposals with shape of (rois_num, 4). + rpn_roi_probs_fpn(Variable): Scores of proposals with + shape of (rois_num, 1).
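+ Anchors at this level have a single size, anchor_start_size * 2.**(feat_lvl - min_level), and a stride of 2.**feat_lvl, as set up in _get_output.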
+ """ + + rpn_cls_score_fpn, rpn_bbox_pred_fpn = self._get_output(body_feat, feat_lvl) + + prop_op = self.train_proposal if mode == 'train' else self.test_proposal + if self.num_classes == 1: + rpn_cls_prob_fpn = fluid.layers.sigmoid(rpn_cls_score_fpn, name='rpn_cls_prob_fpn' + str(feat_lvl)) + else: + rpn_cls_score_fpn = fluid.layers.transpose(rpn_cls_score_fpn, perm=[0, 2, 3, 1]) + rpn_cls_score_fpn = fluid.layers.reshape(rpn_cls_score_fpn, shape=(0, 0, 0, -1, self.num_classes)) + rpn_cls_prob_fpn = fluid.layers.softmax( + rpn_cls_score_fpn, use_cudnn=False, name='rpn_cls_prob_fpn' + str(feat_lvl)) + rpn_cls_prob_fpn = fluid.layers.slice(rpn_cls_prob_fpn, axes=[4], starts=[1], ends=[self.num_classes]) + rpn_cls_prob_fpn, _ = fluid.layers.topk(rpn_cls_prob_fpn, 1) + rpn_cls_prob_fpn = fluid.layers.reshape(rpn_cls_prob_fpn, shape=(0, 0, 0, -1)) + rpn_cls_prob_fpn = fluid.layers.transpose(rpn_cls_prob_fpn, perm=[0, 3, 1, 2]) + # prop_op + rpn_rois_fpn, rpn_roi_prob_fpn = fluid.layers.generate_proposals( + scores=rpn_cls_prob_fpn, + bbox_deltas=rpn_bbox_pred_fpn, + im_info=im_info, + anchors=self.anchors, + variances=self.anchor_var, + pre_nms_top_n=prop_op.pre_nms_top_n, + post_nms_top_n=prop_op.post_nms_top_n, + nms_thresh=prop_op.nms_thresh, + min_size=prop_op.min_size, + eta=prop_op.eta) + return rpn_rois_fpn, rpn_roi_prob_fpn + + def get_proposals(self, fpn_feats, im_info, mode='train'): + """ + Get proposals in multiple levels according to the output of fpn + rpn head + + Args: + fpn_feats(dict): A dictionary represents the output feature map + of FPN with their name. + im_info(Variable): The information of image with shape [N, 3] with + format (height, width, scale). + + Return: + rois_list(Variable): Output proposals in shape of [rois_num, 4] + """ + rois_list = [] + roi_probs_list = [] + fpn_feat_names = list(fpn_feats.keys()) + for lvl in range(self.min_level, self.max_level + 1): + fpn_feat_name = fpn_feat_names[self.max_level - lvl] + fpn_feat = fpn_feats[fpn_feat_name] + rois_fpn, roi_probs_fpn = self._get_single_proposals(fpn_feat, im_info, lvl, mode) + self.fpn_rpn_list.append((self.rpn_cls_score, self.rpn_bbox_pred)) + rois_list.append(rois_fpn) + roi_probs_list.append(roi_probs_fpn) + self.anchors_list.append(self.anchors) + self.anchor_var_list.append(self.anchor_var) + prop_op = self.train_proposal if mode == 'train' else self.test_proposal + post_nms_top_n = prop_op.post_nms_top_n + rois_collect = fluid.layers.collect_fpn_proposals( + rois_list, roi_probs_list, self.min_level, self.max_level, post_nms_top_n, name='collect') + return rois_collect + + def _get_loss_input(self): + rpn_clses = [] + rpn_bboxes = [] + anchors = [] + anchor_vars = [] + for i in range(len(self.fpn_rpn_list)): + single_input = self._transform_input(self.fpn_rpn_list[i][0], self.fpn_rpn_list[i][1], self.anchors_list[i], + self.anchor_var_list[i]) + rpn_clses.append(single_input[0]) + rpn_bboxes.append(single_input[1]) + anchors.append(single_input[2]) + anchor_vars.append(single_input[3]) + + rpn_cls = fluid.layers.concat(rpn_clses, axis=1) + rpn_bbox = fluid.layers.concat(rpn_bboxes, axis=1) + anchors = fluid.layers.concat(anchors) + anchor_var = fluid.layers.concat(anchor_vars) + return rpn_cls, rpn_bbox, anchors, anchor_var diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/README.md b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/README.md similarity index 100% rename from 
hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/README.md rename to modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/README.md diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/__init__.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/__init__.py similarity index 100% rename from hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/__init__.py rename to modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/__init__.py diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/bbox_assigner.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/bbox_assigner.py similarity index 100% rename from hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/bbox_assigner.py rename to modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/bbox_assigner.py diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/bbox_head.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/bbox_head.py new file mode 100644 index 0000000000000000000000000000000000000000..9d2b506753aeadbd9c0e5af78dc80c61e1253394 --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/bbox_head.py @@ -0,0 +1,242 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import OrderedDict + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.initializer import Normal, Xavier +from paddle.fluid.regularizer import L2Decay +from paddle.fluid.initializer import MSRA + + +class MultiClassNMS(object): + # __op__ = fluid.layers.multiclass_nms + def __init__(self, + score_threshold=.05, + nms_top_k=-1, + keep_top_k=100, + nms_threshold=.5, + normalized=False, + nms_eta=1.0, + background_label=0): + super(MultiClassNMS, self).__init__() + self.score_threshold = score_threshold + self.nms_top_k = nms_top_k + self.keep_top_k = keep_top_k + self.nms_threshold = nms_threshold + self.normalized = normalized + self.nms_eta = nms_eta + self.background_label = background_label + + +class SmoothL1Loss(object): + ''' + Smooth L1 loss + Args: + sigma (float): hyper param in smooth l1 loss + ''' + + def __init__(self, sigma=1.0): + super(SmoothL1Loss, self).__init__() + self.sigma = sigma + + def __call__(self, x, y, inside_weight=None, outside_weight=None): + return fluid.layers.smooth_l1( + x, y, inside_weight=inside_weight, outside_weight=outside_weight, sigma=self.sigma) + + +class BoxCoder(object): + def __init__(self, prior_box_var=[0.1, 0.1, 0.2, 0.2], code_type='decode_center_size', box_normalized=False, + axis=1): + super(BoxCoder, self).__init__() + self.prior_box_var = prior_box_var + self.code_type = code_type + self.box_normalized = box_normalized + self.axis = axis + + +class TwoFCHead(object): + """ + RCNN head with two Fully Connected layers + + Args: + mlp_dim (int): num of filters for the fc layers + """ + + def __init__(self, mlp_dim=1024): + super(TwoFCHead, self).__init__() + self.mlp_dim = mlp_dim + + def __call__(self, roi_feat): + fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3] + + fc6 = fluid.layers.fc( + input=roi_feat, + size=self.mlp_dim, + act='relu', + name='fc6', + param_attr=ParamAttr(name='fc6_w', initializer=Xavier(fan_out=fan)), + bias_attr=ParamAttr(name='fc6_b', learning_rate=2., regularizer=L2Decay(0.))) + head_feat = 
fluid.layers.fc(
+            input=fc6,
+            size=self.mlp_dim,
+            act='relu',
+            name='fc7',
+            param_attr=ParamAttr(name='fc7_w', initializer=Xavier()),
+            bias_attr=ParamAttr(name='fc7_b', learning_rate=2., regularizer=L2Decay(0.)))
+
+        return head_feat
+
+
+class BBoxHead(object):
+    """
+    RCNN bbox head
+
+    Args:
+        head (object): the head module instance, e.g., `ResNetC5`, `TwoFCHead`
+        box_coder (object): `BoxCoder` instance
+        nms (object): `MultiClassNMS` instance
+        bbox_loss (object): `SmoothL1Loss` instance
+        num_classes (int): number of output classes
+    """
+    __inject__ = ['head', 'box_coder', 'nms', 'bbox_loss']
+    __shared__ = ['num_classes']
+
+    def __init__(self, head, box_coder=BoxCoder(), nms=MultiClassNMS(), bbox_loss=SmoothL1Loss(), num_classes=81):
+        super(BBoxHead, self).__init__()
+        self.head = head
+        self.num_classes = num_classes
+        self.box_coder = box_coder
+        self.nms = nms
+        self.bbox_loss = bbox_loss
+        self.head_feat = None
+
+    def get_head_feat(self, input=None):
+        """
+        Get the bbox head feature map.
+        """
+
+        if input is not None:
+            feat = self.head(input)
+            if isinstance(feat, OrderedDict):
+                feat = list(feat.values())[0]
+            self.head_feat = feat
+        return self.head_feat
+
+    def _get_output(self, roi_feat):
+        """
+        Get bbox head output.
+
+        Args:
+            roi_feat (Variable): RoI feature from RoIExtractor.
+
+        Returns:
+            cls_score(Variable): Output of the bbox head with shape of
+                [P, num_classes], where P is the number of RoIs.
+            bbox_pred(Variable): Output of the bbox head with shape of
+                [P, 4 * num_classes].
+        """
+        head_feat = self.get_head_feat(roi_feat)
+        # when the head (e.g. ResNetC5) outputs a single feature map,
+        # pool it globally before the fc layers
+        if not isinstance(self.head, TwoFCHead):
+            head_feat = fluid.layers.pool2d(head_feat, pool_type='avg', global_pooling=True)
+        cls_score = fluid.layers.fc(
+            input=head_feat,
+            size=self.num_classes,
+            act=None,
+            name='cls_score',
+            param_attr=ParamAttr(name='cls_score_w', initializer=Normal(loc=0.0, scale=0.01)),
+            bias_attr=ParamAttr(name='cls_score_b', learning_rate=2., regularizer=L2Decay(0.)))
+        bbox_pred = fluid.layers.fc(
+            input=head_feat,
+            size=4 * self.num_classes,
+            act=None,
+            name='bbox_pred',
+            param_attr=ParamAttr(name='bbox_pred_w', initializer=Normal(loc=0.0, scale=0.001)),
+            bias_attr=ParamAttr(name='bbox_pred_b', learning_rate=2., regularizer=L2Decay(0.)))
+        return cls_score, bbox_pred
+
+    def get_loss(self, roi_feat, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights):
+        """
+        Get bbox_head loss.
+
+        Args:
+            roi_feat (Variable): RoI feature from RoIExtractor.
+            labels_int32(Variable): Class label of a RoI with shape [P, 1].
+                P is the number of RoIs.
+            bbox_targets(Variable): Box label of a RoI with shape
+                [P, 4 * class_nums].
+            bbox_inside_weights(Variable): Indicates whether a box should
+                contribute to loss. Same shape as bbox_targets.
+            bbox_outside_weights(Variable): Indicates whether a box should
+                contribute to loss. Same shape as bbox_targets.
+
+        Return:
+            Type: Dict
+                loss_cls(Variable): classification loss of bbox_head.
+                loss_bbox(Variable): bounding box regression loss of bbox_head.
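+
+        Note:
+            loss_cls averages a softmax cross-entropy over the sampled RoIs;
+            loss_bbox averages the configured `bbox_loss` (smooth-L1 by
+            default), masked by the inside/outside weights.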
+ """ + + cls_score, bbox_pred = self._get_output(roi_feat) + + labels_int64 = fluid.layers.cast(x=labels_int32, dtype='int64') + labels_int64.stop_gradient = True + loss_cls = fluid.layers.softmax_with_cross_entropy( + logits=cls_score, label=labels_int64, numeric_stable_mode=True) + loss_cls = fluid.layers.reduce_mean(loss_cls) + loss_bbox = self.bbox_loss( + x=bbox_pred, y=bbox_targets, inside_weight=bbox_inside_weights, outside_weight=bbox_outside_weights) + loss_bbox = fluid.layers.reduce_mean(loss_bbox) + return {'loss_cls': loss_cls, 'loss_bbox': loss_bbox} + + def get_prediction(self, roi_feat, rois, im_info, im_shape, return_box_score=False): + """ + Get prediction bounding box in test stage. + + Args: + roi_feat (Variable): RoI feature from RoIExtractor. + rois (Variable): Output of generate_proposals in rpn head. + im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the + number of input images, each element consists of im_height, + im_width, im_scale. + im_shape (Variable): Actual shape of original image with shape + [B, 3]. B is the number of images, each element consists of + original_height, original_width, 1 + + Returns: + pred_result(Variable): Prediction result with shape [N, 6]. Each + row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]. + N is the total number of prediction. + """ + cls_score, bbox_pred = self._get_output(roi_feat) + + im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3]) + im_scale = fluid.layers.sequence_expand(im_scale, rois) + boxes = rois / im_scale + cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False) + bbox_pred = fluid.layers.reshape(bbox_pred, (-1, self.num_classes, 4)) + # self.box_coder + decoded_box = fluid.layers.box_coder( + prior_box=boxes, + target_box=bbox_pred, + prior_box_var=self.box_coder.prior_box_var, + code_type=self.box_coder.code_type, + box_normalized=self.box_coder.box_normalized, + axis=self.box_coder.axis) + cliped_box = fluid.layers.box_clip(input=decoded_box, im_info=im_shape) + if return_box_score: + return {'bbox': cliped_box, 'score': cls_prob} + # self.nms + pred_result = fluid.layers.multiclass_nms( + bboxes=cliped_box, + scores=cls_prob, + score_threshold=self.nms.score_threshold, + nms_top_k=self.nms.nms_top_k, + keep_top_k=self.nms.keep_top_k, + nms_threshold=self.nms.nms_threshold, + normalized=self.nms.normalized, + nms_eta=self.nms.nms_eta, + background_label=self.nms.background_label) + return pred_result diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/data_feed.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..05006e5182fc119a78630234fcdd07356cdc6042 --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/data_feed.py @@ -0,0 +1,105 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import os +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image, ImageEnhance +from paddle import fluid + +__all__ = ['test_reader'] + + +def test_reader(paths=None, images=None): + """ + data generator + + Args: + paths (list[str]): paths to images. 
+ images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] + + Yield: + res (dict): key contains 'image', 'im_info', 'im_shape', the corresponding values is: + image (numpy.ndarray): the image to be fed into network + im_info (numpy.ndarray): the info about the preprocessed. + im_shape (numpy.ndarray): the shape of image. + """ + img_list = list() + if paths: + for img_path in paths: + assert os.path.isfile(img_path), "The {} isn't a valid file path.".format(img_path) + img = cv2.imread(img_path).astype('float32') + img_list.append(img) + if images is not None: + for img in images: + img_list.append(img) + + for im in img_list: + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + im = im.astype(np.float32, copy=False) + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + mean = np.array(mean)[np.newaxis, np.newaxis, :] + std = np.array(std)[np.newaxis, np.newaxis, :] + im = im / 255.0 + im -= mean + im /= std + + target_size = 800 + max_size = 1333 + + shape = im.shape + # im_shape holds the original shape of image. + im_shape = np.array([shape[0], shape[1], 1.0]).astype('float32') + im_size_min = np.min(shape[0:2]) + im_size_max = np.max(shape[0:2]) + im_scale = float(target_size) / float(im_size_min) + if np.round(im_scale * im_size_max) > max_size: + im_scale = float(max_size) / float(im_size_max) + + resize_w = np.round(im_scale * float(shape[1])) + resize_h = np.round(im_scale * float(shape[0])) + # im_info holds the resize info of image. + im_info = np.array([resize_h, resize_w, im_scale]).astype('float32') + + im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR) + + # HWC --> CHW + im = np.swapaxes(im, 1, 2) + im = np.swapaxes(im, 1, 0) + yield {'image': im, 'im_info': im_info, 'im_shape': im_shape} + + +def padding_minibatch(batch_data, coarsest_stride=0, use_padded_im_info=True): + max_shape_org = np.array([data['image'].shape for data in batch_data]).max(axis=0) + if coarsest_stride > 0: + max_shape = np.zeros((3)).astype('int32') + max_shape[1] = int(np.ceil(max_shape_org[1] / coarsest_stride) * coarsest_stride) + max_shape[2] = int(np.ceil(max_shape_org[2] / coarsest_stride) * coarsest_stride) + else: + max_shape = max_shape_org.astype('int32') + + padding_image = list() + padding_info = list() + padding_shape = list() + + for data in batch_data: + im_c, im_h, im_w = data['image'].shape + # image + padding_im = np.zeros((im_c, max_shape[1], max_shape[2]), dtype=np.float32) + padding_im[:, 0:im_h, 0:im_w] = data['image'] + padding_image.append(padding_im) + # im_info + data['im_info'][0] = max_shape[1] if use_padded_im_info else max_shape_org[1] + data['im_info'][1] = max_shape[2] if use_padded_im_info else max_shape_org[2] + padding_info.append(data['im_info']) + padding_shape.append(data['im_shape']) + + padding_image = np.array(padding_image).astype('float32') + padding_info = np.array(padding_info).astype('float32') + padding_shape = np.array(padding_shape).astype('float32') + return padding_image, padding_info, padding_shape diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/fpn.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..c7020b271c1a6d8d5ad91275e9b87c834fa08b1c --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/fpn.py @@ -0,0 +1,251 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy +from collections import OrderedDict + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.initializer import Xavier +from paddle.fluid.regularizer import L2Decay + +__all__ = ['ConvNorm', 'FPN'] + + +def ConvNorm(input, + num_filters, + filter_size, + stride=1, + groups=1, + norm_decay=0., + norm_type='affine_channel', + norm_groups=32, + dilation=1, + lr_scale=1, + freeze_norm=False, + act=None, + norm_name=None, + initializer=None, + name=None): + fan = num_filters + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=((filter_size - 1) // 2) * dilation, + dilation=dilation, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights", initializer=initializer, learning_rate=lr_scale), + bias_attr=False, + name=name + '.conv2d.output.1') + + norm_lr = 0. if freeze_norm else 1. + pattr = ParamAttr(name=norm_name + '_scale', learning_rate=norm_lr * lr_scale, regularizer=L2Decay(norm_decay)) + battr = ParamAttr(name=norm_name + '_offset', learning_rate=norm_lr * lr_scale, regularizer=L2Decay(norm_decay)) + + if norm_type in ['bn', 'sync_bn']: + global_stats = True if freeze_norm else False + out = fluid.layers.batch_norm( + input=conv, + act=act, + name=norm_name + '.output.1', + param_attr=pattr, + bias_attr=battr, + moving_mean_name=norm_name + '_mean', + moving_variance_name=norm_name + '_variance', + use_global_stats=global_stats) + scale = fluid.framework._get_var(pattr.name) + bias = fluid.framework._get_var(battr.name) + elif norm_type == 'gn': + out = fluid.layers.group_norm( + input=conv, act=act, name=norm_name + '.output.1', groups=norm_groups, param_attr=pattr, bias_attr=battr) + scale = fluid.framework._get_var(pattr.name) + bias = fluid.framework._get_var(battr.name) + elif norm_type == 'affine_channel': + scale = fluid.layers.create_parameter( + shape=[conv.shape[1]], dtype=conv.dtype, attr=pattr, default_initializer=fluid.initializer.Constant(1.)) + bias = fluid.layers.create_parameter( + shape=[conv.shape[1]], dtype=conv.dtype, attr=battr, default_initializer=fluid.initializer.Constant(0.)) + out = fluid.layers.affine_channel(x=conv, scale=scale, bias=bias, act=act) + if freeze_norm: + scale.stop_gradient = True + bias.stop_gradient = True + return out + + +class FPN(object): + """ + Feature Pyramid Network, see https://arxiv.org/abs/1612.03144 + + Args: + num_chan (int): number of feature channels + min_level (int): lowest level of the backbone feature map to use + max_level (int): highest level of the backbone feature map to use + spatial_scale (list): feature map scaling factor + has_extra_convs (bool): whether has extral convolutions in higher levels + norm_type (str|None): normalization type, 'bn'/'sync_bn'/'affine_channel' + """ + __shared__ = ['norm_type', 'freeze_norm'] + 
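+    # Illustrative wiring, mirroring module.py in this directory: the ResNet
+    # backbone returns an OrderedDict of C2-C5 feature maps, and FPN turns it
+    # into one num_chan-channel map per level (plus an extra max-pooled level
+    # when has_extra_convs is False):
+    #
+    #     backbone = ResNet(feature_maps=[2, 3, 4, 5])
+    #     body_feats = backbone(image)
+    #     fpn = FPN(min_level=2, max_level=6, num_chan=256,
+    #               spatial_scale=[1. / 32., 1. / 16., 1. / 8., 1. / 4.])
+    #     fpn_feats, spatial_scale = fpn.get_output(body_feats)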
+ def __init__(self, + num_chan=256, + min_level=2, + max_level=6, + spatial_scale=[1. / 32., 1. / 16., 1. / 8., 1. / 4.], + has_extra_convs=False, + norm_type=None, + freeze_norm=False): + self.freeze_norm = freeze_norm + self.num_chan = num_chan + self.min_level = min_level + self.max_level = max_level + self.spatial_scale = spatial_scale + self.has_extra_convs = has_extra_convs + self.norm_type = norm_type + + def _add_topdown_lateral(self, body_name, body_input, upper_output): + lateral_name = 'fpn_inner_' + body_name + '_lateral' + topdown_name = 'fpn_topdown_' + body_name + fan = body_input.shape[1] + if self.norm_type: + initializer = Xavier(fan_out=fan) + lateral = ConvNorm( + body_input, + self.num_chan, + 1, + initializer=initializer, + norm_type=self.norm_type, + freeze_norm=self.freeze_norm, + name=lateral_name, + norm_name=lateral_name) + else: + lateral = fluid.layers.conv2d( + body_input, + self.num_chan, + 1, + param_attr=ParamAttr(name=lateral_name + "_w", initializer=Xavier(fan_out=fan)), + bias_attr=ParamAttr(name=lateral_name + "_b", learning_rate=2., regularizer=L2Decay(0.)), + name=lateral_name) + topdown = fluid.layers.resize_nearest(upper_output, scale=2., name=topdown_name) + return lateral + topdown + + def get_output(self, body_dict): + """ + Add FPN onto backbone. + + Args: + body_dict(OrderedDict): Dictionary of variables and each element is the + output of backbone. + + Return: + fpn_dict(OrderedDict): A dictionary represents the output of FPN with + their name. + spatial_scale(list): A list of multiplicative spatial scale factor. + """ + spatial_scale = copy.deepcopy(self.spatial_scale) + body_name_list = list(body_dict.keys())[::-1] + num_backbone_stages = len(body_name_list) + self.fpn_inner_output = [[] for _ in range(num_backbone_stages)] + fpn_inner_name = 'fpn_inner_' + body_name_list[0] + body_input = body_dict[body_name_list[0]] + fan = body_input.shape[1] + if self.norm_type: + initializer = Xavier(fan_out=fan) + self.fpn_inner_output[0] = ConvNorm( + body_input, + self.num_chan, + 1, + initializer=initializer, + norm_type=self.norm_type, + freeze_norm=self.freeze_norm, + name=fpn_inner_name, + norm_name=fpn_inner_name) + else: + self.fpn_inner_output[0] = fluid.layers.conv2d( + body_input, + self.num_chan, + 1, + param_attr=ParamAttr(name=fpn_inner_name + "_w", initializer=Xavier(fan_out=fan)), + bias_attr=ParamAttr(name=fpn_inner_name + "_b", learning_rate=2., regularizer=L2Decay(0.)), + name=fpn_inner_name) + for i in range(1, num_backbone_stages): + body_name = body_name_list[i] + body_input = body_dict[body_name] + top_output = self.fpn_inner_output[i - 1] + fpn_inner_single = self._add_topdown_lateral(body_name, body_input, top_output) + self.fpn_inner_output[i] = fpn_inner_single + fpn_dict = {} + fpn_name_list = [] + for i in range(num_backbone_stages): + fpn_name = 'fpn_' + body_name_list[i] + fan = self.fpn_inner_output[i].shape[1] * 3 * 3 + if self.norm_type: + initializer = Xavier(fan_out=fan) + fpn_output = ConvNorm( + self.fpn_inner_output[i], + self.num_chan, + 3, + initializer=initializer, + norm_type=self.norm_type, + freeze_norm=self.freeze_norm, + name=fpn_name, + norm_name=fpn_name) + else: + fpn_output = fluid.layers.conv2d( + self.fpn_inner_output[i], + self.num_chan, + filter_size=3, + padding=1, + param_attr=ParamAttr(name=fpn_name + "_w", initializer=Xavier(fan_out=fan)), + bias_attr=ParamAttr(name=fpn_name + "_b", learning_rate=2., regularizer=L2Decay(0.)), + name=fpn_name) + fpn_dict[fpn_name] = fpn_output + 
fpn_name_list.append(fpn_name) + if not self.has_extra_convs and self.max_level - self.min_level == len(spatial_scale): + body_top_name = fpn_name_list[0] + body_top_extension = fluid.layers.pool2d( + fpn_dict[body_top_name], 1, 'max', pool_stride=2, name=body_top_name + '_subsampled_2x') + fpn_dict[body_top_name + '_subsampled_2x'] = body_top_extension + fpn_name_list.insert(0, body_top_name + '_subsampled_2x') + spatial_scale.insert(0, spatial_scale[0] * 0.5) + # Coarser FPN levels introduced for RetinaNet + highest_backbone_level = self.min_level + len(spatial_scale) - 1 + if self.has_extra_convs and self.max_level > highest_backbone_level: + fpn_blob = body_dict[body_name_list[0]] + for i in range(highest_backbone_level + 1, self.max_level + 1): + fpn_blob_in = fpn_blob + fpn_name = 'fpn_' + str(i) + if i > highest_backbone_level + 1: + fpn_blob_in = fluid.layers.relu(fpn_blob) + fan = fpn_blob_in.shape[1] * 3 * 3 + fpn_blob = fluid.layers.conv2d( + input=fpn_blob_in, + num_filters=self.num_chan, + filter_size=3, + stride=2, + padding=1, + param_attr=ParamAttr(name=fpn_name + "_w", initializer=Xavier(fan_out=fan)), + bias_attr=ParamAttr(name=fpn_name + "_b", learning_rate=2., regularizer=L2Decay(0.)), + name=fpn_name) + fpn_dict[fpn_name] = fpn_blob + fpn_name_list.insert(0, fpn_name) + spatial_scale.insert(0, spatial_scale[0] * 0.5) + res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list]) + return res_dict, spatial_scale diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/module.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/module.py new file mode 100644 index 0000000000000000000000000000000000000000..3392f22c9d1744f4d54d24dcc40ec74e611fc635 --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/module.py @@ -0,0 +1,200 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import ast +import argparse +from collections import OrderedDict +from functools import partial +from math import ceil + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.module.module import moduleinfo, runnable, serving +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.io.parser import txt_parser +from paddlehub.common.paddle_helper import add_vars_prefix + +from faster_rcnn_resnet50_fpn_venus.processor import load_label_info, postprocess, base64_to_cv2 +from faster_rcnn_resnet50_fpn_venus.data_feed import test_reader, padding_minibatch +from faster_rcnn_resnet50_fpn_venus.fpn import FPN +from faster_rcnn_resnet50_fpn_venus.resnet import ResNet +from faster_rcnn_resnet50_fpn_venus.rpn_head import AnchorGenerator, RPNTargetAssign, GenerateProposals, FPNRPNHead +from faster_rcnn_resnet50_fpn_venus.bbox_head import MultiClassNMS, BBoxHead, TwoFCHead +from faster_rcnn_resnet50_fpn_venus.bbox_assigner import BBoxAssigner +from faster_rcnn_resnet50_fpn_venus.roi_extractor import FPNRoIAlign + + +@moduleinfo( + name="faster_rcnn_resnet50_fpn_venus", + version="1.0.0", + type="cv/object_detection", + summary= + "Baidu's Faster-RCNN model for object detection, whose backbone is ResNet50, processed with Feature Pyramid Networks", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class FasterRCNNResNet50RPN(hub.Module): + def _initialize(self): + # default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 800, 1333] + 
self.default_pretrained_model_path = os.path.join(self.directory, "faster_rcnn_resnet50_fpn_model") + + def context(self, num_classes=708, trainable=True, pretrained=True, phase='train'): + """ + Distill the Head Features, so as to perform transfer learning. + + Args: + trainable (bool): whether to set parameters trainable. + pretrained (bool): whether to load default pretrained model. + get_prediction (bool): whether to get prediction. + phase (str): optional choices are 'train' and 'predict'. + + Returns: + inputs (dict): the input variables. + outputs (dict): the output variables. + context_prog (Program): the program to execute transfer learning. + """ + context_prog = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(context_prog, startup_program): + with fluid.unique_name.guard(): + image = fluid.layers.data(name='image', shape=[-1, 3, -1, -1], dtype='float32') + # backbone + backbone = ResNet(norm_type='affine_channel', depth=50, feature_maps=[2, 3, 4, 5], freeze_at=2) + body_feats = backbone(image) + # fpn + fpn = FPN(max_level=6, min_level=2, num_chan=256, spatial_scale=[0.03125, 0.0625, 0.125, 0.25]) + var_prefix = '@HUB_{}@'.format(self.name) + im_info = fluid.layers.data(name='im_info', shape=[3], dtype='float32', lod_level=0) + im_shape = fluid.layers.data(name='im_shape', shape=[3], dtype='float32', lod_level=0) + body_feat_names = list(body_feats.keys()) + body_feats, spatial_scale = fpn.get_output(body_feats) + # rpn_head: RPNHead + rpn_head = self.rpn_head() + rois = rpn_head.get_proposals(body_feats, im_info, mode=phase) + # train + if phase == 'train': + gt_bbox = fluid.layers.data(name='gt_bbox', shape=[4], dtype='float32', lod_level=1) + is_crowd = fluid.layers.data(name='is_crowd', shape=[1], dtype='int32', lod_level=1) + gt_class = fluid.layers.data(name='gt_class', shape=[1], dtype='int32', lod_level=1) + rpn_loss = rpn_head.get_loss(im_info, gt_bbox, is_crowd) + # bbox_assigner: BBoxAssigner + bbox_assigner = self.bbox_assigner(num_classes) + outs = fluid.layers.generate_proposal_labels( + rpn_rois=rois, + gt_classes=gt_class, + is_crowd=is_crowd, + gt_boxes=gt_bbox, + im_info=im_info, + batch_size_per_im=bbox_assigner.batch_size_per_im, + fg_fraction=bbox_assigner.fg_fraction, + fg_thresh=bbox_assigner.fg_thresh, + bg_thresh_hi=bbox_assigner.bg_thresh_hi, + bg_thresh_lo=bbox_assigner.bg_thresh_lo, + bbox_reg_weights=bbox_assigner.bbox_reg_weights, + class_nums=bbox_assigner.class_nums, + use_random=bbox_assigner.use_random) + rois = outs[0] + + roi_extractor = self.roi_extractor() + roi_feat = roi_extractor(head_inputs=body_feats, rois=rois, spatial_scale=spatial_scale) + # head_feat + bbox_head = self.bbox_head(num_classes) + head_feat = bbox_head.head(roi_feat) + if isinstance(head_feat, OrderedDict): + head_feat = list(head_feat.values())[0] + if phase == 'train': + inputs = { + 'image': var_prefix + image.name, + 'im_info': var_prefix + im_info.name, + 'im_shape': var_prefix + im_shape.name, + 'gt_class': var_prefix + gt_class.name, + 'gt_bbox': var_prefix + gt_bbox.name, + 'is_crowd': var_prefix + is_crowd.name + } + outputs = { + 'head_features': var_prefix + head_feat.name, + 'rpn_cls_loss': var_prefix + rpn_loss['rpn_cls_loss'].name, + 'rpn_reg_loss': var_prefix + rpn_loss['rpn_reg_loss'].name, + 'generate_proposal_labels': [var_prefix + var.name for var in outs] + } + elif phase == 'predict': + pred = bbox_head.get_prediction(roi_feat, rois, im_info, im_shape) + inputs = { + 'image': var_prefix + image.name, + 'im_info': 
var_prefix + im_info.name, + 'im_shape': var_prefix + im_shape.name + } + outputs = { + 'head_features': var_prefix + head_feat.name, + 'rois': var_prefix + rois.name, + 'bbox_out': var_prefix + pred.name + } + add_vars_prefix(context_prog, var_prefix) + add_vars_prefix(startup_program, var_prefix) + + global_vars = context_prog.global_block().vars + inputs = {key: global_vars[value] for key, value in inputs.items()} + outputs = { + key: global_vars[value] if not isinstance(value, list) else [global_vars[var] for var in value] + for key, value in outputs.items() + } + + for param in context_prog.global_block().iter_parameters(): + param.trainable = trainable + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(startup_program) + if pretrained: + + def _if_exist(var): + if num_classes != 81: + if 'bbox_pred' in var.name or 'cls_score' in var.name: + return False + return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist) + return inputs, outputs, context_prog + + def rpn_head(self): + return FPNRPNHead( + anchor_generator=AnchorGenerator( + anchor_sizes=[32, 64, 128, 256, 512], + aspect_ratios=[0.5, 1.0, 2.0], + stride=[16.0, 16.0], + variance=[1.0, 1.0, 1.0, 1.0]), + rpn_target_assign=RPNTargetAssign( + rpn_batch_size_per_im=256, + rpn_fg_fraction=0.5, + rpn_negative_overlap=0.3, + rpn_positive_overlap=0.7, + rpn_straddle_thresh=0.0), + train_proposal=GenerateProposals(min_size=0.0, nms_thresh=0.7, post_nms_top_n=2000, pre_nms_top_n=2000), + test_proposal=GenerateProposals(min_size=0.0, nms_thresh=0.7, post_nms_top_n=1000, pre_nms_top_n=1000), + anchor_start_size=32, + num_chan=256, + min_level=2, + max_level=6) + + def roi_extractor(self): + return FPNRoIAlign( + canconical_level=4, canonical_size=224, max_level=5, min_level=2, box_resolution=7, sampling_ratio=2) + + def bbox_head(self, num_classes): + return BBoxHead( + head=TwoFCHead(mlp_dim=1024), + nms=MultiClassNMS(keep_top_k=100, nms_threshold=0.5, score_threshold=0.05), + num_classes=num_classes) + + def bbox_assigner(self, num_classes): + return BBoxAssigner( + batch_size_per_im=512, + bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], + bg_thresh_hi=0.5, + bg_thresh_lo=0.0, + fg_fraction=0.25, + fg_thresh=0.5, + class_nums=num_classes) diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/name_adapter.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/name_adapter.py similarity index 100% rename from hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/name_adapter.py rename to modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/name_adapter.py diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/nonlocal_helper.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/nonlocal_helper.py new file mode 100644 index 0000000000000000000000000000000000000000..839df4caf744280001f033d8ef6a3d560277368e --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/nonlocal_helper.py @@ -0,0 +1,151 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import paddle.fluid as fluid +from paddle.fluid import ParamAttr + +nonlocal_params = { + "use_zero_init_conv": False, + "conv_init_std": 0.01, + "no_bias": True, + "use_maxpool": False, + "use_softmax": True, + "use_bn": False, + "use_scale": True, # 
vital for the model performance!
+    "use_affine": False,
+    "bn_momentum": 0.9,
+    "bn_epsilon": 1.0000001e-5,
+    "bn_init_gamma": 0.9,
+    "weight_decay_bn": 1.e-4,
+}
+
+
+def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner, max_pool_stride=2):
+    cur = input
+    theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \
+                  filter_size = [1, 1], stride = [1, 1], \
+                  padding = [0, 0], \
+                  param_attr=ParamAttr(name = prefix + '_theta' + "_w", \
+                      initializer = fluid.initializer.Normal(loc = 0.0,
+                          scale = nonlocal_params["conv_init_std"])), \
+                  bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \
+                      initializer = fluid.initializer.Constant(value = 0.)) \
+                      if not nonlocal_params["no_bias"] else False, \
+                  name = prefix + '_theta')
+    theta_shape = theta.shape
+    theta_shape_op = fluid.layers.shape(theta)
+    theta_shape_op.stop_gradient = True
+
+    if nonlocal_params["use_maxpool"]:
+        max_pool = fluid.layers.pool2d(input = cur, \
+                         pool_size = [max_pool_stride, max_pool_stride], \
+                         pool_type = 'max', \
+                         pool_stride = [max_pool_stride, max_pool_stride], \
+                         pool_padding = [0, 0], \
+                         name = prefix + '_pool')
+    else:
+        max_pool = cur
+
+    phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \
+                filter_size = [1, 1], stride = [1, 1], \
+                padding = [0, 0], \
+                param_attr = ParamAttr(name = prefix + '_phi' + "_w", \
+                    initializer = fluid.initializer.Normal(loc = 0.0,
+                        scale = nonlocal_params["conv_init_std"])), \
+                bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \
+                    initializer = fluid.initializer.Constant(value = 0.)) \
+                    if (nonlocal_params["no_bias"] == 0) else False, \
+                name = prefix + '_phi')
+    phi_shape = phi.shape
+
+    g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \
+              filter_size = [1, 1], stride = [1, 1], \
+              padding = [0, 0], \
+              param_attr = ParamAttr(name = prefix + '_g' + "_w", \
+                  initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \
+              bias_attr = ParamAttr(name = prefix + '_g' + "_b", \
+                  initializer = fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) else False, \
+              name = prefix + '_g')
+    g_shape = g.shape
+    # we have to use explicit batch size (to support arbitrary spacetime size)
+    # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784)
+    theta = fluid.layers.reshape(theta, shape=(0, 0, -1))
+    theta = fluid.layers.transpose(theta, [0, 2, 1])
+    phi = fluid.layers.reshape(phi, [0, 0, -1])
+    theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity')
+    g = fluid.layers.reshape(g, [0, 0, -1])
+
+    if nonlocal_params["use_softmax"]:
+        if nonlocal_params["use_scale"]:
+            theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5)
+        else:
+            theta_phi_sc = theta_phi
+        p = fluid.layers.softmax(theta_phi_sc, name=prefix + '_affinity' + '_prob')
+    else:
+        # the affinity without softmax normalization is not supported here
+        raise NotImplementedError("nonlocal block without softmax is not implemented")
+
+    # note g's axis[2] corresponds to p's axis[2]
+    # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1)
+    p = fluid.layers.transpose(p, [0, 2, 1])
+    t = fluid.layers.matmul(g, p, name=prefix + '_y')
+
+    # reshape back
+    # e.g.
(8, 1024, 784) => (8, 1024, 4, 14, 14) + t_shape = t.shape + t_re = fluid.layers.reshape(t, shape=list(theta_shape), actual_shape=theta_shape_op) + blob_out = t_re + blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \ + filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \ + param_attr = ParamAttr(name = prefix + '_out' + "_w", \ + initializer = fluid.initializer.Constant(value = 0.) \ + if nonlocal_params["use_zero_init_conv"] \ + else fluid.initializer.Normal(loc = 0.0, + scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_out' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) \ + if (nonlocal_params["no_bias"] == 0) else False, \ + name = prefix + '_out') + blob_out_shape = blob_out.shape + + if nonlocal_params["use_bn"]: + bn_name = prefix + "_bn" + blob_out = fluid.layers.batch_norm(blob_out, \ + # is_test = test_mode, \ + momentum = nonlocal_params["bn_momentum"], \ + epsilon = nonlocal_params["bn_epsilon"], \ + name = bn_name, \ + param_attr = ParamAttr(name = bn_name + "_s", \ + initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \ + regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ + bias_attr = ParamAttr(name = bn_name + "_b", \ + regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ + moving_mean_name = bn_name + "_rm", \ + moving_variance_name = bn_name + "_riv") # add bn + + if nonlocal_params["use_affine"]: + affine_scale = fluid.layers.create_parameter(\ + shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ + attr=ParamAttr(name=prefix + '_affine' + '_s'), \ + default_initializer = fluid.initializer.Constant(value = 1.)) + affine_bias = fluid.layers.create_parameter(\ + shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ + attr=ParamAttr(name=prefix + '_affine' + '_b'), \ + default_initializer = fluid.initializer.Constant(value = 0.)) + blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \ + bias = affine_bias, name = prefix + '_affine') # add affine + + return blob_out + + +def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner): + ''' + add_space_nonlocal: + Non-local Neural Networks: see https://arxiv.org/abs/1711.07971 + ''' + conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner) + output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum') + return output diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/processor.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..7b69586a5b22b23112ca2044879b4478173bb37e --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/processor.py @@ -0,0 +1,159 @@ +# coding=utf-8 +import base64 +import os + +import cv2 +import numpy as np +from PIL import Image, ImageDraw + +__all__ = [ + 'base64_to_cv2', + 'load_label_info', + 'postprocess', +] + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def get_save_image_name(img, output_dir, image_path): + """Get save image name from source image path. 
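+
+    The extension is kept from `image_path` when present; otherwise it is
+    inferred from the PIL image format (PNG/JPEG/BMP) or, failing that, from
+    the image mode (RGB/L -> .jpg, RGBA/P -> .png).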
+ """ + if not os.path.exists(output_dir): + os.makedirs(output_dir) + image_name = os.path.split(image_path)[-1] + name, ext = os.path.splitext(image_name) + + if ext == '': + if img.format == 'PNG': + ext = '.png' + elif img.format == 'JPEG': + ext = '.jpg' + elif img.format == 'BMP': + ext = '.bmp' + else: + if img.mode == "RGB" or img.mode == "L": + ext = ".jpg" + elif img.mode == "RGBA" or img.mode == "P": + ext = '.png' + + return os.path.join(output_dir, "{}".format(name)) + ext + + +def draw_bounding_box_on_image(image_path, data_list, save_dir): + image = Image.open(image_path) + draw = ImageDraw.Draw(image) + for data in data_list: + left, right, top, bottom = data['left'], data['right'], data['top'], data['bottom'] + + # draw bbox + draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=2, fill='red') + + # draw label + if image.mode == 'RGB': + text = data['label'] + ": %.2f%%" % (100 * data['confidence']) + textsize_width, textsize_height = draw.textsize(text=text) + draw.rectangle( + xy=(left, top - (textsize_height + 5), left + textsize_width + 10, top), fill=(255, 255, 255)) + draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) + + save_name = get_save_image_name(image, save_dir, image_path) + if os.path.exists(save_name): + os.remove(save_name) + + image.save(save_name) + return save_name + + +def clip_bbox(bbox, img_width, img_height): + xmin = max(min(bbox[0], img_width), 0.) + ymin = max(min(bbox[1], img_height), 0.) + xmax = max(min(bbox[2], img_width), 0.) + ymax = max(min(bbox[3], img_height), 0.) + return xmin, ymin, xmax, ymax + + +def load_label_info(file_path): + with open(file_path, 'r') as fr: + text = fr.readlines() + label_names = [] + for info in text: + label_names.append(info.strip()) + return label_names + + +def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True): + """ + postprocess the lod_tensor produced by fluid.Executor.run + + Args: + paths (list[str]): the path of images. + images (list(numpy.ndarray)): list of images, shape of each is [H, W, C]. + data_out (lod_tensor): data produced by executor.run. + score_thresh (float): the low limit of bounding box. + label_names (list[str]): label names. + output_dir (str): output directory. + handle_id (int): The number of images that have been handled. + visualization (bool): whether to save as images. + + Returns: + res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: + data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: + left (float): The X coordinate of the upper left corner of the bounding box; + top (float): The Y coordinate of the upper left corner of the bounding box; + right (float): The X coordinate of the lower right corner of the bounding box; + bottom (float): The Y coordinate of the lower right corner of the bounding box; + label (str): The label of detection result; + confidence (float): The confidence of detection result. + save_path (str): The path to save output images. 
+ """ + lod_tensor = data_out[0] + lod = lod_tensor.lod[0] + results = lod_tensor.as_ndarray() + + if handle_id < len(paths): + unhandled_paths = paths[handle_id:] + unhandled_paths_num = len(unhandled_paths) + else: + unhandled_paths_num = 0 + + output = [] + for index in range(len(lod) - 1): + output_i = {'data': []} + if index < unhandled_paths_num: + org_img_path = unhandled_paths[index] + org_img = Image.open(org_img_path) + output_i['path'] = org_img_path + else: + org_img = images[index - unhandled_paths_num] + org_img = org_img.astype(np.uint8) + org_img = Image.fromarray(org_img[:, :, ::-1]) + if visualization: + org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format((handle_id + index))) + org_img.save(org_img_path) + org_img_height = org_img.height + org_img_width = org_img.width + result_i = results[lod[index]:lod[index + 1]] + for row in result_i: + if len(row) != 6: + continue + if row[1] < score_thresh: + continue + category_id = int(row[0]) + confidence = row[1] + bbox = row[2:] + dt = {} + dt['label'] = label_names[category_id] + dt['confidence'] = confidence + dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height) + output_i['data'].append(dt) + + output.append(output_i) + if visualization: + output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir) + + return output diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/resnet.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..77a3f7f4c7b16c3f9c65c46fc93eb394befa5110 --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/resnet.py @@ -0,0 +1,364 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +from collections import OrderedDict +from numbers import Integral + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.framework import Variable +from paddle.fluid.regularizer import L2Decay +from paddle.fluid.initializer import Constant + +from .nonlocal_helper import add_space_nonlocal +from .name_adapter import NameAdapter + +__all__ = ['ResNet', 'ResNetC5'] + + +class ResNet(object): + """ + Residual Network, see https://arxiv.org/abs/1512.03385 + Args: + depth (int): ResNet depth, should be 34, 50. 
+ freeze_at (int): freeze the backbone at which stage + norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel' + freeze_norm (bool): freeze normalization layers + norm_decay (float): weight decay for normalization layer weights + variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently + feature_maps (list): index of stages whose feature maps are returned + dcn_v2_stages (list): index of stages who select deformable conv v2 + nonlocal_stages (list): index of stages who select nonlocal networks + """ + __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name'] + + def __init__(self, + depth=50, + freeze_at=0, + norm_type='sync_bn', + freeze_norm=False, + norm_decay=0., + variant='b', + feature_maps=[3, 4, 5], + dcn_v2_stages=[], + weight_prefix_name='', + nonlocal_stages=[], + get_prediction=False, + class_dim=1000): + super(ResNet, self).__init__() + + if isinstance(feature_maps, Integral): + feature_maps = [feature_maps] + + assert depth in [34, 50], \ + "depth {} not in [34, 50]" + assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant" + assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" + assert len(feature_maps) > 0, "need one or more feature maps" + assert norm_type in ['bn', 'sync_bn', 'affine_channel'] + assert not (len(nonlocal_stages)>0 and depth<50), \ + "non-local is not supported for resnet18 or resnet34" + + self.depth = depth + self.freeze_at = freeze_at + self.norm_type = norm_type + self.norm_decay = norm_decay + self.freeze_norm = freeze_norm + self.variant = variant + self._model_type = 'ResNet' + self.feature_maps = feature_maps + self.dcn_v2_stages = dcn_v2_stages + self.depth_cfg = { + 34: ([3, 4, 6, 3], self.basicblock), + 50: ([3, 4, 6, 3], self.bottleneck), + } + self.stage_filters = [64, 128, 256, 512] + self._c1_out_chan_num = 64 + self.na = NameAdapter(self) + self.prefix_name = weight_prefix_name + + self.nonlocal_stages = nonlocal_stages + self.nonlocal_mod_cfg = { + 50: 2, + 101: 5, + 152: 8, + 200: 12, + } + self.get_prediction = get_prediction + self.class_dim = class_dim + + def _conv_offset(self, input, filter_size, stride, padding, act=None, name=None): + out_channel = filter_size * filter_size * 3 + out = fluid.layers.conv2d( + input, + num_filters=out_channel, + filter_size=filter_size, + stride=stride, + padding=padding, + param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"), + bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"), + act=act, + name=name) + return out + + def _conv_norm(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None, dcn_v2=False): + _name = self.prefix_name + name if self.prefix_name != '' else name + if not dcn_v2: + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=_name + "_weights"), + bias_attr=False, + name=_name + '.conv2d.output.1') + else: + # select deformable conv" + offset_mask = self._conv_offset( + input=input, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + act=None, + name=_name + "_conv_offset") + offset_channel = filter_size**2 * 2 + mask_channel = filter_size**2 + offset, mask = fluid.layers.split(input=offset_mask, num_or_sections=[offset_channel, mask_channel], dim=1) + mask = fluid.layers.sigmoid(mask) + conv = fluid.layers.deformable_conv( + input=input, + offset=offset, + mask=mask, + num_filters=num_filters, + 
filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + deformable_groups=1, + im2col_step=1, + param_attr=ParamAttr(name=_name + "_weights"), + bias_attr=False, + name=_name + ".conv2d.output.1") + + bn_name = self.na.fix_conv_norm_name(name) + bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name + + norm_lr = 0. if self.freeze_norm else 1. + norm_decay = self.norm_decay + pattr = ParamAttr(name=bn_name + '_scale', learning_rate=norm_lr, regularizer=L2Decay(norm_decay)) + battr = ParamAttr(name=bn_name + '_offset', learning_rate=norm_lr, regularizer=L2Decay(norm_decay)) + + if self.norm_type in ['bn', 'sync_bn']: + global_stats = True if self.freeze_norm else False + out = fluid.layers.batch_norm( + input=conv, + act=act, + name=bn_name + '.output.1', + param_attr=pattr, + bias_attr=battr, + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance', + use_global_stats=global_stats) + scale = fluid.framework._get_var(pattr.name) + bias = fluid.framework._get_var(battr.name) + elif self.norm_type == 'affine_channel': + scale = fluid.layers.create_parameter( + shape=[conv.shape[1]], dtype=conv.dtype, attr=pattr, default_initializer=fluid.initializer.Constant(1.)) + bias = fluid.layers.create_parameter( + shape=[conv.shape[1]], dtype=conv.dtype, attr=battr, default_initializer=fluid.initializer.Constant(0.)) + out = fluid.layers.affine_channel(x=conv, scale=scale, bias=bias, act=act) + if self.freeze_norm: + scale.stop_gradient = True + bias.stop_gradient = True + return out + + def _shortcut(self, input, ch_out, stride, is_first, name): + max_pooling_in_short_cut = self.variant == 'd' + ch_in = input.shape[1] + # the naming rule is same as pretrained weight + name = self.na.fix_shortcut_name(name) + std_senet = getattr(self, 'std_senet', False) + if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first): + if std_senet: + if is_first: + return self._conv_norm(input, ch_out, 1, stride, name=name) + else: + return self._conv_norm(input, ch_out, 3, stride, name=name) + if max_pooling_in_short_cut and not is_first: + input = fluid.layers.pool2d( + input=input, pool_size=2, pool_stride=2, pool_padding=0, ceil_mode=True, pool_type='avg') + return self._conv_norm(input, ch_out, 1, 1, name=name) + return self._conv_norm(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck(self, input, num_filters, stride, is_first, name, dcn_v2=False): + if self.variant == 'a': + stride1, stride2 = stride, 1 + else: + stride1, stride2 = 1, stride + + # ResNeXt + groups = getattr(self, 'groups', 1) + group_width = getattr(self, 'group_width', -1) + if groups == 1: + expand = 4 + elif (groups * group_width) == 256: + expand = 1 + else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d + num_filters = num_filters // 2 + expand = 2 + + conv_name1, conv_name2, conv_name3, \ + shortcut_name = self.na.fix_bottleneck_name(name) + std_senet = getattr(self, 'std_senet', False) + if std_senet: + conv_def = [[int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1], + [num_filters, 3, stride2, 'relu', groups, conv_name2], + [num_filters * expand, 1, 1, None, 1, conv_name3]] + else: + conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], + [num_filters, 3, stride2, 'relu', groups, conv_name2], + [num_filters * expand, 1, 1, None, 1, conv_name3]] + + residual = input + for i, (c, k, s, act, g, _name) in enumerate(conv_def): + residual = self._conv_norm( + input=residual, + num_filters=c, + 
filter_size=k, + stride=s, + act=act, + groups=g, + name=_name, + dcn_v2=(i == 1 and dcn_v2)) + short = self._shortcut(input, num_filters * expand, stride, is_first=is_first, name=shortcut_name) + # Squeeze-and-Excitation + if callable(getattr(self, '_squeeze_excitation', None)): + residual = self._squeeze_excitation(input=residual, num_channels=num_filters, name='fc' + name) + return fluid.layers.elementwise_add(x=short, y=residual, act='relu', name=name + ".add.output.5") + + def basicblock(self, input, num_filters, stride, is_first, name, dcn_v2=False): + assert dcn_v2 is False, "Not implemented yet." + conv0 = self._conv_norm( + input=input, num_filters=num_filters, filter_size=3, act='relu', stride=stride, name=name + "_branch2a") + conv1 = self._conv_norm(input=conv0, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") + short = self._shortcut(input, num_filters, stride, is_first, name=name + "_branch1") + return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') + + def layer_warp(self, input, stage_num): + """ + Args: + input (Variable): input variable. + stage_num (int): the stage number, should be 2, 3, 4, 5 + + Returns: + The last variable in endpoint-th stage. + """ + assert stage_num in [2, 3, 4, 5] + + stages, block_func = self.depth_cfg[self.depth] + count = stages[stage_num - 2] + + ch_out = self.stage_filters[stage_num - 2] + is_first = False if stage_num != 2 else True + dcn_v2 = True if stage_num in self.dcn_v2_stages else False + + nonlocal_mod = 1000 + if stage_num in self.nonlocal_stages: + nonlocal_mod = self.nonlocal_mod_cfg[self.depth] if stage_num == 4 else 2 + + # Make the layer name and parameter name consistent + # with ImageNet pre-trained model + conv = input + for i in range(count): + conv_name = self.na.fix_layer_warp_name(stage_num, count, i) + if self.depth < 50: + is_first = True if i == 0 and stage_num == 2 else False + conv = block_func( + input=conv, + num_filters=ch_out, + stride=2 if i == 0 and stage_num != 2 else 1, + is_first=is_first, + name=conv_name, + dcn_v2=dcn_v2) + + # add non local model + dim_in = conv.shape[1] + nonlocal_name = "nonlocal_conv{}".format(stage_num) + if i % nonlocal_mod == nonlocal_mod - 1: + conv = add_space_nonlocal(conv, dim_in, dim_in, nonlocal_name + '_{}'.format(i), int(dim_in / 2)) + return conv + + def c1_stage(self, input): + out_chan = self._c1_out_chan_num + + conv1_name = self.na.fix_c1_stage_name() + + if self.variant in ['c', 'd']: + conv_def = [ + [out_chan // 2, 3, 2, "conv1_1"], + [out_chan // 2, 3, 1, "conv1_2"], + [out_chan, 3, 1, "conv1_3"], + ] + else: + conv_def = [[out_chan, 7, 2, conv1_name]] + + for (c, k, s, _name) in conv_def: + input = self._conv_norm(input=input, num_filters=c, filter_size=k, stride=s, act='relu', name=_name) + + output = fluid.layers.pool2d(input=input, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + return output + + def __call__(self, input): + assert isinstance(input, Variable) + assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ + "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) + + res_endpoints = [] + + res = input + feature_maps = self.feature_maps + severed_head = getattr(self, 'severed_head', False) + if not severed_head: + res = self.c1_stage(res) + feature_maps = range(2, max(self.feature_maps) + 1) + + for i in feature_maps: + res = self.layer_warp(res, i) + if i in self.feature_maps: + res_endpoints.append(res) + if self.freeze_at >= i: + res.stop_gradient = True + if self.get_prediction: + pool 
= fluid.layers.pool2d(input=res, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + + out = fluid.layers.fc( + input=pool, + size=self.class_dim, + param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv))) + out = fluid.layers.softmax(out) + return out + return OrderedDict( + [('res{}_sum'.format(self.feature_maps[idx]), feat) for idx, feat in enumerate(res_endpoints)]) + + +class ResNetC5(ResNet): + def __init__(self, + depth=50, + freeze_at=2, + norm_type='affine_channel', + freeze_norm=True, + norm_decay=0., + variant='b', + feature_maps=[5], + weight_prefix_name=''): + super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm, norm_decay, variant, feature_maps) + self.severed_head = True diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/roi_extractor.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/roi_extractor.py new file mode 100644 index 0000000000000000000000000000000000000000..c1b560d0957d12a948864ab5820fa8c7afb7dab6 --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/roi_extractor.py @@ -0,0 +1,76 @@ +# coding=utf-8 +import paddle.fluid as fluid + +__all__ = ['FPNRoIAlign'] + + +class FPNRoIAlign(object): + """ + RoI align pooling for FPN feature maps + Args: + sampling_ratio (int): number of sampling points + min_level (int): lowest level of FPN layer + max_level (int): highest level of FPN layer + canconical_level (int): the canconical FPN feature map level + canonical_size (int): the canconical FPN feature map size + box_resolution (int): box resolution + mask_resolution (int): mask roi resolution + """ + + def __init__(self, + sampling_ratio=0, + min_level=2, + max_level=5, + canconical_level=4, + canonical_size=224, + box_resolution=7, + mask_resolution=14): + super(FPNRoIAlign, self).__init__() + self.sampling_ratio = sampling_ratio + self.min_level = min_level + self.max_level = max_level + self.canconical_level = canconical_level + self.canonical_size = canonical_size + self.box_resolution = box_resolution + self.mask_resolution = mask_resolution + + def __call__(self, head_inputs, rois, spatial_scale, is_mask=False): + """ + Adopt RoI align onto several level of feature maps to get RoI features. + Distribute RoIs to different levels by area and get a list of RoI + features by distributed RoIs and their corresponding feature maps. 
+ + Returns: + roi_feat(Variable): RoI features with shape of [M, C, R, R], + where M is the number of RoIs and R is RoI resolution + + """ + k_min = self.min_level + k_max = self.max_level + num_roi_lvls = k_max - k_min + 1 + name_list = list(head_inputs.keys()) + input_name_list = name_list[-num_roi_lvls:] + spatial_scale = spatial_scale[-num_roi_lvls:] + rois_dist, restore_index = fluid.layers.distribute_fpn_proposals(rois, k_min, k_max, self.canconical_level, + self.canonical_size) + # rois_dist is in ascend order + roi_out_list = [] + resolution = is_mask and self.mask_resolution or self.box_resolution + for lvl in range(num_roi_lvls): + name_index = num_roi_lvls - lvl - 1 + rois_input = rois_dist[lvl] + head_input = head_inputs[input_name_list[name_index]] + sc = spatial_scale[name_index] + roi_out = fluid.layers.roi_align( + input=head_input, + rois=rois_input, + pooled_height=resolution, + pooled_width=resolution, + spatial_scale=sc, + sampling_ratio=self.sampling_ratio) + roi_out_list.append(roi_out) + roi_feat_shuffle = fluid.layers.concat(roi_out_list) + roi_feat_ = fluid.layers.gather(roi_feat_shuffle, restore_index) + roi_feat = fluid.layers.lod_reset(roi_feat_, rois) + + return roi_feat diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/rpn_head.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/rpn_head.py new file mode 100644 index 0000000000000000000000000000000000000000..8dccb9dcb85a372a0fbc74027bb08fb6abd93bda --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/rpn_head.py @@ -0,0 +1,455 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.initializer import Normal +from paddle.fluid.regularizer import L2Decay + +__all__ = ['AnchorGenerator', 'RPNTargetAssign', 'GenerateProposals', 'RPNHead', 'FPNRPNHead'] + + +class AnchorGenerator(object): + # __op__ = fluid.layers.anchor_generator + def __init__(self, + stride=[16.0, 16.0], + anchor_sizes=[32, 64, 128, 256, 512], + aspect_ratios=[0.5, 1., 2.], + variance=[1., 1., 1., 1.]): + super(AnchorGenerator, self).__init__() + self.anchor_sizes = anchor_sizes + self.aspect_ratios = aspect_ratios + self.variance = variance + self.stride = stride + + +class RPNTargetAssign(object): + # __op__ = fluid.layers.rpn_target_assign + def __init__(self, + rpn_batch_size_per_im=256, + rpn_straddle_thresh=0., + rpn_fg_fraction=0.5, + rpn_positive_overlap=0.7, + rpn_negative_overlap=0.3, + use_random=True): + super(RPNTargetAssign, self).__init__() + self.rpn_batch_size_per_im = rpn_batch_size_per_im + self.rpn_straddle_thresh = rpn_straddle_thresh + self.rpn_fg_fraction = rpn_fg_fraction + self.rpn_positive_overlap = rpn_positive_overlap + self.rpn_negative_overlap = rpn_negative_overlap + self.use_random = use_random + + +class GenerateProposals(object): + # __op__ = fluid.layers.generate_proposals + def __init__(self, pre_nms_top_n=6000, post_nms_top_n=1000, nms_thresh=.5, min_size=.1, eta=1.): + super(GenerateProposals, self).__init__() + self.pre_nms_top_n = pre_nms_top_n + self.post_nms_top_n = post_nms_top_n + self.nms_thresh = nms_thresh + self.min_size = min_size + self.eta = eta + + +class RPNHead(object): + """ + RPN Head + + Args: + anchor_generator (object): `AnchorGenerator` instance + rpn_target_assign (object): `RPNTargetAssign` instance + train_proposal (object): `GenerateProposals` 
instance for training + test_proposal (object): `GenerateProposals` instance for testing + num_classes (int): number of classes in rpn output + """ + __inject__ = ['anchor_generator', 'rpn_target_assign', 'train_proposal', 'test_proposal'] + + def __init__(self, anchor_generator, rpn_target_assign, train_proposal, test_proposal, num_classes=1): + super(RPNHead, self).__init__() + self.anchor_generator = anchor_generator + self.rpn_target_assign = rpn_target_assign + self.train_proposal = train_proposal + self.test_proposal = test_proposal + self.num_classes = num_classes + + def _get_output(self, input): + """ + Get anchor and RPN head output. + + Args: + input(Variable): feature map from backbone with shape of [N, C, H, W] + + Returns: + rpn_cls_score(Variable): Output of rpn head with shape of [N, num_anchors, H, W]. + rpn_bbox_pred(Variable): Output of rpn head with shape of [N, num_anchors * 4, H, W]. + """ + dim_out = input.shape[1] + rpn_conv = fluid.layers.conv2d( + input=input, + num_filters=dim_out, + filter_size=3, + stride=1, + padding=1, + act='relu', + name='conv_rpn', + param_attr=ParamAttr(name="conv_rpn_w", initializer=Normal(loc=0., scale=0.01)), + bias_attr=ParamAttr(name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.))) + # Generate anchors self.anchor_generator + self.anchor, self.anchor_var = fluid.layers.anchor_generator( + input=rpn_conv, + anchor_sizes=self.anchor_generator.anchor_sizes, + aspect_ratios=self.anchor_generator.aspect_ratios, + variance=self.anchor_generator.variance, + stride=self.anchor_generator.stride) + + num_anchor = self.anchor.shape[2] + # Proposal classification scores + self.rpn_cls_score = fluid.layers.conv2d( + rpn_conv, + num_filters=num_anchor * self.num_classes, + filter_size=1, + stride=1, + padding=0, + act=None, + name='rpn_cls_score', + param_attr=ParamAttr(name="rpn_cls_logits_w", initializer=Normal(loc=0., scale=0.01)), + bias_attr=ParamAttr(name="rpn_cls_logits_b", learning_rate=2., regularizer=L2Decay(0.))) + # Proposal bbox regression deltas + self.rpn_bbox_pred = fluid.layers.conv2d( + rpn_conv, + num_filters=4 * num_anchor, + filter_size=1, + stride=1, + padding=0, + act=None, + name='rpn_bbox_pred', + param_attr=ParamAttr(name="rpn_bbox_pred_w", initializer=Normal(loc=0., scale=0.01)), + bias_attr=ParamAttr(name="rpn_bbox_pred_b", learning_rate=2., regularizer=L2Decay(0.))) + return self.rpn_cls_score, self.rpn_bbox_pred + + def get_proposals(self, body_feats, im_info, mode='train'): + """ + Get proposals according to the output of backbone. + + Args: + body_feats (dict): The dictionary of feature maps from backbone. + im_info(Variable): The information of image with shape [N, 3] with + shape (height, width, scale). + body_feat_names(list): A list of names of feature maps from + backbone. + + Returns: + rpn_rois(Variable): Output proposals with shape of (rois_num, 4). + """ + # In RPN Heads, only the last feature map of backbone is used. + # And body_feat_names[-1] represents the last level name of backbone. 
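+        # e.g. body_feats = OrderedDict([('res4_sum', ...), ('res5_sum', ...)])
+        #      -> proposals below are generated from the 'res5_sum' map only.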
+ body_feat = list(body_feats.values())[-1] + rpn_cls_score, rpn_bbox_pred = self._get_output(body_feat) + + if self.num_classes == 1: + rpn_cls_prob = fluid.layers.sigmoid(rpn_cls_score, name='rpn_cls_prob') + else: + rpn_cls_score = fluid.layers.transpose(rpn_cls_score, perm=[0, 2, 3, 1]) + rpn_cls_score = fluid.layers.reshape(rpn_cls_score, shape=(0, 0, 0, -1, self.num_classes)) + rpn_cls_prob_tmp = fluid.layers.softmax(rpn_cls_score, use_cudnn=False, name='rpn_cls_prob') + rpn_cls_prob_slice = fluid.layers.slice(rpn_cls_prob_tmp, axes=[4], starts=[1], ends=[self.num_classes]) + rpn_cls_prob, _ = fluid.layers.topk(rpn_cls_prob_slice, 1) + rpn_cls_prob = fluid.layers.reshape(rpn_cls_prob, shape=(0, 0, 0, -1)) + rpn_cls_prob = fluid.layers.transpose(rpn_cls_prob, perm=[0, 3, 1, 2]) + prop_op = self.train_proposal if mode == 'train' else self.test_proposal + # generate proposals using the config carried by prop_op + rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals( + scores=rpn_cls_prob, + bbox_deltas=rpn_bbox_pred, + im_info=im_info, + anchors=self.anchor, + variances=self.anchor_var, + pre_nms_top_n=prop_op.pre_nms_top_n, + post_nms_top_n=prop_op.post_nms_top_n, + nms_thresh=prop_op.nms_thresh, + min_size=prop_op.min_size, + eta=prop_op.eta) + return rpn_rois + + def _transform_input(self, rpn_cls_score, rpn_bbox_pred, anchor, anchor_var): + rpn_cls_score = fluid.layers.transpose(rpn_cls_score, perm=[0, 2, 3, 1]) + rpn_bbox_pred = fluid.layers.transpose(rpn_bbox_pred, perm=[0, 2, 3, 1]) + anchor = fluid.layers.reshape(anchor, shape=(-1, 4)) + anchor_var = fluid.layers.reshape(anchor_var, shape=(-1, 4)) + rpn_cls_score = fluid.layers.reshape(x=rpn_cls_score, shape=(0, -1, self.num_classes)) + rpn_bbox_pred = fluid.layers.reshape(x=rpn_bbox_pred, shape=(0, -1, 4)) + return rpn_cls_score, rpn_bbox_pred, anchor, anchor_var + + def _get_loss_input(self): + for attr in ['rpn_cls_score', 'rpn_bbox_pred', 'anchor', 'anchor_var']: + if not getattr(self, attr, None): + raise ValueError("self.{} should not be None; call RPNHead.get_proposals first".format(attr)) + return self._transform_input(self.rpn_cls_score, self.rpn_bbox_pred, self.anchor, self.anchor_var) + + def get_loss(self, im_info, gt_box, is_crowd, gt_label=None): + """ + Sample anchors and calculate RPN loss. + + Args: + im_info(Variable): The information of image with shape [N, 3], + where each row is (height, width, scale). + gt_box(Variable): The ground-truth bounding boxes with shape [M, 4]. + M is the number of ground-truth boxes. + is_crowd(Variable): Indicates whether a ground-truth box is crowd, with + shape [M, 1]. M is the number of ground-truth boxes. + + Returns: + Type: dict + rpn_cls_loss(Variable): RPN classification loss. + rpn_bbox_loss(Variable): RPN bounding box regression loss.
+ + """ + rpn_cls, rpn_bbox, anchor, anchor_var = self._get_loss_input() + if self.num_classes == 1: + # self.rpn_target_assign + score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ + fluid.layers.rpn_target_assign( + bbox_pred=rpn_bbox, + cls_logits=rpn_cls, + anchor_box=anchor, + anchor_var=anchor_var, + gt_boxes=gt_box, + is_crowd=is_crowd, + im_info=im_info, + rpn_batch_size_per_im=self.rpn_target_assign.rpn_batch_size_per_im, + rpn_straddle_thresh=self.rpn_target_assign.rpn_straddle_thresh, + rpn_fg_fraction=self.rpn_target_assign.rpn_fg_fraction, + rpn_positive_overlap=self.rpn_target_assign.rpn_positive_overlap, + rpn_negative_overlap=self.rpn_target_assign.rpn_negative_overlap, + use_random=self.rpn_target_assign.use_random) + score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32') + score_tgt.stop_gradient = True + rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits(x=score_pred, label=score_tgt) + else: + score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ + self.rpn_target_assign( + bbox_pred=rpn_bbox, + cls_logits=rpn_cls, + anchor_box=anchor, + anchor_var=anchor_var, + gt_boxes=gt_box, + gt_labels=gt_label, + is_crowd=is_crowd, + num_classes=self.num_classes, + im_info=im_info) + labels_int64 = fluid.layers.cast(x=score_tgt, dtype='int64') + labels_int64.stop_gradient = True + rpn_cls_loss = fluid.layers.softmax_with_cross_entropy( + logits=score_pred, label=labels_int64, numeric_stable_mode=True) + + rpn_cls_loss = fluid.layers.reduce_mean(rpn_cls_loss, name='loss_rpn_cls') + + loc_tgt = fluid.layers.cast(x=loc_tgt, dtype='float32') + loc_tgt.stop_gradient = True + rpn_reg_loss = fluid.layers.smooth_l1( + x=loc_pred, y=loc_tgt, sigma=3.0, inside_weight=bbox_weight, outside_weight=bbox_weight) + rpn_reg_loss = fluid.layers.reduce_sum(rpn_reg_loss, name='loss_rpn_bbox') + score_shape = fluid.layers.shape(score_tgt) + score_shape = fluid.layers.cast(x=score_shape, dtype='float32') + norm = fluid.layers.reduce_prod(score_shape) + norm.stop_gradient = True + rpn_reg_loss = rpn_reg_loss / norm + return {'rpn_cls_loss': rpn_cls_loss, 'rpn_reg_loss': rpn_reg_loss} + + +class FPNRPNHead(RPNHead): + """ + RPN Head that supports FPN input + + Args: + anchor_generator (object): `AnchorGenerator` instance + rpn_target_assign (object): `RPNTargetAssign` instance + train_proposal (object): `GenerateProposals` instance for training + test_proposal (object): `GenerateProposals` instance for testing + anchor_start_size (int): size of anchor at the first scale + num_chan (int): number of FPN output channels + min_level (int): lowest level of FPN output + max_level (int): highest level of FPN output + num_classes (int): number of classes in rpn output + """ + + def __init__(self, + anchor_generator, + rpn_target_assign, + train_proposal, + test_proposal, + anchor_start_size=32, + num_chan=256, + min_level=2, + max_level=6, + num_classes=1): + super(FPNRPNHead, self).__init__(anchor_generator, rpn_target_assign, train_proposal, test_proposal) + self.anchor_start_size = anchor_start_size + self.num_chan = num_chan + self.min_level = min_level + self.max_level = max_level + self.num_classes = num_classes + + self.fpn_rpn_list = [] + self.anchors_list = [] + self.anchor_var_list = [] + + def _get_output(self, input, feat_lvl): + """ + Get anchor and FPN RPN head output at one level. + + Args: + input(Variable): Body feature from backbone. + feat_lvl(int): Indicate the level of rpn output corresponding + to the level of feature map. 
+ + Returns: + rpn_cls_score(Variable): Output of one level of fpn rpn head with + shape of [N, num_anchors, H, W]. + rpn_bbox_pred(Variable): Output of one level of fpn rpn head with + shape of [N, num_anchors * 4, H, W]. + """ + slvl = str(feat_lvl) + conv_name = 'conv_rpn_fpn' + slvl + cls_name = 'rpn_cls_logits_fpn' + slvl + bbox_name = 'rpn_bbox_pred_fpn' + slvl + conv_share_name = 'conv_rpn_fpn' + str(self.min_level) + cls_share_name = 'rpn_cls_logits_fpn' + str(self.min_level) + bbox_share_name = 'rpn_bbox_pred_fpn' + str(self.min_level) + + num_anchors = len(self.anchor_generator.aspect_ratios) + conv_rpn_fpn = fluid.layers.conv2d( + input=input, + num_filters=self.num_chan, + filter_size=3, + padding=1, + act='relu', + name=conv_name, + param_attr=ParamAttr(name=conv_share_name + '_w', initializer=Normal(loc=0., scale=0.01)), + bias_attr=ParamAttr(name=conv_share_name + '_b', learning_rate=2., regularizer=L2Decay(0.))) + + # generate anchors using the config carried by self.anchor_generator + self.anchors, self.anchor_var = fluid.layers.anchor_generator( + input=conv_rpn_fpn, + anchor_sizes=(self.anchor_start_size * 2.**(feat_lvl - self.min_level), ), + stride=(2.**feat_lvl, 2.**feat_lvl), + aspect_ratios=self.anchor_generator.aspect_ratios, + variance=self.anchor_generator.variance) + + cls_num_filters = num_anchors * self.num_classes + self.rpn_cls_score = fluid.layers.conv2d( + input=conv_rpn_fpn, + num_filters=cls_num_filters, + filter_size=1, + act=None, + name=cls_name, + param_attr=ParamAttr(name=cls_share_name + '_w', initializer=Normal(loc=0., scale=0.01)), + bias_attr=ParamAttr(name=cls_share_name + '_b', learning_rate=2., regularizer=L2Decay(0.))) + self.rpn_bbox_pred = fluid.layers.conv2d( + input=conv_rpn_fpn, + num_filters=num_anchors * 4, + filter_size=1, + act=None, + name=bbox_name, + param_attr=ParamAttr(name=bbox_share_name + '_w', initializer=Normal(loc=0., scale=0.01)), + bias_attr=ParamAttr(name=bbox_share_name + '_b', learning_rate=2., regularizer=L2Decay(0.))) + return self.rpn_cls_score, self.rpn_bbox_pred + + def _get_single_proposals(self, body_feat, im_info, feat_lvl, mode='train'): + """ + Get proposals at one level according to the output of the FPN RPN head. + + Args: + body_feat(Variable): the feature map from the backbone. + im_info(Variable): The information of image with shape [N, 3], + where each row is (height, width, scale). + feat_lvl(int): Indicates the level of the proposals, corresponding to + the level of the feature map. + + Returns: + rpn_rois_fpn(Variable): Output proposals with shape of (rois_num, 4). + rpn_roi_probs_fpn(Variable): Scores of proposals with + shape of (rois_num, 1).
+ """ + + rpn_cls_score_fpn, rpn_bbox_pred_fpn = self._get_output(body_feat, feat_lvl) + + prop_op = self.train_proposal if mode == 'train' else self.test_proposal + if self.num_classes == 1: + rpn_cls_prob_fpn = fluid.layers.sigmoid(rpn_cls_score_fpn, name='rpn_cls_prob_fpn' + str(feat_lvl)) + else: + rpn_cls_score_fpn = fluid.layers.transpose(rpn_cls_score_fpn, perm=[0, 2, 3, 1]) + rpn_cls_score_fpn = fluid.layers.reshape(rpn_cls_score_fpn, shape=(0, 0, 0, -1, self.num_classes)) + rpn_cls_prob_fpn = fluid.layers.softmax( + rpn_cls_score_fpn, use_cudnn=False, name='rpn_cls_prob_fpn' + str(feat_lvl)) + rpn_cls_prob_fpn = fluid.layers.slice(rpn_cls_prob_fpn, axes=[4], starts=[1], ends=[self.num_classes]) + rpn_cls_prob_fpn, _ = fluid.layers.topk(rpn_cls_prob_fpn, 1) + rpn_cls_prob_fpn = fluid.layers.reshape(rpn_cls_prob_fpn, shape=(0, 0, 0, -1)) + rpn_cls_prob_fpn = fluid.layers.transpose(rpn_cls_prob_fpn, perm=[0, 3, 1, 2]) + # prop_op + rpn_rois_fpn, rpn_roi_prob_fpn = fluid.layers.generate_proposals( + scores=rpn_cls_prob_fpn, + bbox_deltas=rpn_bbox_pred_fpn, + im_info=im_info, + anchors=self.anchors, + variances=self.anchor_var, + pre_nms_top_n=prop_op.pre_nms_top_n, + post_nms_top_n=prop_op.post_nms_top_n, + nms_thresh=prop_op.nms_thresh, + min_size=prop_op.min_size, + eta=prop_op.eta) + return rpn_rois_fpn, rpn_roi_prob_fpn + + def get_proposals(self, fpn_feats, im_info, mode='train'): + """ + Get proposals in multiple levels according to the output of fpn + rpn head + + Args: + fpn_feats(dict): A dictionary represents the output feature map + of FPN with their name. + im_info(Variable): The information of image with shape [N, 3] with + format (height, width, scale). + + Return: + rois_list(Variable): Output proposals in shape of [rois_num, 4] + """ + rois_list = [] + roi_probs_list = [] + fpn_feat_names = list(fpn_feats.keys()) + for lvl in range(self.min_level, self.max_level + 1): + fpn_feat_name = fpn_feat_names[self.max_level - lvl] + fpn_feat = fpn_feats[fpn_feat_name] + rois_fpn, roi_probs_fpn = self._get_single_proposals(fpn_feat, im_info, lvl, mode) + self.fpn_rpn_list.append((self.rpn_cls_score, self.rpn_bbox_pred)) + rois_list.append(rois_fpn) + roi_probs_list.append(roi_probs_fpn) + self.anchors_list.append(self.anchors) + self.anchor_var_list.append(self.anchor_var) + prop_op = self.train_proposal if mode == 'train' else self.test_proposal + post_nms_top_n = prop_op.post_nms_top_n + rois_collect = fluid.layers.collect_fpn_proposals( + rois_list, roi_probs_list, self.min_level, self.max_level, post_nms_top_n, name='collect') + return rois_collect + + def _get_loss_input(self): + rpn_clses = [] + rpn_bboxes = [] + anchors = [] + anchor_vars = [] + for i in range(len(self.fpn_rpn_list)): + single_input = self._transform_input(self.fpn_rpn_list[i][0], self.fpn_rpn_list[i][1], self.anchors_list[i], + self.anchor_var_list[i]) + rpn_clses.append(single_input[0]) + rpn_bboxes.append(single_input[1]) + anchors.append(single_input[2]) + anchor_vars.append(single_input[3]) + + rpn_cls = fluid.layers.concat(rpn_clses, axis=1) + rpn_bbox = fluid.layers.concat(rpn_bboxes, axis=1) + anchors = fluid.layers.concat(anchors) + anchor_var = fluid.layers.concat(anchor_vars) + return rpn_cls, rpn_bbox, anchors, anchor_var diff --git a/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/README.md b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/README.md similarity index 100% rename from 
hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/README.md rename to modules/image/object_detection/retinanet_resnet50_fpn_coco2017/README.md diff --git a/hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/__init__.py b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/__init__.py similarity index 100% rename from hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/__init__.py rename to modules/image/object_detection/retinanet_resnet50_fpn_coco2017/__init__.py diff --git a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/data_feed.py b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..dbef6a3fc4ae231e6e08dac93af4674066920b43 --- /dev/null +++ b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/data_feed.py @@ -0,0 +1,99 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import os +from collections import OrderedDict + +import numpy as np +import cv2 +from PIL import Image, ImageEnhance +from paddle import fluid + +__all__ = ['test_reader', 'padding_minibatch'] + + +def test_reader(paths=None, images=None): + """ + Data generator that yields preprocessed images. + + Args: + paths (list[str]): paths to images. + images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] + + Yields: + res (dict): keys are 'image' and 'im_info', the corresponding values are: + image (numpy.ndarray): the image to be fed into the network + im_info (numpy.ndarray): the resize info of the preprocessed image, + as (resized height, resized width, scale) + """ + img_list = list() + if paths: + for img_path in paths: + assert os.path.isfile(img_path), "The {} isn't a valid file path.".format(img_path) + img = cv2.imread(img_path).astype('float32') + img_list.append(img) + if images is not None: + for img in images: + img_list.append(img) + for im in img_list: + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + im = im.astype(np.float32, copy=False) + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + mean = np.array(mean)[np.newaxis, np.newaxis, :] + std = np.array(std)[np.newaxis, np.newaxis, :] + im = im / 255.0 + im -= mean + im /= std + target_size = 800 + max_size = 1333 + shape = im.shape + # im_shape holds the original shape of image. + # im_shape = np.array([shape[0], shape[1], 1.0]).astype('float32') + im_size_min = np.min(shape[0:2]) + im_size_max = np.max(shape[0:2]) + im_scale = float(target_size) / float(im_size_min) + if np.round(im_scale * im_size_max) > max_size: + im_scale = float(max_size) / float(im_size_max) + + resize_w = np.round(im_scale * float(shape[1])) + resize_h = np.round(im_scale * float(shape[0])) + # im_info holds the resize info of image.
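`test_reader` scales the short side toward `target_size` (800) unless that would push the long side past `max_size` (1333), in which case the long side is scaled to exactly 1333. A minimal sketch of just that scale computation, assuming the same constants (the helper name is hypothetical):

```python
import numpy as np

def compute_im_scale(height, width, target_size=800, max_size=1333):
    # Scale the short side up (or down) to target_size ...
    im_scale = float(target_size) / float(min(height, width))
    # ... unless the long side would then exceed max_size; cap it there instead.
    if np.round(im_scale * max(height, width)) > max_size:
        im_scale = float(max_size) / float(max(height, width))
    return im_scale

print(compute_im_scale(480, 640))   # ~1.667: short side -> 800, long side -> 1067
print(compute_im_scale(400, 1200))  # ~1.111: long side capped at 1333
```

`padding_minibatch` (defined next) then zero-pads every image in the batch to the largest resized shape, rounded up to a multiple of `coarsest_stride`; with stride 32, a 1067-pixel side becomes ceil(1067 / 32) * 32 = 1088, which keeps the repeated stride-2 downsampling in the FPN integral.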
+ im_info = np.array([resize_h, resize_w, im_scale]).astype('float32') + + im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR) + + # HWC --> CHW + im = np.swapaxes(im, 1, 2) + im = np.swapaxes(im, 1, 0) + yield {'image': im, 'im_info': im_info} + + +def padding_minibatch(batch_data, coarsest_stride=0, use_padded_im_info=True): + max_shape_org = np.array([data['image'].shape for data in batch_data]).max(axis=0) + if coarsest_stride > 0: + max_shape = np.zeros((3)).astype('int32') + max_shape[1] = int(np.ceil(max_shape_org[1] / coarsest_stride) * coarsest_stride) + max_shape[2] = int(np.ceil(max_shape_org[2] / coarsest_stride) * coarsest_stride) + else: + max_shape = max_shape_org.astype('int32') + + padding_image = list() + padding_info = list() + padding_shape = list() + + for data in batch_data: + im_c, im_h, im_w = data['image'].shape + # image + padding_im = np.zeros((im_c, max_shape[1], max_shape[2]), dtype=np.float32) + padding_im[:, 0:im_h, 0:im_w] = data['image'] + padding_image.append(padding_im) + # im_info + data['im_info'][0] = max_shape[1] if use_padded_im_info else max_shape_org[1] + data['im_info'][1] = max_shape[2] if use_padded_im_info else max_shape_org[2] + padding_info.append(data['im_info']) + + padding_image = np.array(padding_image).astype('float32') + padding_info = np.array(padding_info).astype('float32') + return padding_image, padding_info diff --git a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/fpn.py b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..803b8acde9bbd289237d2bd1b9735fd905964edf --- /dev/null +++ b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/fpn.py @@ -0,0 +1,237 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy +from collections import OrderedDict + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.initializer import Xavier +from paddle.fluid.regularizer import L2Decay + +__all__ = ['FPN'] + + +def ConvNorm(input, + num_filters, + filter_size, + stride=1, + groups=1, + norm_decay=0., + norm_type='affine_channel', + norm_groups=32, + dilation=1, + lr_scale=1, + freeze_norm=False, + act=None, + norm_name=None, + initializer=None, + name=None): + fan = num_filters + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=((filter_size - 1) // 2) * dilation, + dilation=dilation, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights", initializer=initializer, learning_rate=lr_scale), + bias_attr=False, + name=name + '.conv2d.output.1') + norm_lr = 0. if freeze_norm else 1. 
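Before the normalization branches below: when `norm_type` is `'affine_channel'`, `ConvNorm` replaces batch norm with a fixed per-channel linear transform. A small numpy sketch of what `fluid.layers.affine_channel` computes (illustrative only, not the fluid implementation):

```python
import numpy as np

# affine_channel applies y[n, c, h, w] = x[n, c, h, w] * scale[c] + bias[c],
# broadcasting one (scale, bias) pair per channel over N, H, W.
x = np.random.rand(2, 3, 8, 8).astype('float32')    # [N, C, H, W] feature map
scale = np.array([1.0, 0.5, 2.0], dtype='float32')  # one scale per channel
bias = np.array([0.0, 0.1, -0.1], dtype='float32')  # one offset per channel
y = x * scale.reshape(1, -1, 1, 1) + bias.reshape(1, -1, 1, 1)
print(y.shape)  # (2, 3, 8, 8)
```

With `freeze_norm=True`, `norm_lr` just above zeroes the learning rate on both parameters and `stop_gradient` is set at the end of the function, so the scale and offset act as constants folded from the pretrained normalization statistics.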
+ pattr = ParamAttr(name=norm_name + '_scale', learning_rate=norm_lr * lr_scale, regularizer=L2Decay(norm_decay)) + battr = ParamAttr(name=norm_name + '_offset', learning_rate=norm_lr * lr_scale, regularizer=L2Decay(norm_decay)) + if norm_type in ['bn', 'sync_bn']: + global_stats = True if freeze_norm else False + out = fluid.layers.batch_norm( + input=conv, + act=act, + name=norm_name + '.output.1', + param_attr=pattr, + bias_attr=battr, + moving_mean_name=norm_name + '_mean', + moving_variance_name=norm_name + '_variance', + use_global_stats=global_stats) + scale = fluid.framework._get_var(pattr.name) + bias = fluid.framework._get_var(battr.name) + elif norm_type == 'gn': + out = fluid.layers.group_norm( + input=conv, act=act, name=norm_name + '.output.1', groups=norm_groups, param_attr=pattr, bias_attr=battr) + scale = fluid.framework._get_var(pattr.name) + bias = fluid.framework._get_var(battr.name) + elif norm_type == 'affine_channel': + scale = fluid.layers.create_parameter( + shape=[conv.shape[1]], dtype=conv.dtype, attr=pattr, default_initializer=fluid.initializer.Constant(1.)) + bias = fluid.layers.create_parameter( + shape=[conv.shape[1]], dtype=conv.dtype, attr=battr, default_initializer=fluid.initializer.Constant(0.)) + out = fluid.layers.affine_channel(x=conv, scale=scale, bias=bias, act=act) + if freeze_norm: + scale.stop_gradient = True + bias.stop_gradient = True + return out + + +class FPN(object): + """ + Feature Pyramid Network, see https://arxiv.org/abs/1612.03144 + + Args: + num_chan (int): number of feature channels + min_level (int): lowest level of the backbone feature map to use + max_level (int): highest level of the backbone feature map to use + spatial_scale (list): feature map scaling factors + has_extra_convs (bool): whether to add extra convolutions for the higher levels + norm_type (str|None): normalization type, 'bn'/'sync_bn'/'affine_channel' + """ + __shared__ = ['norm_type', 'freeze_norm'] + + def __init__(self, + num_chan=256, + min_level=2, + max_level=6, + spatial_scale=[1. / 32., 1. / 16., 1. / 8., 1. / 4.], + has_extra_convs=False, + norm_type=None, + freeze_norm=False): + self.freeze_norm = freeze_norm + self.num_chan = num_chan + self.min_level = min_level + self.max_level = max_level + self.spatial_scale = spatial_scale + self.has_extra_convs = has_extra_convs + self.norm_type = norm_type + + def _add_topdown_lateral(self, body_name, body_input, upper_output): + lateral_name = 'fpn_inner_' + body_name + '_lateral' + topdown_name = 'fpn_topdown_' + body_name + fan = body_input.shape[1] + if self.norm_type: + initializer = Xavier(fan_out=fan) + lateral = ConvNorm( + body_input, + self.num_chan, + 1, + initializer=initializer, + norm_type=self.norm_type, + freeze_norm=self.freeze_norm, + name=lateral_name, + norm_name=lateral_name) + else: + lateral = fluid.layers.conv2d( + body_input, + self.num_chan, + 1, + param_attr=ParamAttr(name=lateral_name + "_w", initializer=Xavier(fan_out=fan)), + bias_attr=ParamAttr(name=lateral_name + "_b", learning_rate=2., regularizer=L2Decay(0.)), + name=lateral_name) + topdown = fluid.layers.resize_nearest(upper_output, scale=2., name=topdown_name) + + return lateral + topdown + + def get_output(self, body_dict): + """ + Add FPN onto backbone. + + Args: + body_dict(OrderedDict): Dictionary of Variables; each element is an + output feature map of the backbone. + + Returns: + fpn_dict(OrderedDict): A dictionary mapping names to the FPN output + feature maps. + spatial_scale(list): A list of multiplicative spatial scale factors.
+ """ + spatial_scale = copy.deepcopy(self.spatial_scale) + body_name_list = list(body_dict.keys())[::-1] + num_backbone_stages = len(body_name_list) + self.fpn_inner_output = [[] for _ in range(num_backbone_stages)] + fpn_inner_name = 'fpn_inner_' + body_name_list[0] + body_input = body_dict[body_name_list[0]] + fan = body_input.shape[1] + if self.norm_type: + initializer = Xavier(fan_out=fan) + self.fpn_inner_output[0] = ConvNorm( + body_input, + self.num_chan, + 1, + initializer=initializer, + norm_type=self.norm_type, + freeze_norm=self.freeze_norm, + name=fpn_inner_name, + norm_name=fpn_inner_name) + else: + self.fpn_inner_output[0] = fluid.layers.conv2d( + body_input, + self.num_chan, + 1, + param_attr=ParamAttr(name=fpn_inner_name + "_w", initializer=Xavier(fan_out=fan)), + bias_attr=ParamAttr(name=fpn_inner_name + "_b", learning_rate=2., regularizer=L2Decay(0.)), + name=fpn_inner_name) + for i in range(1, num_backbone_stages): + body_name = body_name_list[i] + body_input = body_dict[body_name] + top_output = self.fpn_inner_output[i - 1] + fpn_inner_single = self._add_topdown_lateral(body_name, body_input, top_output) + self.fpn_inner_output[i] = fpn_inner_single + fpn_dict = {} + fpn_name_list = [] + for i in range(num_backbone_stages): + fpn_name = 'fpn_' + body_name_list[i] + fan = self.fpn_inner_output[i].shape[1] * 3 * 3 + if self.norm_type: + initializer = Xavier(fan_out=fan) + fpn_output = ConvNorm( + self.fpn_inner_output[i], + self.num_chan, + 3, + initializer=initializer, + norm_type=self.norm_type, + freeze_norm=self.freeze_norm, + name=fpn_name, + norm_name=fpn_name) + else: + fpn_output = fluid.layers.conv2d( + self.fpn_inner_output[i], + self.num_chan, + filter_size=3, + padding=1, + param_attr=ParamAttr(name=fpn_name + "_w", initializer=Xavier(fan_out=fan)), + bias_attr=ParamAttr(name=fpn_name + "_b", learning_rate=2., regularizer=L2Decay(0.)), + name=fpn_name) + fpn_dict[fpn_name] = fpn_output + fpn_name_list.append(fpn_name) + if not self.has_extra_convs and self.max_level - self.min_level == len(spatial_scale): + body_top_name = fpn_name_list[0] + body_top_extension = fluid.layers.pool2d( + fpn_dict[body_top_name], 1, 'max', pool_stride=2, name=body_top_name + '_subsampled_2x') + fpn_dict[body_top_name + '_subsampled_2x'] = body_top_extension + fpn_name_list.insert(0, body_top_name + '_subsampled_2x') + spatial_scale.insert(0, spatial_scale[0] * 0.5) + # Coarser FPN levels introduced for RetinaNet + highest_backbone_level = self.min_level + len(spatial_scale) - 1 + if self.has_extra_convs and self.max_level > highest_backbone_level: + fpn_blob = body_dict[body_name_list[0]] + for i in range(highest_backbone_level + 1, self.max_level + 1): + fpn_blob_in = fpn_blob + fpn_name = 'fpn_' + str(i) + if i > highest_backbone_level + 1: + fpn_blob_in = fluid.layers.relu(fpn_blob) + fan = fpn_blob_in.shape[1] * 3 * 3 + fpn_blob = fluid.layers.conv2d( + input=fpn_blob_in, + num_filters=self.num_chan, + filter_size=3, + stride=2, + padding=1, + param_attr=ParamAttr(name=fpn_name + "_w", initializer=Xavier(fan_out=fan)), + bias_attr=ParamAttr(name=fpn_name + "_b", learning_rate=2., regularizer=L2Decay(0.)), + name=fpn_name) + fpn_dict[fpn_name] = fpn_blob + fpn_name_list.insert(0, fpn_name) + spatial_scale.insert(0, spatial_scale[0] * 0.5) + res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list]) + return res_dict, spatial_scale diff --git a/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/label_file.txt 
b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/label_file.txt similarity index 100% rename from hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/label_file.txt rename to modules/image/object_detection/retinanet_resnet50_fpn_coco2017/label_file.txt diff --git a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/module.py b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/module.py new file mode 100644 index 0000000000000000000000000000000000000000..5070dacb42d0eb4ca20d6e752c7239b83b2257ee --- /dev/null +++ b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/module.py @@ -0,0 +1,302 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import ast +import argparse +from functools import partial + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.module.module import moduleinfo, runnable, serving +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.io.parser import txt_parser +from paddlehub.common.paddle_helper import add_vars_prefix + +from retinanet_resnet50_fpn_coco2017.fpn import FPN +from retinanet_resnet50_fpn_coco2017.retina_head import AnchorGenerator, RetinaTargetAssign, RetinaOutputDecoder, RetinaHead +from retinanet_resnet50_fpn_coco2017.processor import load_label_info, postprocess, base64_to_cv2 +from retinanet_resnet50_fpn_coco2017.data_feed import test_reader, padding_minibatch +from retinanet_resnet50_fpn_coco2017.resnet import ResNet + + +@moduleinfo( + name="retinanet_resnet50_fpn_coco2017", + version="1.0.0", + type="cv/object_detection", + summary="Baidu's RetinaNet model for object detection, with backbone ResNet50 and FPN.", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class RetinaNetResNet50FPN(hub.Module): + def _initialize(self): + # default pretrained model of Retinanet_ResNet50_FPN, the shape of input image tensor is (3, 608, 608) + self.default_pretrained_model_path = os.path.join(self.directory, "retinanet_resnet50_fpn_model") + self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) + self.infer_prog = None + self.image = None + self.im_info = None + self.bbox_out = None + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def context(self, num_classes=81, trainable=True, pretrained=True, phase='train'): + """ + Distill the Head Features, so as to perform transfer learning. + + Args: + num_classes (int): number of classes. + trainable (bool): whether to set parameters trainable. + pretrained (bool): whether to load default pretrained model. + phase (str): optional choices are 'train' and 'predict'. + + Returns: + inputs(dict): the input variables. + outputs(dict): the output variables. + context_prog (Program): the program to execute transfer learning. 
+ """ + context_prog = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(context_prog, startup_program): + with fluid.unique_name.guard(): + var_prefix = '@HUB_{}@'.format(self.name) + # image + image = fluid.layers.data(name='image', shape=[-1, 3, -1, -1], dtype='float32', lod_level=0) + # im_info + im_info = fluid.layers.data(name='im_info', shape=[3], dtype='float32', lod_level=0) + # backbone + backbone = ResNet( + norm_type='affine_channel', freeze_at=2, norm_decay=0., depth=50, feature_maps=[3, 4, 5]) + body_feats = backbone(image) + # retina_head + retina_head = RetinaHead( + anchor_generator=AnchorGenerator(aspect_ratios=[1.0, 2.0, 0.5], variance=[1.0, 1.0, 1.0, 1.0]), + target_assign=RetinaTargetAssign(positive_overlap=0.5, negative_overlap=0.4), + output_decoder=RetinaOutputDecoder( + score_thresh=0.05, nms_thresh=0.5, pre_nms_top_n=1000, detections_per_im=100, nms_eta=1.0), + num_convs_per_octave=4, + num_chan=256, + max_level=7, + min_level=3, + prior_prob=0.01, + base_scale=4, + num_scales_per_octave=3) + # fpn + fpn = FPN( + max_level=7, + min_level=3, + num_chan=256, + spatial_scale=[0.03125, 0.0625, 0.125], + has_extra_convs=True) + # body_feats + body_feats, spatial_scale = fpn.get_output(body_feats) + # inputs, outputs, context_prog + inputs = {'image': var_prefix + image.name, 'im_info': var_prefix + im_info.name} + if phase == 'predict': + pred = retina_head.get_prediction(body_feats, spatial_scale, im_info) + outputs = {'bbox_out': var_prefix + pred.name} + else: + outputs = {'body_features': [var_prefix + var.name for key, var in body_feats.items()]} + + # add_vars_prefix + add_vars_prefix(context_prog, var_prefix) + add_vars_prefix(fluid.default_startup_program(), var_prefix) + + global_vars = context_prog.global_block().vars + inputs = {key: global_vars[value] for key, value in inputs.items()} + outputs = { + key: global_vars[value] if not isinstance(value, list) else [global_vars[var] for var in value] + for key, value in outputs.items() + } + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + for param in context_prog.global_block().iter_parameters(): + param.trainable = trainable + if pretrained: + + def _if_exist(var): + return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist) + else: + exe.run(startup_program) + return inputs, outputs, context_prog + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + def object_detection(self, + paths=None, + images=None, + use_gpu=False, + batch_size=1, + output_dir='detection_result', + score_thresh=0.5, + visualization=True): + """API of Object Detection. + + Args: + paths (list[str]): The paths of images. + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. 
+ output_dir (str): The path to store output images. + visualization (bool): Whether to save the detection results as images. + score_thresh (float): threshold for object detection. + + Returns: + res (list[dict]): The COCO 2017 detection results. Keys include 'data' and 'save_path'; the corresponding values are: + data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding values are: + left (float): The X coordinate of the upper left corner of the bounding box; + top (float): The Y coordinate of the upper left corner of the bounding box; + right (float): The X coordinate of the lower right corner of the bounding box; + bottom (float): The Y coordinate of the lower right corner of the bounding box; + label (str): The label of detection result; + confidence (float): The confidence of detection result. + save_path (str, optional): The path to save output images. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as the CUDA device id." + ) + + all_images = list() + paths = paths if paths else list() + for yield_data in test_reader(paths, images): + all_images.append(yield_data) + + images_num = len(all_images) + loop_num = int(np.ceil(images_num / batch_size)) + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_images[handle_id + image_id]) + except: + pass + padding_image, padding_info = padding_minibatch(batch_data, coarsest_stride=32, use_padded_im_info=True) + padding_image_tensor = PaddleTensor(padding_image.copy()) + padding_info_tensor = PaddleTensor(padding_info.copy()) + feed_list = [padding_image_tensor, padding_info_tensor] + if use_gpu: + data_out = self.gpu_predictor.run(feed_list) + else: + data_out = self.cpu_predictor.run(feed_list) + output = postprocess( + paths=paths, + images=images, + data_out=data_out, + score_thresh=score_thresh, + label_names=self.label_names, + output_dir=output_dir, + handle_id=handle_id, + visualization=visualization) + res += output + return res + + def add_module_config_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether to use GPU or not") + + self.arg_config_group.add_argument('--batch_size', type=int, default=1, help="batch size for prediction") + + def add_module_input_arg(self): + """ + Add the command input options + """ + self.arg_input_group.add_argument('--input_path', type=str, default=None, help="input data") + + self.arg_input_group.add_argument('--input_file', type=str, default=None, help="file containing input data") + + def check_input_data(self, args): + input_data = list() + if args.input_path: + input_data = [args.input_path] + elif args.input_file: + if not os.path.exists(args.input_file): + raise RuntimeError("File %s does not exist." % args.input_file) + else: + input_data = txt_parser.parse(args.input_file, use_strip=True) + return input_data + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service.
+ """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.object_detection(images=images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + self.parser = argparse.ArgumentParser( + description="Run the {}".format(self.name), + prog="hub run {}".format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + input_data = self.check_input_data(args) + if len(input_data) == 0: + self.parser.print_help() + exit(1) + else: + for image_path in input_data: + if not os.path.exists(image_path): + raise RuntimeError("File %s or %s is not exist." % image_path) + return self.object_detection(paths=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size) diff --git a/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/name_adapter.py b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/name_adapter.py similarity index 100% rename from hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/name_adapter.py rename to modules/image/object_detection/retinanet_resnet50_fpn_coco2017/name_adapter.py diff --git a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/nonlocal_helper.py b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/nonlocal_helper.py new file mode 100644 index 0000000000000000000000000000000000000000..839df4caf744280001f033d8ef6a3d560277368e --- /dev/null +++ b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/nonlocal_helper.py @@ -0,0 +1,151 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import paddle.fluid as fluid +from paddle.fluid import ParamAttr + +nonlocal_params = { + "use_zero_init_conv": False, + "conv_init_std": 0.01, + "no_bias": True, + "use_maxpool": False, + "use_softmax": True, + "use_bn": False, + "use_scale": True, # vital for the model prformance!!! 
+ "use_affine": False, + "bn_momentum": 0.9, + "bn_epsilon": 1.0000001e-5, + "bn_init_gamma": 0.9, + "weight_decay_bn": 1.e-4, +} + + +def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner, max_pool_stride=2): + cur = input + theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \ + filter_size = [1, 1], stride = [1, 1], \ + padding = [0, 0], \ + param_attr=ParamAttr(name = prefix + '_theta' + "_w", \ + initializer = fluid.initializer.Normal(loc = 0.0, + scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) \ + if not nonlocal_params["no_bias"] else False, \ + name = prefix + '_theta') + theta_shape = theta.shape + theta_shape_op = fluid.layers.shape(theta) + theta_shape_op.stop_gradient = True + + if nonlocal_params["use_maxpool"]: + max_pool = fluid.layers.pool2d(input = cur, \ + pool_size = [max_pool_stride, max_pool_stride], \ + pool_type = 'max', \ + pool_stride = [max_pool_stride, max_pool_stride], \ + pool_padding = [0, 0], \ + name = prefix + '_pool') + else: + max_pool = cur + + phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ + filter_size = [1, 1], stride = [1, 1], \ + padding = [0, 0], \ + param_attr = ParamAttr(name = prefix + '_phi' + "_w", \ + initializer = fluid.initializer.Normal(loc = 0.0, + scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) \ + if (nonlocal_params["no_bias"] == 0) else False, \ + name = prefix + '_phi') + phi_shape = phi.shape + + g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ + filter_size = [1, 1], stride = [1, 1], \ + padding = [0, 0], \ + param_attr = ParamAttr(name = prefix + '_g' + "_w", \ + initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_g' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) else False, \ + name = prefix + '_g') + g_shape = g.shape + # we have to use explicit batch size (to support arbitrary spacetime size) + # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784) + theta = fluid.layers.reshape(theta, shape=(0, 0, -1)) + theta = fluid.layers.transpose(theta, [0, 2, 1]) + phi = fluid.layers.reshape(phi, [0, 0, -1]) + theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity') + g = fluid.layers.reshape(g, [0, 0, -1]) + + if nonlocal_params["use_softmax"]: + if nonlocal_params["use_scale"]: + theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5) + else: + theta_phi_sc = theta_phi + p = fluid.layers.softmax(theta_phi_sc, name=prefix + '_affinity' + '_prob') + else: + # not clear about what is doing in xlw's code + p = None # not implemented + raise "Not implemented when not use softmax" + + # note g's axis[2] corresponds to p's axis[2] + # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1) + p = fluid.layers.transpose(p, [0, 2, 1]) + t = fluid.layers.matmul(g, p, name=prefix + '_y') + + # reshape back + # e.g. 
(8, 1024, 784) => (8, 1024, 4, 14, 14) + t_shape = t.shape + t_re = fluid.layers.reshape(t, shape=list(theta_shape), actual_shape=theta_shape_op) + blob_out = t_re + blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \ + filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \ + param_attr = ParamAttr(name = prefix + '_out' + "_w", \ + initializer = fluid.initializer.Constant(value = 0.) \ + if nonlocal_params["use_zero_init_conv"] \ + else fluid.initializer.Normal(loc = 0.0, + scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_out' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) \ + if (nonlocal_params["no_bias"] == 0) else False, \ + name = prefix + '_out') + blob_out_shape = blob_out.shape + + if nonlocal_params["use_bn"]: + bn_name = prefix + "_bn" + blob_out = fluid.layers.batch_norm(blob_out, \ + # is_test = test_mode, \ + momentum = nonlocal_params["bn_momentum"], \ + epsilon = nonlocal_params["bn_epsilon"], \ + name = bn_name, \ + param_attr = ParamAttr(name = bn_name + "_s", \ + initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \ + regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ + bias_attr = ParamAttr(name = bn_name + "_b", \ + regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ + moving_mean_name = bn_name + "_rm", \ + moving_variance_name = bn_name + "_riv") # add bn + + if nonlocal_params["use_affine"]: + affine_scale = fluid.layers.create_parameter(\ + shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ + attr=ParamAttr(name=prefix + '_affine' + '_s'), \ + default_initializer = fluid.initializer.Constant(value = 1.)) + affine_bias = fluid.layers.create_parameter(\ + shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ + attr=ParamAttr(name=prefix + '_affine' + '_b'), \ + default_initializer = fluid.initializer.Constant(value = 0.)) + blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \ + bias = affine_bias, name = prefix + '_affine') # add affine + + return blob_out + + +def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner): + ''' + add_space_nonlocal: + Non-local Neural Networks: see https://arxiv.org/abs/1711.07971 + ''' + conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner) + output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum') + return output diff --git a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/processor.py b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..167508096e96cbda4645bb4b20cb6b080ce5f37d --- /dev/null +++ b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/processor.py @@ -0,0 +1,162 @@ +# coding=utf-8 +import base64 +import os + +import cv2 +import numpy as np +from PIL import Image, ImageDraw + +__all__ = [ + 'base64_to_cv2', + 'load_label_info', + 'postprocess', +] + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def get_save_image_name(img, output_dir, image_path): + """Get save image name from source image path. 
+ """ + image_name = os.path.split(image_path)[-1] + name, ext = os.path.splitext(image_name) + if ext == '': + if img.format == 'PNG': + ext = '.png' + elif img.format == 'JPEG': + ext = '.jpg' + elif img.format == 'BMP': + ext = '.bmp' + else: + if img.mode == "RGB" or img.mode == "L": + ext = ".jpg" + elif img.mode == "RGBA" or img.mode == "P": + ext = '.png' + + return os.path.join(output_dir, "{}".format(name)) + ext + + +def draw_bounding_box_on_image(image_path, data_list, save_dir): + image = Image.open(image_path) + draw = ImageDraw.Draw(image) + for data in data_list: + left, right, top, bottom = data['left'], data['right'], data['top'], data['bottom'] + # draw bbox + draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=2, fill='red') + + # draw label + if image.mode == 'RGB': + text = data['label'] + ": %.2f%%" % (100 * data['confidence']) + textsize_width, textsize_height = draw.textsize(text=text) + draw.rectangle( + xy=(left, top - (textsize_height + 5), left + textsize_width + 10, top), fill=(255, 255, 255)) + draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) + + save_name = get_save_image_name(image, save_dir, image_path) + if os.path.exists(save_name): + os.remove(save_name) + image.save(save_name) + + return save_name + + +def clip_bbox(bbox, img_width, img_height): + xmin = max(min(bbox[0], img_width), 0.) + ymin = max(min(bbox[1], img_height), 0.) + xmax = max(min(bbox[2], img_width), 0.) + ymax = max(min(bbox[3], img_height), 0.) + return float(xmin), float(ymin), float(xmax), float(ymax) + + +def load_label_info(file_path): + with open(file_path, 'r') as fr: + text = fr.readlines() + label_names = [] + for info in text: + label_names.append(info.strip()) + return label_names + + +def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization): + """ + postprocess the lod_tensor produced by fluid.Executor.run + + Args: + paths (list[str]): the path of images. + images (list(numpy.ndarray)): list of images, shape of each is [H, W, C]. + data_out (lod_tensor): data produced by executor.run. + score_thresh (float): the low limit of bounding box. + label_names (list[str]): label names. + output_dir (str): output directory. + handle_id (int): The number of images that have been handled. + visualization (bool): whether to save as images. + + Returns: + res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: + data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: + left (float): The X coordinate of the upper left corner of the bounding box; + top (float): The Y coordinate of the upper left corner of the bounding box; + right (float): The X coordinate of the lower right corner of the bounding box; + bottom (float): The Y coordinate of the lower right corner of the bounding box; + label (str): The label of detection result; + confidence (float): The confidence of detection result. + save_path (str): The path to save output images. 
+ """ + lod_tensor = data_out[0] + lod = lod_tensor.lod[0] + results = lod_tensor.as_ndarray() + + if handle_id < len(paths): + unhandled_paths = paths[handle_id:] + unhandled_paths_num = len(unhandled_paths) + else: + unhandled_paths_num = 0 + + output_dir = output_dir if output_dir else os.path.join(os.getcwd(), 'detection_result') + if visualization: + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + output = [] + for index in range(len(lod) - 1): + output_i = {'data': []} + if index < unhandled_paths_num: + org_img_path = unhandled_paths[index] + org_img = Image.open(org_img_path) + output_i['path'] = org_img_path + else: + org_img = images[index - unhandled_paths_num] + org_img = org_img.astype(np.uint8) + org_img = Image.fromarray(org_img[:, :, ::-1]) + if visualization: + org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format((handle_id + index))) + org_img.save(org_img_path) + org_img_height = org_img.height + org_img_width = org_img.width + result_i = results[lod[index]:lod[index + 1]] + + for row in result_i: + if len(row) != 6: + continue + if row[1] < score_thresh: + continue + category_id = int(row[0]) + confidence = row[1] + bbox = row[2:] + dt = {} + dt['label'] = label_names[category_id] + dt['confidence'] = float(confidence) + dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height) + output_i['data'].append(dt) + + output.append(output_i) + + if visualization: + output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir) + + return output diff --git a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/resnet.py b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..77a3f7f4c7b16c3f9c65c46fc93eb394befa5110 --- /dev/null +++ b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/resnet.py @@ -0,0 +1,364 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +from collections import OrderedDict +from numbers import Integral + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.framework import Variable +from paddle.fluid.regularizer import L2Decay +from paddle.fluid.initializer import Constant + +from .nonlocal_helper import add_space_nonlocal +from .name_adapter import NameAdapter + +__all__ = ['ResNet', 'ResNetC5'] + + +class ResNet(object): + """ + Residual Network, see https://arxiv.org/abs/1512.03385 + Args: + depth (int): ResNet depth, should be 34, 50. 
+ freeze_at (int): the stage at which to freeze the backbone + norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel' + freeze_norm (bool): freeze normalization layers + norm_decay (float): weight decay for normalization layer weights + variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently + feature_maps (list): indices of stages whose feature maps are returned + dcn_v2_stages (list): indices of stages that use deformable conv v2 + nonlocal_stages (list): indices of stages that use non-local blocks + """ + __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name'] + + def __init__(self, + depth=50, + freeze_at=0, + norm_type='sync_bn', + freeze_norm=False, + norm_decay=0., + variant='b', + feature_maps=[3, 4, 5], + dcn_v2_stages=[], + weight_prefix_name='', + nonlocal_stages=[], + get_prediction=False, + class_dim=1000): + super(ResNet, self).__init__() + + if isinstance(feature_maps, Integral): + feature_maps = [feature_maps] + + assert depth in [34, 50], \ + "depth {} not in [34, 50]".format(depth) + assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant" + assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" + assert len(feature_maps) > 0, "need one or more feature maps" + assert norm_type in ['bn', 'sync_bn', 'affine_channel'] + assert not (len(nonlocal_stages)>0 and depth<50), \ + "non-local is not supported for resnet18 or resnet34" + + self.depth = depth + self.freeze_at = freeze_at + self.norm_type = norm_type + self.norm_decay = norm_decay + self.freeze_norm = freeze_norm + self.variant = variant + self._model_type = 'ResNet' + self.feature_maps = feature_maps + self.dcn_v2_stages = dcn_v2_stages + self.depth_cfg = { + 34: ([3, 4, 6, 3], self.basicblock), + 50: ([3, 4, 6, 3], self.bottleneck), + } + self.stage_filters = [64, 128, 256, 512] + self._c1_out_chan_num = 64 + self.na = NameAdapter(self) + self.prefix_name = weight_prefix_name + + self.nonlocal_stages = nonlocal_stages + self.nonlocal_mod_cfg = { + 50: 2, + 101: 5, + 152: 8, + 200: 12, + } + self.get_prediction = get_prediction + self.class_dim = class_dim + + def _conv_offset(self, input, filter_size, stride, padding, act=None, name=None): + out_channel = filter_size * filter_size * 3 + out = fluid.layers.conv2d( + input, + num_filters=out_channel, + filter_size=filter_size, + stride=stride, + padding=padding, + param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"), + bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"), + act=act, + name=name) + return out + + def _conv_norm(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None, dcn_v2=False): + _name = self.prefix_name + name if self.prefix_name != '' else name + if not dcn_v2: + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=_name + "_weights"), + bias_attr=False, + name=_name + '.conv2d.output.1') + else: + # select deformable conv v2 + offset_mask = self._conv_offset( + input=input, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + act=None, + name=_name + "_conv_offset") + offset_channel = filter_size**2 * 2 + mask_channel = filter_size**2 + offset, mask = fluid.layers.split(input=offset_mask, num_or_sections=[offset_channel, mask_channel], dim=1) + mask = fluid.layers.sigmoid(mask) + conv = fluid.layers.deformable_conv( + input=input, + offset=offset, + mask=mask, + num_filters=num_filters, +
filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + deformable_groups=1, + im2col_step=1, + param_attr=ParamAttr(name=_name + "_weights"), + bias_attr=False, + name=_name + ".conv2d.output.1") + + bn_name = self.na.fix_conv_norm_name(name) + bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name + + norm_lr = 0. if self.freeze_norm else 1. + norm_decay = self.norm_decay + pattr = ParamAttr(name=bn_name + '_scale', learning_rate=norm_lr, regularizer=L2Decay(norm_decay)) + battr = ParamAttr(name=bn_name + '_offset', learning_rate=norm_lr, regularizer=L2Decay(norm_decay)) + + if self.norm_type in ['bn', 'sync_bn']: + global_stats = True if self.freeze_norm else False + out = fluid.layers.batch_norm( + input=conv, + act=act, + name=bn_name + '.output.1', + param_attr=pattr, + bias_attr=battr, + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance', + use_global_stats=global_stats) + scale = fluid.framework._get_var(pattr.name) + bias = fluid.framework._get_var(battr.name) + elif self.norm_type == 'affine_channel': + scale = fluid.layers.create_parameter( + shape=[conv.shape[1]], dtype=conv.dtype, attr=pattr, default_initializer=fluid.initializer.Constant(1.)) + bias = fluid.layers.create_parameter( + shape=[conv.shape[1]], dtype=conv.dtype, attr=battr, default_initializer=fluid.initializer.Constant(0.)) + out = fluid.layers.affine_channel(x=conv, scale=scale, bias=bias, act=act) + if self.freeze_norm: + scale.stop_gradient = True + bias.stop_gradient = True + return out + + def _shortcut(self, input, ch_out, stride, is_first, name): + max_pooling_in_short_cut = self.variant == 'd' + ch_in = input.shape[1] + # the naming rule is same as pretrained weight + name = self.na.fix_shortcut_name(name) + std_senet = getattr(self, 'std_senet', False) + if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first): + if std_senet: + if is_first: + return self._conv_norm(input, ch_out, 1, stride, name=name) + else: + return self._conv_norm(input, ch_out, 3, stride, name=name) + if max_pooling_in_short_cut and not is_first: + input = fluid.layers.pool2d( + input=input, pool_size=2, pool_stride=2, pool_padding=0, ceil_mode=True, pool_type='avg') + return self._conv_norm(input, ch_out, 1, 1, name=name) + return self._conv_norm(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck(self, input, num_filters, stride, is_first, name, dcn_v2=False): + if self.variant == 'a': + stride1, stride2 = stride, 1 + else: + stride1, stride2 = 1, stride + + # ResNeXt + groups = getattr(self, 'groups', 1) + group_width = getattr(self, 'group_width', -1) + if groups == 1: + expand = 4 + elif (groups * group_width) == 256: + expand = 1 + else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d + num_filters = num_filters // 2 + expand = 2 + + conv_name1, conv_name2, conv_name3, \ + shortcut_name = self.na.fix_bottleneck_name(name) + std_senet = getattr(self, 'std_senet', False) + if std_senet: + conv_def = [[int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1], + [num_filters, 3, stride2, 'relu', groups, conv_name2], + [num_filters * expand, 1, 1, None, 1, conv_name3]] + else: + conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], + [num_filters, 3, stride2, 'relu', groups, conv_name2], + [num_filters * expand, 1, 1, None, 1, conv_name3]] + + residual = input + for i, (c, k, s, act, g, _name) in enumerate(conv_def): + residual = self._conv_norm( + input=residual, + num_filters=c, + 
filter_size=k, + stride=s, + act=act, + groups=g, + name=_name, + dcn_v2=(i == 1 and dcn_v2)) + short = self._shortcut(input, num_filters * expand, stride, is_first=is_first, name=shortcut_name) + # Squeeze-and-Excitation + if callable(getattr(self, '_squeeze_excitation', None)): + residual = self._squeeze_excitation(input=residual, num_channels=num_filters, name='fc' + name) + return fluid.layers.elementwise_add(x=short, y=residual, act='relu', name=name + ".add.output.5") + + def basicblock(self, input, num_filters, stride, is_first, name, dcn_v2=False): + assert dcn_v2 is False, "Not implemented yet." + conv0 = self._conv_norm( + input=input, num_filters=num_filters, filter_size=3, act='relu', stride=stride, name=name + "_branch2a") + conv1 = self._conv_norm(input=conv0, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") + short = self._shortcut(input, num_filters, stride, is_first, name=name + "_branch1") + return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') + + def layer_warp(self, input, stage_num): + """ + Args: + input (Variable): input variable. + stage_num (int): the stage number, should be 2, 3, 4, 5 + + Returns: + The last variable in endpoint-th stage. + """ + assert stage_num in [2, 3, 4, 5] + + stages, block_func = self.depth_cfg[self.depth] + count = stages[stage_num - 2] + + ch_out = self.stage_filters[stage_num - 2] + is_first = False if stage_num != 2 else True + dcn_v2 = True if stage_num in self.dcn_v2_stages else False + + nonlocal_mod = 1000 + if stage_num in self.nonlocal_stages: + nonlocal_mod = self.nonlocal_mod_cfg[self.depth] if stage_num == 4 else 2 + + # Make the layer name and parameter name consistent + # with ImageNet pre-trained model + conv = input + for i in range(count): + conv_name = self.na.fix_layer_warp_name(stage_num, count, i) + if self.depth < 50: + is_first = True if i == 0 and stage_num == 2 else False + conv = block_func( + input=conv, + num_filters=ch_out, + stride=2 if i == 0 and stage_num != 2 else 1, + is_first=is_first, + name=conv_name, + dcn_v2=dcn_v2) + + # add non local model + dim_in = conv.shape[1] + nonlocal_name = "nonlocal_conv{}".format(stage_num) + if i % nonlocal_mod == nonlocal_mod - 1: + conv = add_space_nonlocal(conv, dim_in, dim_in, nonlocal_name + '_{}'.format(i), int(dim_in / 2)) + return conv + + def c1_stage(self, input): + out_chan = self._c1_out_chan_num + + conv1_name = self.na.fix_c1_stage_name() + + if self.variant in ['c', 'd']: + conv_def = [ + [out_chan // 2, 3, 2, "conv1_1"], + [out_chan // 2, 3, 1, "conv1_2"], + [out_chan, 3, 1, "conv1_3"], + ] + else: + conv_def = [[out_chan, 7, 2, conv1_name]] + + for (c, k, s, _name) in conv_def: + input = self._conv_norm(input=input, num_filters=c, filter_size=k, stride=s, act='relu', name=_name) + + output = fluid.layers.pool2d(input=input, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + return output + + def __call__(self, input): + assert isinstance(input, Variable) + assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ + "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) + + res_endpoints = [] + + res = input + feature_maps = self.feature_maps + severed_head = getattr(self, 'severed_head', False) + if not severed_head: + res = self.c1_stage(res) + feature_maps = range(2, max(self.feature_maps) + 1) + + for i in feature_maps: + res = self.layer_warp(res, i) + if i in self.feature_maps: + res_endpoints.append(res) + if self.freeze_at >= i: + res.stop_gradient = True + if self.get_prediction: + pool 
= fluid.layers.pool2d(input=res, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + + out = fluid.layers.fc( + input=pool, + size=self.class_dim, + param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv))) + out = fluid.layers.softmax(out) + return out + return OrderedDict( + [('res{}_sum'.format(self.feature_maps[idx]), feat) for idx, feat in enumerate(res_endpoints)]) + + +class ResNetC5(ResNet): + def __init__(self, + depth=50, + freeze_at=2, + norm_type='affine_channel', + freeze_norm=True, + norm_decay=0., + variant='b', + feature_maps=[5], + weight_prefix_name=''): + super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm, norm_decay, variant, feature_maps) + self.severed_head = True diff --git a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/retina_head.py b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/retina_head.py new file mode 100644 index 0000000000000000000000000000000000000000..1cde9e3202136fefc81c21812f805c456a12d548 --- /dev/null +++ b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/retina_head.py @@ -0,0 +1,381 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.initializer import Normal, Constant +from paddle.fluid.regularizer import L2Decay + +__all__ = ['AnchorGenerator', 'RetinaTargetAssign', 'RetinaOutputDecoder', 'RetinaHead'] + + +class AnchorGenerator(object): + # __op__ = fluid.layers.anchor_generator + def __init__(self, + stride=[16.0, 16.0], + anchor_sizes=[32, 64, 128, 256, 512], + aspect_ratios=[0.5, 1., 2.], + variance=[1., 1., 1., 1.]): + self.anchor_sizes = anchor_sizes + self.aspect_ratios = aspect_ratios + self.variance = variance + self.stride = stride + + +class RetinaTargetAssign(object): + # __op__ = fluid.layers.retinanet_target_assign + def __init__(self, positive_overlap=0.5, negative_overlap=0.4): + self.positive_overlap = positive_overlap + self.negative_overlap = negative_overlap + + +class RetinaOutputDecoder(object): + # __op__ = fluid.layers.retinanet_detection_output + def __init__(self, score_thresh=0.05, nms_thresh=0.3, pre_nms_top_n=1000, detections_per_im=100, nms_eta=1.0): + super(RetinaOutputDecoder, self).__init__() + self.score_threshold = score_thresh + self.nms_threshold = nms_thresh + self.nms_top_k = pre_nms_top_n + self.keep_top_k = detections_per_im + self.nms_eta = nms_eta + + +class RetinaHead(object): + """ + Retina Head + + Args: + anchor_generator (object): `AnchorGenerator` instance + target_assign (object): `RetinaTargetAssign` instance + output_decoder (object): `RetinaOutputDecoder` instance + num_convs_per_octave (int): Number of convolution layers in each octave + num_chan (int): Number of octave output channels + max_level (int): Highest level of FPN output + min_level (int): Lowest level of FPN output + prior_prob (float): Used to set the bias init for the class prediction layer + base_scale (int): Anchors are generated based on this scale + num_scales_per_octave (int): Number of anchor scales per octave + num_classes (int): Number of classes + gamma (float): The parameter in focal loss + alpha (float): The parameter in focal loss + sigma (float): The parameter in smooth l1 loss + """ + __inject__ = ['anchor_generator', 'target_assign', 'output_decoder'] + __shared__ = ['num_classes'] + + 
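+    # A hedged usage sketch (illustrative; body_feats and spatial_scale would
+    # come from the FPN neck):
+    #   head = RetinaHead(num_classes=81)
+    #   pred = head.get_prediction(body_feats, spatial_scale, im_info)
+    #   loss = head.get_loss(body_feats, spatial_scale, im_info, gt_box, gt_label, is_crowd)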
def __init__(self, + anchor_generator=AnchorGenerator(), + target_assign=RetinaTargetAssign(), + output_decoder=RetinaOutputDecoder(), + num_convs_per_octave=4, + num_chan=256, + max_level=7, + min_level=3, + prior_prob=0.01, + base_scale=4, + num_scales_per_octave=3, + num_classes=81, + gamma=2.0, + alpha=0.25, + sigma=3.0151134457776365): + self.anchor_generator = anchor_generator + self.target_assign = target_assign + self.output_decoder = output_decoder + self.num_convs_per_octave = num_convs_per_octave + self.num_chan = num_chan + self.max_level = max_level + self.min_level = min_level + self.prior_prob = prior_prob + self.base_scale = base_scale + self.num_scales_per_octave = num_scales_per_octave + self.num_classes = num_classes + self.gamma = gamma + self.alpha = alpha + self.sigma = sigma + + def _class_subnet(self, body_feats, spatial_scale): + """ + Get class predictions of all level FPN level. + + Args: + fpn_dict(dict): A dictionary represents the output of FPN with + their name. + spatial_scale(list): A list of multiplicative spatial scale factor. + + Returns: + cls_pred_input(list): Class prediction of all input fpn levels. + """ + assert len(body_feats) == self.max_level - self.min_level + 1 + fpn_name_list = list(body_feats.keys()) + cls_pred_list = [] + for lvl in range(self.min_level, self.max_level + 1): + fpn_name = fpn_name_list[self.max_level - lvl] + subnet_blob = body_feats[fpn_name] + for i in range(self.num_convs_per_octave): + conv_name = 'retnet_cls_conv_n{}_fpn{}'.format(i, lvl) + conv_share_name = 'retnet_cls_conv_n{}_fpn{}'.format(i, self.min_level) + subnet_blob_in = subnet_blob + subnet_blob = fluid.layers.conv2d( + input=subnet_blob_in, + num_filters=self.num_chan, + filter_size=3, + stride=1, + padding=1, + act='relu', + name=conv_name, + param_attr=ParamAttr(name=conv_share_name + '_w', initializer=Normal(loc=0., scale=0.01)), + bias_attr=ParamAttr(name=conv_share_name + '_b', learning_rate=2., regularizer=L2Decay(0.))) + + # class prediction + cls_name = 'retnet_cls_pred_fpn{}'.format(lvl) + cls_share_name = 'retnet_cls_pred_fpn{}'.format(self.min_level) + num_anchors = self.num_scales_per_octave * len(self.anchor_generator.aspect_ratios) + cls_dim = num_anchors * (self.num_classes - 1) + # bias initialization: b = -log((1 - pai) / pai) + bias_init = float(-np.log((1 - self.prior_prob) / self.prior_prob)) + out_cls = fluid.layers.conv2d( + input=subnet_blob, + num_filters=cls_dim, + filter_size=3, + stride=1, + padding=1, + act=None, + name=cls_name, + param_attr=ParamAttr(name=cls_share_name + '_w', initializer=Normal(loc=0., scale=0.01)), + bias_attr=ParamAttr( + name=cls_share_name + '_b', + initializer=Constant(value=bias_init), + learning_rate=2., + regularizer=L2Decay(0.))) + cls_pred_list.append(out_cls) + + return cls_pred_list + + def _bbox_subnet(self, body_feats, spatial_scale): + """ + Get bounding box predictions of all level FPN level. + + Args: + fpn_dict(dict): A dictionary represents the output of FPN with + their name. + spatial_scale(list): A list of multiplicative spatial scale factor. + + Returns: + bbox_pred_input(list): Bounding box prediction of all input fpn + levels. 
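+
+            Note: conv weights are shared across FPN levels (via the
+            `conv_share_name` parameters below), matching the shared-subnet
+            design of RetinaNet.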
+ """ + assert len(body_feats) == self.max_level - self.min_level + 1 + fpn_name_list = list(body_feats.keys()) + bbox_pred_list = [] + for lvl in range(self.min_level, self.max_level + 1): + fpn_name = fpn_name_list[self.max_level - lvl] + subnet_blob = body_feats[fpn_name] + for i in range(self.num_convs_per_octave): + conv_name = 'retnet_bbox_conv_n{}_fpn{}'.format(i, lvl) + conv_share_name = 'retnet_bbox_conv_n{}_fpn{}'.format(i, self.min_level) + subnet_blob_in = subnet_blob + subnet_blob = fluid.layers.conv2d( + input=subnet_blob_in, + num_filters=self.num_chan, + filter_size=3, + stride=1, + padding=1, + act='relu', + name=conv_name, + param_attr=ParamAttr(name=conv_share_name + '_w', initializer=Normal(loc=0., scale=0.01)), + bias_attr=ParamAttr(name=conv_share_name + '_b', learning_rate=2., regularizer=L2Decay(0.))) + + # bbox prediction + bbox_name = 'retnet_bbox_pred_fpn{}'.format(lvl) + bbox_share_name = 'retnet_bbox_pred_fpn{}'.format(self.min_level) + num_anchors = self.num_scales_per_octave * len(self.anchor_generator.aspect_ratios) + bbox_dim = num_anchors * 4 + out_bbox = fluid.layers.conv2d( + input=subnet_blob, + num_filters=bbox_dim, + filter_size=3, + stride=1, + padding=1, + act=None, + name=bbox_name, + param_attr=ParamAttr(name=bbox_share_name + '_w', initializer=Normal(loc=0., scale=0.01)), + bias_attr=ParamAttr(name=bbox_share_name + '_b', learning_rate=2., regularizer=L2Decay(0.))) + bbox_pred_list.append(out_bbox) + return bbox_pred_list + + def _anchor_generate(self, body_feats, spatial_scale): + """ + Get anchor boxes of all level FPN level. + + Args: + fpn_dict(dict): A dictionary represents the output of FPN with their name. + spatial_scale(list): A list of multiplicative spatial scale factor. + + Return: + anchor_input(list): Anchors of all input fpn levels with shape of. + anchor_var_input(list): Anchor variance of all input fpn levels with shape. + """ + assert len(body_feats) == self.max_level - self.min_level + 1 + fpn_name_list = list(body_feats.keys()) + anchor_list = [] + anchor_var_list = [] + for lvl in range(self.min_level, self.max_level + 1): + anchor_sizes = [] + stride = int(1 / spatial_scale[self.max_level - lvl]) + for octave in range(self.num_scales_per_octave): + anchor_size = stride * (2**(float(octave) / float(self.num_scales_per_octave))) * self.base_scale + anchor_sizes.append(anchor_size) + fpn_name = fpn_name_list[self.max_level - lvl] + anchor, anchor_var = fluid.layers.anchor_generator( + input=body_feats[fpn_name], + anchor_sizes=anchor_sizes, + aspect_ratios=self.anchor_generator.aspect_ratios, + stride=[stride, stride], + variance=self.anchor_generator.variance) + anchor_list.append(anchor) + anchor_var_list.append(anchor_var) + return anchor_list, anchor_var_list + + def _get_output(self, body_feats, spatial_scale): + """ + Get class, bounding box predictions and anchor boxes of all level FPN level. + + Args: + fpn_dict(dict): A dictionary represents the output of FPN with + their name. + spatial_scale(list): A list of multiplicative spatial scale factor. + + Returns: + cls_pred_input(list): Class prediction of all input fpn levels. + bbox_pred_input(list): Bounding box prediction of all input fpn + levels. + anchor_input(list): Anchors of all input fpn levels with shape of. + anchor_var_input(list): Anchor variance of all input fpn levels with + shape. 
+ """ + assert len(body_feats) == self.max_level - self.min_level + 1 + # class subnet + cls_pred_list = self._class_subnet(body_feats, spatial_scale) + # bbox subnet + bbox_pred_list = self._bbox_subnet(body_feats, spatial_scale) + #generate anchors + anchor_list, anchor_var_list = self._anchor_generate(body_feats, spatial_scale) + cls_pred_reshape_list = [] + bbox_pred_reshape_list = [] + anchor_reshape_list = [] + anchor_var_reshape_list = [] + for i in range(self.max_level - self.min_level + 1): + cls_pred_transpose = fluid.layers.transpose(cls_pred_list[i], perm=[0, 2, 3, 1]) + cls_pred_reshape = fluid.layers.reshape(cls_pred_transpose, shape=(0, -1, self.num_classes - 1)) + bbox_pred_transpose = fluid.layers.transpose(bbox_pred_list[i], perm=[0, 2, 3, 1]) + bbox_pred_reshape = fluid.layers.reshape(bbox_pred_transpose, shape=(0, -1, 4)) + anchor_reshape = fluid.layers.reshape(anchor_list[i], shape=(-1, 4)) + anchor_var_reshape = fluid.layers.reshape(anchor_var_list[i], shape=(-1, 4)) + cls_pred_reshape_list.append(cls_pred_reshape) + bbox_pred_reshape_list.append(bbox_pred_reshape) + anchor_reshape_list.append(anchor_reshape) + anchor_var_reshape_list.append(anchor_var_reshape) + output = {} + output['cls_pred'] = cls_pred_reshape_list + output['bbox_pred'] = bbox_pred_reshape_list + output['anchor'] = anchor_reshape_list + output['anchor_var'] = anchor_var_reshape_list + return output + + def get_prediction(self, body_feats, spatial_scale, im_info): + """ + Get prediction bounding box in test stage. + + Args: + fpn_dict(dict): A dictionary represents the output of FPN with + their name. + spatial_scale(list): A list of multiplicative spatial scale factor. + im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the + number of input images, each element consists of im_height, + im_width, im_scale. + + Returns: + pred_result(Variable): Prediction result with shape [N, 6]. Each + row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]. + N is the total number of prediction. + """ + output = self._get_output(body_feats, spatial_scale) + cls_pred_reshape_list = output['cls_pred'] + bbox_pred_reshape_list = output['bbox_pred'] + anchor_reshape_list = output['anchor'] + for i in range(self.max_level - self.min_level + 1): + cls_pred_reshape_list[i] = fluid.layers.sigmoid(cls_pred_reshape_list[i]) + pred_result = fluid.layers.retinanet_detection_output( + bboxes=bbox_pred_reshape_list, + scores=cls_pred_reshape_list, + anchors=anchor_reshape_list, + im_info=im_info, + score_threshold=self.output_decoder.score_threshold, + nms_threshold=self.output_decoder.nms_threshold, + nms_top_k=self.output_decoder.nms_top_k, + keep_top_k=self.output_decoder.keep_top_k, + nms_eta=self.output_decoder.nms_eta) + return pred_result + + def get_loss(self, body_feats, spatial_scale, im_info, gt_box, gt_label, is_crowd): + """ + Calculate the loss of retinanet. + Args: + fpn_dict(dict): A dictionary represents the output of FPN with + their name. + spatial_scale(list): A list of multiplicative spatial scale factor. + im_info(Variable): A 2-D LoDTensor with shape [B, 3]. B is the + number of input images, each element consists of im_height, + im_width, im_scale. + gt_box(Variable): The ground-truth bounding boxes with shape [M, 4]. + M is the number of groundtruth. + gt_label(Variable): The ground-truth labels with shape [M, 1]. + M is the number of groundtruth. + is_crowd(Variable): Indicates groud-truth is crowd or not with + shape [M, 1]. M is the number of groundtruth. 
+
+        Returns:
+            loss (dict): A dict with two elements: 'loss_cls' (Variable), the
+                focal loss, and 'loss_bbox' (Variable), the smooth-L1 loss.
+        """
+        output = self._get_output(body_feats, spatial_scale)
+        cls_pred_reshape_list = output['cls_pred']
+        bbox_pred_reshape_list = output['bbox_pred']
+        anchor_reshape_list = output['anchor']
+        anchor_var_reshape_list = output['anchor_var']
+
+        cls_pred_input = fluid.layers.concat(cls_pred_reshape_list, axis=1)
+        bbox_pred_input = fluid.layers.concat(bbox_pred_reshape_list, axis=1)
+        anchor_input = fluid.layers.concat(anchor_reshape_list, axis=0)
+        anchor_var_input = fluid.layers.concat(anchor_var_reshape_list, axis=0)
+        # assign per-anchor classification/regression targets with the
+        # RetinaNet positive/negative IoU thresholds (the op named on
+        # RetinaTargetAssign above)
+        score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight, fg_num = \
+            fluid.layers.retinanet_target_assign(
+                bbox_pred=bbox_pred_input,
+                cls_logits=cls_pred_input,
+                anchor_box=anchor_input,
+                anchor_var=anchor_var_input,
+                gt_boxes=gt_box,
+                gt_labels=gt_label,
+                is_crowd=is_crowd,
+                im_info=im_info,
+                num_classes=self.num_classes - 1,
+                positive_overlap=self.target_assign.positive_overlap,
+                negative_overlap=self.target_assign.negative_overlap)
+        fg_num = fluid.layers.reduce_sum(fg_num, name='fg_num')
+        score_tgt = fluid.layers.cast(score_tgt, 'int32')
+        loss_cls = fluid.layers.sigmoid_focal_loss(
+            x=score_pred, label=score_tgt, fg_num=fg_num, gamma=self.gamma, alpha=self.alpha)
+        loss_cls = fluid.layers.reduce_sum(loss_cls, name='loss_cls')
+        loss_bbox = fluid.layers.smooth_l1(
+            x=loc_pred, y=loc_tgt, sigma=self.sigma, inside_weight=bbox_weight, outside_weight=bbox_weight)
+        loss_bbox = fluid.layers.reduce_sum(loss_bbox, name='loss_bbox')
+        loss_bbox = loss_bbox / fg_num
+        return {'loss_cls': loss_cls, 'loss_bbox': loss_bbox}
diff --git a/hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/README.md b/modules/image/object_detection/ssd_mobilenet_v1_pascal/README.md
similarity index 100%
rename from hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/README.md
rename to modules/image/object_detection/ssd_mobilenet_v1_pascal/README.md
diff --git a/hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/__init__.py b/modules/image/object_detection/ssd_mobilenet_v1_pascal/__init__.py
similarity index 100%
rename from hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/__init__.py
rename to modules/image/object_detection/ssd_mobilenet_v1_pascal/__init__.py
diff --git a/hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/config.yml b/modules/image/object_detection/ssd_mobilenet_v1_pascal/config.yml
similarity index 100%
rename from hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/config.yml
rename to modules/image/object_detection/ssd_mobilenet_v1_pascal/config.yml
diff --git a/modules/image/object_detection/ssd_mobilenet_v1_pascal/data_feed.py b/modules/image/object_detection/ssd_mobilenet_v1_pascal/data_feed.py
new file mode 100644
index 0000000000000000000000000000000000000000..788b07a845f17dcb6cd7afeee32d44197077b77a
--- /dev/null
+++ b/modules/image/object_detection/ssd_mobilenet_v1_pascal/data_feed.py
@@ -0,0 +1,197 @@
+# coding=utf-8
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import division
+
+import os
+import random
+from collections import OrderedDict
+
+import cv2
+import numpy as 
np +from PIL import Image +from paddle import fluid + +__all__ = ['reader'] + + +class DecodeImage(object): + def __init__(self, to_rgb=True, with_mixup=False): + """ Transform the image data to numpy format. + + Args: + to_rgb (bool): whether to convert BGR to RGB + with_mixup (bool): whether or not to mixup image and gt_bbbox/gt_score + """ + self.to_rgb = to_rgb + self.with_mixup = with_mixup + + def __call__(self, im): + if self.to_rgb: + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + + return im + + +class ResizeImage(object): + def __init__(self, target_size=0, max_size=0, interp=cv2.INTER_LINEAR, use_cv2=True): + """ + Rescale image to the specified target size, and capped at max_size + if max_size != 0. + If target_size is list, selected a scale randomly as the specified + target size. + + Args: + target_size (int|list): the target size of image's short side, + multi-scale training is adopted when type is list. + max_size (int): the max size of image + interp (int): the interpolation method + use_cv2 (bool): use the cv2 interpolation method or use PIL + interpolation method + """ + self.max_size = int(max_size) + self.interp = int(interp) + self.use_cv2 = use_cv2 + self.target_size = target_size + + def __call__(self, im): + if not isinstance(im, np.ndarray): + raise TypeError("{}: image type is not numpy.".format(self)) + if len(im.shape) != 3: + raise ValueError('{}: image is not 3-dimensional.'.format(self)) + im_shape = im.shape + im_size_min = np.min(im_shape[0:2]) + im_size_max = np.max(im_shape[0:2]) + if isinstance(self.target_size, list): + # Case for multi-scale training + selected_size = random.choice(self.target_size) + else: + selected_size = self.target_size + if float(im_size_min) == 0: + raise ZeroDivisionError('{}: min size of image is 0'.format(self)) + if self.max_size != 0: + im_scale = float(selected_size) / float(im_size_min) + # Prevent the biggest axis from being more than max_size + if np.round(im_scale * im_size_max) > self.max_size: + im_scale = float(self.max_size) / float(im_size_max) + im_scale_x = im_scale + im_scale_y = im_scale + + resize_w = im_scale_x * float(im_shape[1]) + resize_h = im_scale_y * float(im_shape[0]) + im_info = [resize_h, resize_w, im_scale] + else: + im_scale_x = float(selected_size) / float(im_shape[1]) + im_scale_y = float(selected_size) / float(im_shape[0]) + + resize_w = selected_size + resize_h = selected_size + + if self.use_cv2: + im = cv2.resize(im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=self.interp) + else: + if self.max_size != 0: + raise TypeError('If you set max_size to cap the maximum size of image,' + 'please set use_cv2 to True to resize the image.') + im = im.astype('uint8') + im = Image.fromarray(im) + im = im.resize((int(resize_w), int(resize_h)), self.interp) + im = np.array(im) + + return im + + +class NormalizeImage(object): + def __init__(self, mean=[0.485, 0.456, 0.406], std=[1, 1, 1], is_scale=True, is_channel_first=True): + """ + Args: + mean (list): the pixel mean + std (list): the pixel variance + """ + self.mean = mean + self.std = std + self.is_scale = is_scale + self.is_channel_first = is_channel_first + + def __call__(self, im): + """Normalize the image. + + Operators: + 1.(optional) Scale the image to [0,1] + 2. 
Each pixel minus mean and is divided by std + """ + im = im.astype(np.float32, copy=False) + if self.is_channel_first: + mean = np.array(self.mean)[:, np.newaxis, np.newaxis] + std = np.array(self.std)[:, np.newaxis, np.newaxis] + else: + mean = np.array(self.mean)[np.newaxis, np.newaxis, :] + std = np.array(self.std)[np.newaxis, np.newaxis, :] + if self.is_scale: + im = im / 255.0 + im -= mean + im /= std + return im + + +class Permute(object): + def __init__(self, to_bgr=True, channel_first=True): + """ + Change the channel. + + Args: + to_bgr (bool): confirm whether to convert RGB to BGR + channel_first (bool): confirm whether to change channel + """ + self.to_bgr = to_bgr + self.channel_first = channel_first + + def __call__(self, im): + if self.channel_first: + im = np.swapaxes(im, 1, 2) + im = np.swapaxes(im, 1, 0) + if self.to_bgr: + im = im[[2, 1, 0], :, :] + return im + + +def reader(paths=[], + images=None, + decode_image=DecodeImage(to_rgb=True, with_mixup=False), + resize_image=ResizeImage(target_size=512, interp=1, max_size=0, use_cv2=False), + permute_image=Permute(to_bgr=False), + normalize_image=NormalizeImage(mean=[104, 117, 123], std=[1, 1, 1], is_scale=False)): + """ + data generator + + Args: + paths (list[str]): paths to images. + images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] + decode_image (class object): instance of + resize_image (class object): instance of + permute_image (class object): instance of + normalize_image (class object): instance of + """ + img_list = [] + if paths is not None: + assert type(paths) is list, "type(paths) is not list." + for img_path in paths: + assert os.path.isfile(img_path), "The {} isn't a valid file path.".format(img_path) + img = cv2.imread(img_path).astype('float32') + img_list.append(img) + if images is not None: + for img in images: + img_list.append(img) + + decode_image = DecodeImage(to_rgb=True, with_mixup=False) + resize_image = ResizeImage(target_size=300, interp=1, max_size=0, use_cv2=False) + permute_image = Permute() + normalize_image = NormalizeImage( + mean=[127.5, 127.5, 127.5], std=[127.502231, 127.502231, 127.502231], is_scale=False) + + for img in img_list: + preprocessed_img = decode_image(img) + preprocessed_img = resize_image(preprocessed_img) + preprocessed_img = permute_image(preprocessed_img) + preprocessed_img = normalize_image(preprocessed_img) + yield [preprocessed_img] diff --git a/hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/label_file.txt b/modules/image/object_detection/ssd_mobilenet_v1_pascal/label_file.txt similarity index 100% rename from hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/label_file.txt rename to modules/image/object_detection/ssd_mobilenet_v1_pascal/label_file.txt diff --git a/modules/image/object_detection/ssd_mobilenet_v1_pascal/mobilenet_v1.py b/modules/image/object_detection/ssd_mobilenet_v1_pascal/mobilenet_v1.py new file mode 100644 index 0000000000000000000000000000000000000000..1982036d912e0c2ed4b2d1d8b022fe9512d292de --- /dev/null +++ b/modules/image/object_detection/ssd_mobilenet_v1_pascal/mobilenet_v1.py @@ -0,0 +1,167 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.regularizer import L2Decay + +__all__ = ['MobileNet'] + + +class MobileNet(object): + """ + MobileNet v1, see https://arxiv.org/abs/1704.04861 + + Args: + norm_type (str): 
normalization type, 'bn' and 'sync_bn' are supported + norm_decay (float): weight decay for normalization layer weights + conv_group_scale (int): scaling factor for convolution groups + with_extra_blocks (bool): if extra blocks should be added + extra_block_filters (list): number of filter for each extra block + class_dim (int): number of class while classification + yolo_v3 (bool): whether to output layers which yolo_v3 needs + """ + __shared__ = ['norm_type', 'weight_prefix_name'] + + def __init__(self, + norm_type='bn', + norm_decay=0., + conv_group_scale=1, + conv_learning_rate=1.0, + with_extra_blocks=False, + extra_block_filters=[[256, 512], [128, 256], [128, 256], [64, 128]], + weight_prefix_name='', + class_dim=1000, + yolo_v3=False): + self.norm_type = norm_type + self.norm_decay = norm_decay + self.conv_group_scale = conv_group_scale + self.conv_learning_rate = conv_learning_rate + self.with_extra_blocks = with_extra_blocks + self.extra_block_filters = extra_block_filters + self.prefix_name = weight_prefix_name + self.class_dim = class_dim + self.yolo_v3 = yolo_v3 + + def _conv_norm(self, + input, + filter_size, + num_filters, + stride, + padding, + num_groups=1, + act='relu', + use_cudnn=True, + name=None): + parameter_attr = ParamAttr( + learning_rate=self.conv_learning_rate, initializer=fluid.initializer.MSRA(), name=name + "_weights") + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + act=None, + use_cudnn=use_cudnn, + param_attr=parameter_attr, + bias_attr=False) + + bn_name = name + "_bn" + norm_decay = self.norm_decay + bn_param_attr = ParamAttr(regularizer=L2Decay(norm_decay), name=bn_name + '_scale') + bn_bias_attr = ParamAttr(regularizer=L2Decay(norm_decay), name=bn_name + '_offset') + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=bn_param_attr, + bias_attr=bn_bias_attr, + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def depthwise_separable(self, input, num_filters1, num_filters2, num_groups, stride, scale, name=None): + depthwise_conv = self._conv_norm( + input=input, + filter_size=3, + num_filters=int(num_filters1 * scale), + stride=stride, + padding=1, + num_groups=int(num_groups * scale), + use_cudnn=False, + name=name + "_dw") + + pointwise_conv = self._conv_norm( + input=depthwise_conv, + filter_size=1, + num_filters=int(num_filters2 * scale), + stride=1, + padding=0, + name=name + "_sep") + return pointwise_conv + + def _extra_block(self, input, num_filters1, num_filters2, num_groups, stride, name=None): + pointwise_conv = self._conv_norm( + input=input, + filter_size=1, + num_filters=int(num_filters1), + stride=1, + num_groups=int(num_groups), + padding=0, + name=name + "_extra1") + normal_conv = self._conv_norm( + input=pointwise_conv, + filter_size=3, + num_filters=int(num_filters2), + stride=2, + num_groups=int(num_groups), + padding=1, + name=name + "_extra2") + return normal_conv + + def __call__(self, input): + scale = self.conv_group_scale + blocks = [] + # input 1/1 + out = self._conv_norm(input, 3, int(32 * scale), 2, 1, name=self.prefix_name + "conv1") + # 1/2 + out = self.depthwise_separable(out, 32, 64, 32, 1, scale, name=self.prefix_name + "conv2_1") + out = self.depthwise_separable(out, 64, 128, 64, 2, scale, name=self.prefix_name + "conv2_2") + # 1/4 + out = self.depthwise_separable(out, 128, 128, 128, 1, scale, name=self.prefix_name + "conv3_1") + out = 
self.depthwise_separable(out, 128, 256, 128, 2, scale, name=self.prefix_name + "conv3_2")
+        # 1/8
+        blocks.append(out)
+        out = self.depthwise_separable(out, 256, 256, 256, 1, scale, name=self.prefix_name + "conv4_1")
+        out = self.depthwise_separable(out, 256, 512, 256, 2, scale, name=self.prefix_name + "conv4_2")
+        # 1/16
+        blocks.append(out)
+        for i in range(5):
+            out = self.depthwise_separable(out, 512, 512, 512, 1, scale, name=self.prefix_name + "conv5_" + str(i + 1))
+        module11 = out
+
+        out = self.depthwise_separable(out, 512, 1024, 512, 2, scale, name=self.prefix_name + "conv5_6")
+        # 1/32
+        out = self.depthwise_separable(out, 1024, 1024, 1024, 1, scale, name=self.prefix_name + "conv6")
+        module13 = out
+        blocks.append(out)
+        if self.yolo_v3:
+            return blocks
+        if not self.with_extra_blocks:
+            out = fluid.layers.pool2d(input=out, pool_type='avg', global_pooling=True)
+            out = fluid.layers.fc(
+                input=out,
+                size=self.class_dim,
+                param_attr=ParamAttr(initializer=fluid.initializer.MSRA(), name="fc7_weights"),
+                bias_attr=ParamAttr(name="fc7_offset"))
+            out = fluid.layers.softmax(out)
+            blocks.append(out)
+            return blocks
+
+        num_filters = self.extra_block_filters
+        module14 = self._extra_block(module13, num_filters[0][0], num_filters[0][1], 1, 2, self.prefix_name + "conv7_1")
+        module15 = self._extra_block(module14, num_filters[1][0], num_filters[1][1], 1, 2, self.prefix_name + "conv7_2")
+        module16 = self._extra_block(module15, num_filters[2][0], num_filters[2][1], 1, 2, self.prefix_name + "conv7_3")
+        module17 = self._extra_block(module16, num_filters[3][0], num_filters[3][1], 1, 2, self.prefix_name + "conv7_4")
+        return module11, module13, module14, module15, module16, module17
diff --git a/modules/image/object_detection/ssd_mobilenet_v1_pascal/module.py b/modules/image/object_detection/ssd_mobilenet_v1_pascal/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..36ebb4928974155a8ae6b68a897e3f5fc7b2430c
--- /dev/null
+++ b/modules/image/object_detection/ssd_mobilenet_v1_pascal/module.py
@@ -0,0 +1,270 @@
+# coding=utf-8
+from __future__ import absolute_import
+
+import ast
+import argparse
+import os
+from functools import partial
+
+import yaml
+import numpy as np
+import paddle.fluid as fluid
+import paddlehub as hub
+from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+from paddlehub.module.module import moduleinfo, runnable, serving
+from paddlehub.common.paddle_helper import add_vars_prefix
+
+from ssd_mobilenet_v1_pascal.mobilenet_v1 import MobileNet
+from ssd_mobilenet_v1_pascal.processor import load_label_info, postprocess, base64_to_cv2
+from ssd_mobilenet_v1_pascal.data_feed import reader
+
+
+@moduleinfo(
+    name="ssd_mobilenet_v1_pascal",
+    version="1.1.1",
+    type="cv/object_detection",
+    summary="SSD with backbone MobileNet_V1, trained with dataset Pascal VOC.",
+    author="paddlepaddle",
+    author_email="paddle-dev@baidu.com")
+class SSDMobileNetv1(hub.Module):
+    def _initialize(self):
+        self.default_pretrained_model_path = os.path.join(self.directory, "ssd_mobilenet_v1_model")
+        self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt"))
+        self.model_config = None
+        self._set_config()
+
+    def _set_config(self):
+        # predictor config setting.
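+        # One predictor per device: a CPU AnalysisConfig predictor is always
+        # built, and a GPU one is added below only when CUDA_VISIBLE_DEVICES
+        # names a usable device.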
+ cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + cpu_config.switch_ir_optim(False) + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + # model config setting. + if not self.model_config: + with open(os.path.join(self.directory, 'config.yml')) as fp: + self.model_config = yaml.load(fp.read(), Loader=yaml.FullLoader) + + self.multi_box_head_config = self.model_config['MultiBoxHead'] + self.output_decoder_config = self.model_config['SSDOutputDecoder'] + self.mobilenet_config = self.model_config['MobileNet'] + + def context(self, trainable=True, pretrained=True, get_prediction=False): + """ + Distill the Head Features, so as to perform transfer learning. + + Args: + trainable (bool): whether to set parameters trainable. + pretrained (bool): whether to load default pretrained model. + get_prediction (bool): whether to get prediction. + + Returns: + inputs(dict): the input variables. + outputs(dict): the output variables. + context_prog (Program): the program to execute transfer learning. + """ + context_prog = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(context_prog, startup_program): + with fluid.unique_name.guard(): + # image + image = fluid.layers.data(name='image', shape=[3, 300, 300], dtype='float32') + # backbone + backbone = MobileNet(**self.mobilenet_config) + # body_feats + body_feats = backbone(image) + # im_size + im_size = fluid.layers.data(name='im_size', shape=[2], dtype='int32') + # var_prefix + var_prefix = '@HUB_{}@'.format(self.name) + # names of inputs + inputs = {'image': var_prefix + image.name, 'im_size': var_prefix + im_size.name} + # names of outputs + if get_prediction: + locs, confs, box, box_var = fluid.layers.multi_box_head( + inputs=body_feats, image=image, num_classes=21, **self.multi_box_head_config) + pred = fluid.layers.detection_output( + loc=locs, scores=confs, prior_box=box, prior_box_var=box_var, **self.output_decoder_config) + outputs = {'bbox_out': [var_prefix + pred.name]} + else: + outputs = {'body_features': [var_prefix + var.name for var in body_feats]} + + # add_vars_prefix + add_vars_prefix(context_prog, var_prefix) + add_vars_prefix(fluid.default_startup_program(), var_prefix) + # inputs + inputs = {key: context_prog.global_block().vars[value] for key, value in inputs.items()} + outputs = { + out_key: [context_prog.global_block().vars[varname] for varname in out_value] + for out_key, out_value in outputs.items() + } + # trainable + for param in context_prog.global_block().iter_parameters(): + param.trainable = trainable + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + # pretrained + if pretrained: + + def _if_exist(var): + return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist) + else: + exe.run(startup_program) + + return inputs, outputs, context_prog + + def object_detection(self, + paths=None, + images=None, + data=None, + batch_size=1, + use_gpu=False, + output_dir='detection_result', + score_thresh=0.5, + visualization=True): + """API of Object 
Detection.
+
+        Args:
+            paths (list[str]): The paths of images.
+            images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
+            batch_size (int): batch size.
+            use_gpu (bool): Whether to use gpu.
+            output_dir (str): The path to store output images.
+            visualization (bool): Whether to save image or not.
+            score_thresh (float): threshold for object detection.
+
+        Returns:
+            res (list[dict]): The result of detection. keys include 'data', 'save_path', the corresponding value is:
+                data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
+                    left (float): The X coordinate of the upper left corner of the bounding box;
+                    top (float): The Y coordinate of the upper left corner of the bounding box;
+                    right (float): The X coordinate of the lower right corner of the bounding box;
+                    bottom (float): The Y coordinate of the lower right corner of the bounding box;
+                    label (str): The label of detection result;
+                    confidence (float): The confidence of detection result.
+                save_path (str, optional): The path to save output images.
+        """
+        if use_gpu:
+            try:
+                _places = os.environ["CUDA_VISIBLE_DEVICES"]
+                int(_places[0])
+            except:
+                raise RuntimeError(
+                    "Environment variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as the ID of a visible CUDA device."
+                )
+
+        paths = paths if paths else list()
+        if data and 'image' in data:
+            paths += data['image']
+
+        data_reader = partial(reader, paths, images)
+        batch_reader = fluid.io.batch(data_reader, batch_size=batch_size)
+        res = []
+        for iter_id, feed_data in enumerate(batch_reader()):
+            feed_data = np.array(feed_data)
+            image_tensor = PaddleTensor(np.array(list(feed_data[:, 0])).copy())
+            if use_gpu:
+                data_out = self.gpu_predictor.run([image_tensor])
+            else:
+                data_out = self.cpu_predictor.run([image_tensor])
+
+            output = postprocess(
+                paths=paths,
+                images=images,
+                data_out=data_out,
+                score_thresh=score_thresh,
+                label_names=self.label_names,
+                output_dir=output_dir,
+                handle_id=iter_id * batch_size,
+                visualization=visualization)
+            res.extend(output)
+        return res
+
+    def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True):
+        if combined:
+            model_filename = "__model__" if not model_filename else model_filename
+            params_filename = "__params__" if not params_filename else params_filename
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+
+        program, feeded_var_names, target_vars = fluid.io.load_inference_model(
+            dirname=self.default_pretrained_model_path, executor=exe)
+
+        fluid.io.save_inference_model(
+            dirname=dirname,
+            main_program=program,
+            executor=exe,
+            feeded_var_names=feeded_var_names,
+            target_vars=target_vars,
+            model_filename=model_filename,
+            params_filename=params_filename)
+
+    @serving
+    def serving_method(self, images, **kwargs):
+        """
+        Run as a service.
+        """
+        images_decode = [base64_to_cv2(image) for image in images]
+        results = self.object_detection(images=images_decode, **kwargs)
+        return results
+
+    @runnable
+    def run_cmd(self, argvs):
+        """
+        Run as a command.
+        """
+        self.parser = argparse.ArgumentParser(
+            description="Run the {} module.".format(self.name),
+            prog='hub run {}'.format(self.name),
+            usage='%(prog)s',
+            add_help=True)
+        self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.object_detection( + paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization, + score_thresh=args.score_thresh) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='detection_result', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + self.arg_input_group.add_argument( + '--score_thresh', type=ast.literal_eval, default=0.5, help="threshold for object detecion.") diff --git a/modules/image/object_detection/ssd_mobilenet_v1_pascal/processor.py b/modules/image/object_detection/ssd_mobilenet_v1_pascal/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..ff4eb9fe5fd596233ef90c1a0a5baa9d0ff0e56f --- /dev/null +++ b/modules/image/object_detection/ssd_mobilenet_v1_pascal/processor.py @@ -0,0 +1,159 @@ +# coding=utf-8 +import base64 +import os + +import cv2 +import numpy as np +from PIL import Image, ImageDraw + +__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess'] + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def get_save_image_name(img, output_dir, image_path): + """ + Get save image name from source image path. 
+ """ + if not os.path.exists(output_dir): + os.makedirs(output_dir) + image_name = os.path.split(image_path)[-1] + name, ext = os.path.splitext(image_name) + + if img.format == 'PNG': + ext = '.png' + elif img.format == 'JPEG': + ext = '.jpg' + elif img.format == 'BMP': + ext = '.bmp' + else: + if img.mode == "RGB" or img.mode == "L": + ext = ".jpg" + elif img.mode == "RGBA" or img.mode == "P": + ext = '.png' + + return os.path.join(output_dir, "{}".format(name)) + ext + + +def draw_bounding_box_on_image(image_path, data_list, save_dir): + image = Image.open(image_path) + draw = ImageDraw.Draw(image) + for data in data_list: + left, right, top, bottom = data['left'], data['right'], data['top'], data['bottom'] + + # draw bbox + draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=2, fill='red') + + # draw label + if image.mode == 'RGB': + text = data['label'] + ": %.2f%%" % (100 * data['confidence']) + textsize_width, textsize_height = draw.textsize(text=text) + draw.rectangle( + xy=(left, top - (textsize_height + 5), left + textsize_width + 10, top), fill=(255, 255, 255)) + draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) + + save_name = get_save_image_name(image, save_dir, image_path) + if os.path.exists(save_name): + os.remove(save_name) + + image.save(save_name) + + return save_name + + +def clip_bbox(bbox, img_width, img_height): + xmin = max(min(bbox[0], img_width), 0.) + ymin = max(min(bbox[1], img_height), 0.) + xmax = max(min(bbox[2], img_width), 0.) + ymax = max(min(bbox[3], img_height), 0.) + return float(xmin), float(ymin), float(xmax), float(ymax) + + +def load_label_info(file_path): + with open(file_path, 'r') as fr: + text = fr.readlines() + label_names = [] + for info in text: + label_names.append(info.strip()) + return label_names + + +def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True): + """ + postprocess the lod_tensor produced by fluid.Executor.run + + Args: + paths (list[str]): the path of images. + images (list(numpy.ndarray)): list of images, shape of each is [H, W, C]. + data_out (lod_tensor): data produced by executor.run. + score_thresh (float): the low limit of bounding box. + label_names (list[str]): label names. + output_dir (str): output directory. + handle_id (int): The number of images that have been handled. + visualization (bool): whether to save as images. + + Returns: + res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: + data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: + left (float): The X coordinate of the upper left corner of the bounding box; + top (float): The Y coordinate of the upper left corner of the bounding box; + right (float): The X coordinate of the lower right corner of the bounding box; + bottom (float): The Y coordinate of the lower right corner of the bounding box; + label (str): The label of detection result; + confidence (float): The confidence of detection result. + save_path (str): The path to save output images. 
+ """ + lod_tensor = data_out[0] + lod = lod_tensor.lod[0] + results = lod_tensor.as_ndarray() + if handle_id < len(paths): + unhandled_paths = paths[handle_id:] + unhandled_paths_num = len(unhandled_paths) + else: + unhandled_paths_num = 0 + + output = [] + for index in range(len(lod) - 1): + output_i = {'data': []} + if index < unhandled_paths_num: + org_img_path = unhandled_paths[index] + org_img = Image.open(org_img_path) + output_i['path'] = org_img_path + else: + org_img = images[index - unhandled_paths_num] + org_img = org_img.astype(np.uint8) + org_img = Image.fromarray(org_img[:, :, ::-1]) + if visualization: + org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format((handle_id + index))) + org_img.save(org_img_path) + org_img_height = org_img.height + org_img_width = org_img.width + result_i = results[lod[index]:lod[index + 1]] + for row in result_i: + if len(row) != 6: + continue + if row[1] < score_thresh: + continue + category_id = int(row[0]) + confidence = row[1] + bbox = row[2:] + bbox[0] = bbox[0] * org_img_width + bbox[1] = bbox[1] * org_img_height + bbox[2] = bbox[2] * org_img_width + bbox[3] = bbox[3] * org_img_height + dt = {} + dt['label'] = label_names[category_id] + dt['confidence'] = float(confidence) + dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height) + output_i['data'].append(dt) + + output.append(output_i) + if visualization: + output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir) + + return output diff --git a/hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/README.md b/modules/image/object_detection/ssd_vgg16_300_coco2017/README.md similarity index 100% rename from hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/README.md rename to modules/image/object_detection/ssd_vgg16_300_coco2017/README.md diff --git a/hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/__init__.py b/modules/image/object_detection/ssd_vgg16_300_coco2017/__init__.py similarity index 100% rename from hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/__init__.py rename to modules/image/object_detection/ssd_vgg16_300_coco2017/__init__.py diff --git a/hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/config.yml b/modules/image/object_detection/ssd_vgg16_300_coco2017/config.yml similarity index 100% rename from hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/config.yml rename to modules/image/object_detection/ssd_vgg16_300_coco2017/config.yml diff --git a/modules/image/object_detection/ssd_vgg16_300_coco2017/data_feed.py b/modules/image/object_detection/ssd_vgg16_300_coco2017/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..9fad7c95ec6207ad758b75b4799e5698509f07e6 --- /dev/null +++ b/modules/image/object_detection/ssd_vgg16_300_coco2017/data_feed.py @@ -0,0 +1,193 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import os +import random +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image +from paddle import fluid + +__all__ = ['reader'] + + +class DecodeImage(object): + def __init__(self, to_rgb=True, with_mixup=False): + """ Transform the image data to numpy format. 
+ + Args: + to_rgb (bool): whether to convert BGR to RGB + with_mixup (bool): whether or not to mixup image and gt_bbbox/gt_score + """ + self.to_rgb = to_rgb + self.with_mixup = with_mixup + + def __call__(self, im): + if self.to_rgb: + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + + return im + + +class ResizeImage(object): + def __init__(self, target_size=0, max_size=0, interp=cv2.INTER_LINEAR, use_cv2=True): + """ + Rescale image to the specified target size, and capped at max_size + if max_size != 0. + If target_size is list, selected a scale randomly as the specified + target size. + + Args: + target_size (int|list): the target size of image's short side, + multi-scale training is adopted when type is list. + max_size (int): the max size of image + interp (int): the interpolation method + use_cv2 (bool): use the cv2 interpolation method or use PIL + interpolation method + """ + self.max_size = int(max_size) + self.interp = int(interp) + self.use_cv2 = use_cv2 + self.target_size = target_size + + def __call__(self, im): + if not isinstance(im, np.ndarray): + raise TypeError("{}: image type is not numpy.".format(self)) + if len(im.shape) != 3: + raise ValueError('{}: image is not 3-dimensional.'.format(self)) + im_shape = im.shape + im_size_min = np.min(im_shape[0:2]) + im_size_max = np.max(im_shape[0:2]) + if isinstance(self.target_size, list): + # Case for multi-scale training + selected_size = random.choice(self.target_size) + else: + selected_size = self.target_size + if float(im_size_min) == 0: + raise ZeroDivisionError('{}: min size of image is 0'.format(self)) + if self.max_size != 0: + im_scale = float(selected_size) / float(im_size_min) + # Prevent the biggest axis from being more than max_size + if np.round(im_scale * im_size_max) > self.max_size: + im_scale = float(self.max_size) / float(im_size_max) + im_scale_x = im_scale + im_scale_y = im_scale + + resize_w = im_scale_x * float(im_shape[1]) + resize_h = im_scale_y * float(im_shape[0]) + im_info = [resize_h, resize_w, im_scale] + else: + im_scale_x = float(selected_size) / float(im_shape[1]) + im_scale_y = float(selected_size) / float(im_shape[0]) + + resize_w = selected_size + resize_h = selected_size + + if self.use_cv2: + im = cv2.resize(im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=self.interp) + else: + if self.max_size != 0: + raise TypeError('If you set max_size to cap the maximum size of image,' + 'please set use_cv2 to True to resize the image.') + im = im.astype('uint8') + im = Image.fromarray(im) + im = im.resize((int(resize_w), int(resize_h)), self.interp) + im = np.array(im) + + return im + + +class NormalizeImage(object): + def __init__(self, mean=[0.485, 0.456, 0.406], std=[1, 1, 1], is_scale=True, is_channel_first=True): + """ + Args: + mean (list): the pixel mean + std (list): the pixel variance + """ + self.mean = mean + self.std = std + self.is_scale = is_scale + self.is_channel_first = is_channel_first + + def __call__(self, im): + """Normalize the image. + + Operators: + 1.(optional) Scale the image to [0,1] + 2. 
Each pixel minus mean and is divided by std + """ + im = im.astype(np.float32, copy=False) + if self.is_channel_first: + mean = np.array(self.mean)[:, np.newaxis, np.newaxis] + std = np.array(self.std)[:, np.newaxis, np.newaxis] + else: + mean = np.array(self.mean)[np.newaxis, np.newaxis, :] + std = np.array(self.std)[np.newaxis, np.newaxis, :] + if self.is_scale: + im = im / 255.0 + im -= mean + im /= std + return im + + +class Permute(object): + def __init__(self, to_bgr=True, channel_first=True): + """ + Change the channel. + + Args: + to_bgr (bool): confirm whether to convert RGB to BGR + channel_first (bool): confirm whether to change channel + """ + self.to_bgr = to_bgr + self.channel_first = channel_first + + def __call__(self, im): + if self.channel_first: + im = np.swapaxes(im, 1, 2) + im = np.swapaxes(im, 1, 0) + if self.to_bgr: + im = im[[2, 1, 0], :, :] + return im + + +def reader(paths=[], + images=None, + decode_image=DecodeImage(to_rgb=True, with_mixup=False), + resize_image=ResizeImage(target_size=512, interp=1, max_size=0, use_cv2=False), + permute_image=Permute(to_bgr=False), + normalize_image=NormalizeImage(mean=[104, 117, 123], std=[1, 1, 1], is_scale=False)): + """ + data generator + + Args: + paths (list[str]): paths to images. + images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] + decode_image (class object): instance of + resize_image (class object): instance of + permute_image (class object): instance of + normalize_image (class object): instance of + """ + img_list = [] + if paths is not None: + assert type(paths) is list, "type(paths) is not list." + for img_path in paths: + assert os.path.isfile(img_path), "The {} isn't a valid file path.".format(img_path) + img = cv2.imread(img_path).astype('float32') + img_list.append(img) + if images is not None: + for img in images: + img_list.append(img) + + resize_image = ResizeImage(target_size=300, interp=1, max_size=0, use_cv2=False) + + for img in img_list: + preprocessed_img = decode_image(img) + preprocessed_img = resize_image(preprocessed_img) + preprocessed_img = permute_image(preprocessed_img) + preprocessed_img = normalize_image(preprocessed_img) + yield [preprocessed_img] diff --git a/hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/label_file.txt b/modules/image/object_detection/ssd_vgg16_300_coco2017/label_file.txt similarity index 100% rename from hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/label_file.txt rename to modules/image/object_detection/ssd_vgg16_300_coco2017/label_file.txt diff --git a/modules/image/object_detection/ssd_vgg16_300_coco2017/module.py b/modules/image/object_detection/ssd_vgg16_300_coco2017/module.py new file mode 100644 index 0000000000000000000000000000000000000000..e0083b95f7c4e3567fda508109af196e1226d087 --- /dev/null +++ b/modules/image/object_detection/ssd_vgg16_300_coco2017/module.py @@ -0,0 +1,256 @@ +# coding=utf-8 +from __future__ import absolute_import + +import ast +import argparse +import os +from functools import partial + +import yaml +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving +from paddlehub.common.paddle_helper import add_vars_prefix + +from ssd_vgg16_300_coco2017.vgg import VGG +from ssd_vgg16_300_coco2017.processor import load_label_info, postprocess, base64_to_cv2 +from ssd_vgg16_300_coco2017.data_feed import reader + + 
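+# A hedged usage sketch of this module through PaddleHub (paths illustrative):
+#   import paddlehub as hub
+#   detector = hub.Module(name="ssd_vgg16_300_coco2017")
+#   results = detector.object_detection(paths=["/path/to/image.jpg"], score_thresh=0.5)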
+@moduleinfo( + name="ssd_vgg16_300_coco2017", + version="1.0.1", + type="cv/object_detection", + summary="SSD with backbone VGG16, trained with dataset COCO.", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class SSDVGG16(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, "ssd_vgg16_300_model") + self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) + self.model_config = None + self._set_config() + + def _set_config(self): + # predictor config setting. + cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + cpu_config.switch_ir_optim(False) + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + # model config setting. + if not self.model_config: + with open(os.path.join(self.directory, 'config.yml')) as fp: + self.model_config = yaml.load(fp.read(), Loader=yaml.FullLoader) + + self.multi_box_head_config = self.model_config['MultiBoxHead'] + self.output_decoder_config = self.model_config['SSDOutputDecoder'] + + def context(self, trainable=True, pretrained=True, get_prediction=False): + """ + Distill the Head Features, so as to perform transfer learning. + + Args: + trainable (bool): whether to set parameters trainable. + pretrained (bool): whether to load default pretrained model. + get_prediction (bool): whether to get prediction. + + Returns: + inputs(dict): the input variables. + outputs(dict): the output variables. + context_prog (Program): the program to execute transfer learning. 
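+
+        All variables are renamed with the '@HUB_<module name>@' prefix (see
+        add_vars_prefix below) so that several modules can be combined in one
+        program without name clashes.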
+ """ + context_prog = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(context_prog, startup_program): + with fluid.unique_name.guard(): + # image + image = fluid.layers.data(name='image', shape=[3, 300, 300], dtype='float32') + # backbone + backbone = VGG(depth=16, with_extra_blocks=True, normalizations=[20., -1, -1, -1, -1, -1]) + # body_feats + body_feats = backbone(image) + # im_size + im_size = fluid.layers.data(name='im_size', shape=[2], dtype='int32') + # var_prefix + var_prefix = '@HUB_{}@'.format(self.name) + # names of inputs + inputs = {'image': var_prefix + image.name, 'im_size': var_prefix + im_size.name} + # names of outputs + if get_prediction: + locs, confs, box, box_var = fluid.layers.multi_box_head( + inputs=body_feats, image=image, num_classes=81, **self.multi_box_head_config) + pred = fluid.layers.detection_output( + loc=locs, scores=confs, prior_box=box, prior_box_var=box_var, **self.output_decoder_config) + outputs = {'bbox_out': [var_prefix + pred.name]} + else: + outputs = {'body_features': [var_prefix + var.name for var in body_feats]} + + # add_vars_prefix + add_vars_prefix(context_prog, var_prefix) + add_vars_prefix(fluid.default_startup_program(), var_prefix) + # inputs + inputs = {key: context_prog.global_block().vars[value] for key, value in inputs.items()} + outputs = { + out_key: [context_prog.global_block().vars[varname] for varname in out_value] + for out_key, out_value in outputs.items() + } + # trainable + for param in context_prog.global_block().iter_parameters(): + param.trainable = trainable + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + # pretrained + if pretrained: + + def _if_exist(var): + return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist) + else: + exe.run(startup_program) + + return inputs, outputs, context_prog + + def object_detection(self, + paths=None, + images=None, + batch_size=1, + use_gpu=False, + output_dir='detection_result', + score_thresh=0.5, + visualization=True): + """API of Object Detection. + + Args: + paths (list[str]): The paths of images. + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + output_dir (str): The path to store output images. + visualization (bool): Whether to save image or not. + score_thresh (float): threshold for object detecion. + + Returns: + res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is: + data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: + left (float): The X coordinate of the upper left corner of the bounding box; + top (float): The Y coordinate of the upper left corner of the bounding box; + right (float): The X coordinate of the lower right corner of the bounding box; + bottom (float): The Y coordinate of the lower right corner of the bounding box; + label (str): The label of detection result; + confidence (float): The confidence of detection result. + save_path (str, optional): The path to save output images. 
+ """ + paths = paths if paths else list() + data_reader = partial(reader, paths, images) + batch_reader = fluid.io.batch(data_reader, batch_size=batch_size) + res = [] + for iter_id, feed_data in enumerate(batch_reader()): + feed_data = np.array(feed_data) + image_tensor = PaddleTensor(np.array(list(feed_data[:, 0])).copy()) + if use_gpu: + data_out = self.gpu_predictor.run([image_tensor]) + else: + data_out = self.cpu_predictor.run([image_tensor]) + + output = postprocess( + paths=paths, + images=images, + data_out=data_out, + score_thresh=score_thresh, + label_names=self.label_names, + output_dir=output_dir, + handle_id=iter_id * batch_size, + visualization=visualization) + res.extend(output) + return res + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.object_detection(images=images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.object_detection( + paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization, + score_thresh=args.score_thresh) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='detection_result', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + self.arg_input_group.add_argument( + '--score_thresh', type=ast.literal_eval, default=0.5, help="threshold for object detecion.") diff --git a/modules/image/object_detection/ssd_vgg16_300_coco2017/processor.py b/modules/image/object_detection/ssd_vgg16_300_coco2017/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..ff4eb9fe5fd596233ef90c1a0a5baa9d0ff0e56f --- /dev/null +++ b/modules/image/object_detection/ssd_vgg16_300_coco2017/processor.py @@ -0,0 +1,159 @@ +# coding=utf-8 +import base64 +import os + +import cv2 +import numpy as np +from PIL import Image, ImageDraw + +__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess'] + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def get_save_image_name(img, output_dir, image_path): + """ + Get save image name from source image path. + """ + if not os.path.exists(output_dir): + os.makedirs(output_dir) + image_name = os.path.split(image_path)[-1] + name, ext = os.path.splitext(image_name) + + if img.format == 'PNG': + ext = '.png' + elif img.format == 'JPEG': + ext = '.jpg' + elif img.format == 'BMP': + ext = '.bmp' + else: + if img.mode == "RGB" or img.mode == "L": + ext = ".jpg" + elif img.mode == "RGBA" or img.mode == "P": + ext = '.png' + + return os.path.join(output_dir, "{}".format(name)) + ext + + +def draw_bounding_box_on_image(image_path, data_list, save_dir): + image = Image.open(image_path) + draw = ImageDraw.Draw(image) + for data in data_list: + left, right, top, bottom = data['left'], data['right'], data['top'], data['bottom'] + + # draw bbox + draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=2, fill='red') + + # draw label + if image.mode == 'RGB': + text = data['label'] + ": %.2f%%" % (100 * data['confidence']) + textsize_width, textsize_height = draw.textsize(text=text) + draw.rectangle( + xy=(left, top - (textsize_height + 5), left + textsize_width + 10, top), fill=(255, 255, 255)) + draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) + + save_name = get_save_image_name(image, save_dir, image_path) + if os.path.exists(save_name): + os.remove(save_name) + + image.save(save_name) + + return save_name + + +def clip_bbox(bbox, img_width, img_height): + xmin = max(min(bbox[0], img_width), 0.) + ymin = max(min(bbox[1], img_height), 0.) + xmax = max(min(bbox[2], img_width), 0.) + ymax = max(min(bbox[3], img_height), 0.) + return float(xmin), float(ymin), float(xmax), float(ymax) + + +def load_label_info(file_path): + with open(file_path, 'r') as fr: + text = fr.readlines() + label_names = [] + for info in text: + label_names.append(info.strip()) + return label_names + + +def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True): + """ + postprocess the lod_tensor produced by fluid.Executor.run + + Args: + paths (list[str]): the path of images. + images (list(numpy.ndarray)): list of images, shape of each is [H, W, C]. + data_out (lod_tensor): data produced by executor.run. + score_thresh (float): the low limit of bounding box. + label_names (list[str]): label names. + output_dir (str): output directory. + handle_id (int): The number of images that have been handled. 
+ visualization (bool): whether to save as images. + + Returns: + res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: + data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: + left (float): The X coordinate of the upper left corner of the bounding box; + top (float): The Y coordinate of the upper left corner of the bounding box; + right (float): The X coordinate of the lower right corner of the bounding box; + bottom (float): The Y coordinate of the lower right corner of the bounding box; + label (str): The label of detection result; + confidence (float): The confidence of detection result. + save_path (str): The path to save output images. + """ + lod_tensor = data_out[0] + lod = lod_tensor.lod[0] + results = lod_tensor.as_ndarray() + if handle_id < len(paths): + unhandled_paths = paths[handle_id:] + unhandled_paths_num = len(unhandled_paths) + else: + unhandled_paths_num = 0 + + output = [] + for index in range(len(lod) - 1): + output_i = {'data': []} + if index < unhandled_paths_num: + org_img_path = unhandled_paths[index] + org_img = Image.open(org_img_path) + output_i['path'] = org_img_path + else: + org_img = images[index - unhandled_paths_num] + org_img = org_img.astype(np.uint8) + org_img = Image.fromarray(org_img[:, :, ::-1]) + if visualization: + org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format((handle_id + index))) + org_img.save(org_img_path) + org_img_height = org_img.height + org_img_width = org_img.width + result_i = results[lod[index]:lod[index + 1]] + for row in result_i: + if len(row) != 6: + continue + if row[1] < score_thresh: + continue + category_id = int(row[0]) + confidence = row[1] + bbox = row[2:] + bbox[0] = bbox[0] * org_img_width + bbox[1] = bbox[1] * org_img_height + bbox[2] = bbox[2] * org_img_width + bbox[3] = bbox[3] * org_img_height + dt = {} + dt['label'] = label_names[category_id] + dt['confidence'] = float(confidence) + dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height) + output_i['data'].append(dt) + + output.append(output_i) + if visualization: + output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir) + + return output diff --git a/modules/image/object_detection/ssd_vgg16_300_coco2017/vgg.py b/modules/image/object_detection/ssd_vgg16_300_coco2017/vgg.py new file mode 100644 index 0000000000000000000000000000000000000000..d950c6b553d9af29086ba6f942d005920e74c296 --- /dev/null +++ b/modules/image/object_detection/ssd_vgg16_300_coco2017/vgg.py @@ -0,0 +1,184 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr + +__all__ = ['VGG'] + + +class VGG(object): + """ + VGG, see https://arxiv.org/abs/1409.1556 + + Args: + depth (int): the VGG net depth (16 or 19) + normalizations (list): params list of init scale in l2 norm, skip init + scale if param is -1. 
+ with_extra_blocks (bool): whether or not extra blocks should be added + extra_block_filters (list): in each extra block, params: + [in_channel, out_channel, padding_size, stride_size, filter_size] + class_dim (int): number of class while classification + """ + + def __init__(self, + depth=16, + with_extra_blocks=False, + normalizations=[20., -1, -1, -1, -1, -1], + extra_block_filters=[[256, 512, 1, 2, 3], [128, 256, 1, 2, 3], [128, 256, 0, 1, 3], + [128, 256, 0, 1, 3]], + class_dim=1000): + assert depth in [16, 19], "depth {} not in [16, 19]" + self.depth = depth + self.depth_cfg = {16: [2, 2, 3, 3, 3], 19: [2, 2, 4, 4, 4]} + self.with_extra_blocks = with_extra_blocks + self.normalizations = normalizations + self.extra_block_filters = extra_block_filters + self.class_dim = class_dim + + def __call__(self, input): + layers = [] + layers += self._vgg_block(input) + + if not self.with_extra_blocks: + return layers[-1] + + layers += self._add_extras_block(layers[-1]) + norm_cfg = self.normalizations + for k, v in enumerate(layers): + if not norm_cfg[k] == -1: + layers[k] = self._l2_norm_scale(v, init_scale=norm_cfg[k]) + + return layers + + def _vgg_block(self, input): + nums = self.depth_cfg[self.depth] + vgg_base = [64, 128, 256, 512, 512] + conv = input + res_layer = [] + layers = [] + for k, v in enumerate(vgg_base): + conv = self._conv_block(conv, v, nums[k], name="conv{}_".format(k + 1)) + layers.append(conv) + if self.with_extra_blocks: + if k == 4: + conv = self._pooling_block(conv, 3, 1, pool_padding=1) + else: + conv = self._pooling_block(conv, 2, 2) + else: + conv = self._pooling_block(conv, 2, 2) + if not self.with_extra_blocks: + fc_dim = 4096 + fc_name = ["fc6", "fc7", "fc8"] + fc1 = fluid.layers.fc( + input=conv, + size=fc_dim, + act='relu', + param_attr=fluid.param_attr.ParamAttr(name=fc_name[0] + "_weights"), + bias_attr=fluid.param_attr.ParamAttr(name=fc_name[0] + "_offset")) + fc2 = fluid.layers.fc( + input=fc1, + size=fc_dim, + act='relu', + param_attr=fluid.param_attr.ParamAttr(name=fc_name[1] + "_weights"), + bias_attr=fluid.param_attr.ParamAttr(name=fc_name[1] + "_offset")) + out = fluid.layers.fc( + input=fc2, + size=self.class_dim, + param_attr=fluid.param_attr.ParamAttr(name=fc_name[2] + "_weights"), + bias_attr=fluid.param_attr.ParamAttr(name=fc_name[2] + "_offset")) + out = fluid.layers.softmax(out) + res_layer.append(out) + return [out] + else: + fc6 = self._conv_layer(conv, 1024, 3, 1, 6, dilation=6, name="fc6") + fc7 = self._conv_layer(fc6, 1024, 1, 1, 0, name="fc7") + return [layers[3], fc7] + + def _add_extras_block(self, input): + cfg = self.extra_block_filters + conv = input + layers = [] + for k, v in enumerate(cfg): + assert len(v) == 5, "extra_block_filters size not fix" + conv = self._extra_block(conv, v[0], v[1], v[2], v[3], v[4], name="conv{}_".format(6 + k)) + layers.append(conv) + + return layers + + def _conv_block(self, input, num_filter, groups, name=None): + conv = input + for i in range(groups): + conv = self._conv_layer( + input=conv, + num_filters=num_filter, + filter_size=3, + stride=1, + padding=1, + act='relu', + name=name + str(i + 1)) + return conv + + def _extra_block(self, input, num_filters1, num_filters2, padding_size, stride_size, filter_size, name=None): + # 1x1 conv + conv_1 = self._conv_layer( + input=input, num_filters=int(num_filters1), filter_size=1, stride=1, act='relu', padding=0, name=name + "1") + + # 3x3 conv + conv_2 = self._conv_layer( + input=conv_1, + num_filters=int(num_filters2), + filter_size=filter_size, + 
stride=stride_size, + act='relu', + padding=padding_size, + name=name + "2") + return conv_2 + + def _conv_layer(self, + input, + num_filters, + filter_size, + stride, + padding, + dilation=1, + act='relu', + use_cudnn=True, + name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=padding, + dilation=dilation, + act=act, + use_cudnn=use_cudnn, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=ParamAttr(name=name + "_biases") if self.with_extra_blocks else False, + name=name + '.conv2d.output.1') + return conv + + def _pooling_block(self, conv, pool_size, pool_stride, pool_padding=0, ceil_mode=True): + pool = fluid.layers.pool2d( + input=conv, + pool_size=pool_size, + pool_type='max', + pool_stride=pool_stride, + pool_padding=pool_padding, + ceil_mode=ceil_mode) + return pool + + def _l2_norm_scale(self, input, init_scale=1.0, channel_shared=False): + from paddle.fluid.layer_helper import LayerHelper + from paddle.fluid.initializer import Constant + helper = LayerHelper("Scale") + l2_norm = fluid.layers.l2_normalize(input, axis=1) # l2 norm along channel + shape = [1] if channel_shared else [input.shape[1]] + scale = helper.create_parameter( + attr=helper.param_attr, shape=shape, dtype=input.dtype, default_initializer=Constant(init_scale)) + out = fluid.layers.elementwise_mul( + x=l2_norm, y=scale, axis=-1 if channel_shared else 1, name="conv4_3_norm_scale") + return out diff --git a/hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/README.md b/modules/image/object_detection/ssd_vgg16_512_coco2017/README.md similarity index 100% rename from hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/README.md rename to modules/image/object_detection/ssd_vgg16_512_coco2017/README.md diff --git a/hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/__init__.py b/modules/image/object_detection/ssd_vgg16_512_coco2017/__init__.py similarity index 100% rename from hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/__init__.py rename to modules/image/object_detection/ssd_vgg16_512_coco2017/__init__.py diff --git a/hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/config.yml b/modules/image/object_detection/ssd_vgg16_512_coco2017/config.yml similarity index 100% rename from hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/config.yml rename to modules/image/object_detection/ssd_vgg16_512_coco2017/config.yml diff --git a/modules/image/object_detection/ssd_vgg16_512_coco2017/data_feed.py b/modules/image/object_detection/ssd_vgg16_512_coco2017/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..782b55f0264070d346904abc4702df9f65e1606a --- /dev/null +++ b/modules/image/object_detection/ssd_vgg16_512_coco2017/data_feed.py @@ -0,0 +1,191 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import os +import random +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image +from paddle import fluid + +__all__ = ['reader'] + + +class DecodeImage(object): + def __init__(self, to_rgb=True, with_mixup=False): + """ Transform the image data to numpy format. 
+ + Args: + to_rgb (bool): whether to convert BGR to RGB + with_mixup (bool): whether or not to mixup image and gt_bbbox/gt_score + """ + self.to_rgb = to_rgb + self.with_mixup = with_mixup + + def __call__(self, im): + if self.to_rgb: + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + + return im + + +class ResizeImage(object): + def __init__(self, target_size=0, max_size=0, interp=cv2.INTER_LINEAR, use_cv2=True): + """ + Rescale image to the specified target size, and capped at max_size + if max_size != 0. + If target_size is list, selected a scale randomly as the specified + target size. + + Args: + target_size (int|list): the target size of image's short side, + multi-scale training is adopted when type is list. + max_size (int): the max size of image + interp (int): the interpolation method + use_cv2 (bool): use the cv2 interpolation method or use PIL + interpolation method + """ + self.max_size = int(max_size) + self.interp = int(interp) + self.use_cv2 = use_cv2 + self.target_size = target_size + + def __call__(self, im): + if not isinstance(im, np.ndarray): + raise TypeError("{}: image type is not numpy.".format(self)) + if len(im.shape) != 3: + raise ValueError('{}: image is not 3-dimensional.'.format(self)) + im_shape = im.shape + im_size_min = np.min(im_shape[0:2]) + im_size_max = np.max(im_shape[0:2]) + if isinstance(self.target_size, list): + # Case for multi-scale training + selected_size = random.choice(self.target_size) + else: + selected_size = self.target_size + if float(im_size_min) == 0: + raise ZeroDivisionError('{}: min size of image is 0'.format(self)) + if self.max_size != 0: + im_scale = float(selected_size) / float(im_size_min) + # Prevent the biggest axis from being more than max_size + if np.round(im_scale * im_size_max) > self.max_size: + im_scale = float(self.max_size) / float(im_size_max) + im_scale_x = im_scale + im_scale_y = im_scale + + resize_w = im_scale_x * float(im_shape[1]) + resize_h = im_scale_y * float(im_shape[0]) + im_info = [resize_h, resize_w, im_scale] + else: + im_scale_x = float(selected_size) / float(im_shape[1]) + im_scale_y = float(selected_size) / float(im_shape[0]) + + resize_w = selected_size + resize_h = selected_size + + if self.use_cv2: + im = cv2.resize(im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=self.interp) + else: + if self.max_size != 0: + raise TypeError('If you set max_size to cap the maximum size of image,' + 'please set use_cv2 to True to resize the image.') + im = im.astype('uint8') + im = Image.fromarray(im) + im = im.resize((int(resize_w), int(resize_h)), self.interp) + im = np.array(im) + + return im + + +class NormalizeImage(object): + def __init__(self, mean=[0.485, 0.456, 0.406], std=[1, 1, 1], is_scale=True, is_channel_first=True): + """ + Args: + mean (list): the pixel mean + std (list): the pixel variance + """ + self.mean = mean + self.std = std + self.is_scale = is_scale + self.is_channel_first = is_channel_first + + def __call__(self, im): + """Normalize the image. + + Operators: + 1.(optional) Scale the image to [0,1] + 2. 
Each pixel has the mean subtracted and is then divided by std
+        """
+        im = im.astype(np.float32, copy=False)
+        if self.is_channel_first:
+            mean = np.array(self.mean)[:, np.newaxis, np.newaxis]
+            std = np.array(self.std)[:, np.newaxis, np.newaxis]
+        else:
+            mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
+            std = np.array(self.std)[np.newaxis, np.newaxis, :]
+        if self.is_scale:
+            im = im / 255.0
+        im -= mean
+        im /= std
+        return im
+
+
+class Permute(object):
+    def __init__(self, to_bgr=True, channel_first=True):
+        """
+        Permute the image channels.
+
+        Args:
+            to_bgr (bool): whether to convert RGB to BGR
+            channel_first (bool): whether to move the channel axis first (HWC -> CHW)
+        """
+        self.to_bgr = to_bgr
+        self.channel_first = channel_first
+
+    def __call__(self, im):
+        if self.channel_first:
+            im = np.swapaxes(im, 1, 2)
+            im = np.swapaxes(im, 1, 0)
+        if self.to_bgr:
+            im = im[[2, 1, 0], :, :]
+        return im
+
+
+def reader(paths=[],
+           images=None,
+           decode_image=DecodeImage(to_rgb=True, with_mixup=False),
+           resize_image=ResizeImage(target_size=512, interp=1, max_size=0, use_cv2=False),
+           permute_image=Permute(to_bgr=False),
+           normalize_image=NormalizeImage(mean=[104, 117, 123], std=[1, 1, 1], is_scale=False)):
+    """
+    data generator
+
+    Args:
+        paths (list[str]): paths to images.
+        images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]
+        decode_image (class object): instance of DecodeImage.
+        resize_image (class object): instance of ResizeImage; this module expects 512x512 inputs.
+        permute_image (class object): instance of Permute.
+        normalize_image (class object): instance of NormalizeImage.
+    """
+    img_list = []
+    if paths is not None:
+        assert type(paths) is list, "type(paths) is not list."
+        for img_path in paths:
+            assert os.path.isfile(img_path), "The {} isn't a valid file path.".format(img_path)
+            img = cv2.imread(img_path).astype('float32')
+            img_list.append(img)
+    if images is not None:
+        for img in images:
+            img_list.append(img)
+
+    for img in img_list:
+        preprocessed_img = decode_image(img)
+        preprocessed_img = resize_image(preprocessed_img)
+        preprocessed_img = permute_image(preprocessed_img)
+        preprocessed_img = normalize_image(preprocessed_img)
+        yield [preprocessed_img]
diff --git a/hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/label_file.txt b/modules/image/object_detection/ssd_vgg16_512_coco2017/label_file.txt
similarity index 100%
rename from hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/label_file.txt
rename to modules/image/object_detection/ssd_vgg16_512_coco2017/label_file.txt
diff --git a/modules/image/object_detection/ssd_vgg16_512_coco2017/module.py b/modules/image/object_detection/ssd_vgg16_512_coco2017/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..10c05536bc39f955c21bdd82edb19f52211b0cbd
--- /dev/null
+++ b/modules/image/object_detection/ssd_vgg16_512_coco2017/module.py
@@ -0,0 +1,270 @@
+# coding=utf-8
+from __future__ import absolute_import
+
+import ast
+import argparse
+import os
+from functools import partial
+
+import yaml
+import numpy as np
+import paddle.fluid as fluid
+import paddlehub as hub
+from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+from paddlehub.module.module import moduleinfo, runnable, serving
+from paddlehub.common.paddle_helper import add_vars_prefix
+
+from ssd_vgg16_512_coco2017.vgg import VGG
+from ssd_vgg16_512_coco2017.processor import load_label_info, postprocess, base64_to_cv2
+from ssd_vgg16_512_coco2017.data_feed import reader
+
+
+@moduleinfo(
+    name="ssd_vgg16_512_coco2017",
+    version="1.0.1",
type="cv/object_detection", + summary="SSD with backbone VGG16, trained with dataset COCO.", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class SSDVGG16_512(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, "ssd_vgg16_512_model") + self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) + self.model_config = None + self._set_config() + + def _set_config(self): + # predictor config setting. + cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + cpu_config.switch_ir_optim(False) + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + # model config setting. + if not self.model_config: + with open(os.path.join(self.directory, 'config.yml')) as fp: + self.model_config = yaml.load(fp.read(), Loader=yaml.FullLoader) + + self.multi_box_head_config = self.model_config['MultiBoxHead'] + self.output_decoder_config = self.model_config['SSDOutputDecoder'] + + def context(self, trainable=True, pretrained=True, get_prediction=False): + """ + Distill the Head Features, so as to perform transfer learning. + + Args: + trainable (bool): whether to set parameters trainable. + pretrained (bool): whether to load default pretrained model. + get_prediction (bool): whether to get prediction. + + Returns: + inputs(dict): the input variables. + outputs(dict): the output variables. + context_prog (Program): the program to execute transfer learning. 
+ """ + context_prog = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(context_prog, startup_program): + with fluid.unique_name.guard(): + # image + image = fluid.layers.data(name='image', shape=[3, 512, 512], dtype='float32') + # backbone + backbone = VGG( + depth=16, + with_extra_blocks=True, + normalizations=[20., -1, -1, -1, -1, -1, -1], + extra_block_filters=[[256, 512, 1, 2, 3], [128, 256, 1, 2, 3], [128, 256, 1, 2, 3], + [128, 256, 1, 2, 3], [128, 256, 1, 1, 4]]) + # body_feats + body_feats = backbone(image) + # im_size + im_size = fluid.layers.data(name='im_size', shape=[2], dtype='int32') + # var_prefix + var_prefix = '@HUB_{}@'.format(self.name) + # names of inputs + inputs = {'image': var_prefix + image.name, 'im_size': var_prefix + im_size.name} + # names of outputs + if get_prediction: + locs, confs, box, box_var = fluid.layers.multi_box_head( + inputs=body_feats, image=image, num_classes=81, **self.multi_box_head_config) + pred = fluid.layers.detection_output( + loc=locs, scores=confs, prior_box=box, prior_box_var=box_var, **self.output_decoder_config) + outputs = {'bbox_out': [var_prefix + pred.name]} + else: + outputs = {'body_features': [var_prefix + var.name for var in body_feats]} + + # add_vars_prefix + add_vars_prefix(context_prog, var_prefix) + add_vars_prefix(fluid.default_startup_program(), var_prefix) + # inputs + inputs = {key: context_prog.global_block().vars[value] for key, value in inputs.items()} + outputs = { + out_key: [context_prog.global_block().vars[varname] for varname in out_value] + for out_key, out_value in outputs.items() + } + # trainable + for param in context_prog.global_block().iter_parameters(): + param.trainable = trainable + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + # pretrained + if pretrained: + + def _if_exist(var): + return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist) + else: + exe.run(startup_program) + + return inputs, outputs, context_prog + + def object_detection(self, + paths=None, + images=None, + batch_size=1, + use_gpu=False, + output_dir='detection_result', + score_thresh=0.5, + visualization=True): + """API of Object Detection. + + Args: + paths (list[str]): The paths of images. + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + output_dir (str): The path to store output images. + visualization (bool): Whether to save image or not. + score_thresh (float): threshold for object detecion. + + Returns: + res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is: + data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: + left (float): The X coordinate of the upper left corner of the bounding box; + top (float): The Y coordinate of the upper left corner of the bounding box; + right (float): The X coordinate of the lower right corner of the bounding box; + bottom (float): The Y coordinate of the lower right corner of the bounding box; + label (str): The label of detection result; + confidence (float): The confidence of detection result. + save_path (str, optional): The path to save output images. 
+ """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." + ) + + paths = paths if paths else list() + data_reader = partial(reader, paths, images) + batch_reader = fluid.io.batch(data_reader, batch_size=batch_size) + res = [] + for iter_id, feed_data in enumerate(batch_reader()): + feed_data = np.array(feed_data) + image_tensor = PaddleTensor(np.array(list(feed_data[:, 0])).copy()) + if use_gpu: + data_out = self.gpu_predictor.run([image_tensor]) + else: + data_out = self.cpu_predictor.run([image_tensor]) + + output = postprocess( + paths=paths, + images=images, + data_out=data_out, + score_thresh=score_thresh, + label_names=self.label_names, + output_dir=output_dir, + handle_id=iter_id * batch_size, + visualization=visualization) + res.extend(output) + return res + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.object_detection(images=images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.object_detection( + paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization, + score_thresh=args.score_thresh) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='detection_result', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + self.arg_input_group.add_argument( + '--score_thresh', type=ast.literal_eval, default=0.5, help="threshold for object detecion.") diff --git a/modules/image/object_detection/ssd_vgg16_512_coco2017/processor.py b/modules/image/object_detection/ssd_vgg16_512_coco2017/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..ff4eb9fe5fd596233ef90c1a0a5baa9d0ff0e56f --- /dev/null +++ b/modules/image/object_detection/ssd_vgg16_512_coco2017/processor.py @@ -0,0 +1,159 @@ +# coding=utf-8 +import base64 +import os + +import cv2 +import numpy as np +from PIL import Image, ImageDraw + +__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess'] + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def get_save_image_name(img, output_dir, image_path): + """ + Get save image name from source image path. + """ + if not os.path.exists(output_dir): + os.makedirs(output_dir) + image_name = os.path.split(image_path)[-1] + name, ext = os.path.splitext(image_name) + + if img.format == 'PNG': + ext = '.png' + elif img.format == 'JPEG': + ext = '.jpg' + elif img.format == 'BMP': + ext = '.bmp' + else: + if img.mode == "RGB" or img.mode == "L": + ext = ".jpg" + elif img.mode == "RGBA" or img.mode == "P": + ext = '.png' + + return os.path.join(output_dir, "{}".format(name)) + ext + + +def draw_bounding_box_on_image(image_path, data_list, save_dir): + image = Image.open(image_path) + draw = ImageDraw.Draw(image) + for data in data_list: + left, right, top, bottom = data['left'], data['right'], data['top'], data['bottom'] + + # draw bbox + draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=2, fill='red') + + # draw label + if image.mode == 'RGB': + text = data['label'] + ": %.2f%%" % (100 * data['confidence']) + textsize_width, textsize_height = draw.textsize(text=text) + draw.rectangle( + xy=(left, top - (textsize_height + 5), left + textsize_width + 10, top), fill=(255, 255, 255)) + draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) + + save_name = get_save_image_name(image, save_dir, image_path) + if os.path.exists(save_name): + os.remove(save_name) + + image.save(save_name) + + return save_name + + +def clip_bbox(bbox, img_width, img_height): + xmin = max(min(bbox[0], img_width), 0.) + ymin = max(min(bbox[1], img_height), 0.) + xmax = max(min(bbox[2], img_width), 0.) + ymax = max(min(bbox[3], img_height), 0.) + return float(xmin), float(ymin), float(xmax), float(ymax) + + +def load_label_info(file_path): + with open(file_path, 'r') as fr: + text = fr.readlines() + label_names = [] + for info in text: + label_names.append(info.strip()) + return label_names + + +def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True): + """ + postprocess the lod_tensor produced by fluid.Executor.run + + Args: + paths (list[str]): the path of images. + images (list(numpy.ndarray)): list of images, shape of each is [H, W, C]. + data_out (lod_tensor): data produced by executor.run. + score_thresh (float): the low limit of bounding box. + label_names (list[str]): label names. + output_dir (str): output directory. + handle_id (int): The number of images that have been handled. 
+ visualization (bool): whether to save as images. + + Returns: + res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: + data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: + left (float): The X coordinate of the upper left corner of the bounding box; + top (float): The Y coordinate of the upper left corner of the bounding box; + right (float): The X coordinate of the lower right corner of the bounding box; + bottom (float): The Y coordinate of the lower right corner of the bounding box; + label (str): The label of detection result; + confidence (float): The confidence of detection result. + save_path (str): The path to save output images. + """ + lod_tensor = data_out[0] + lod = lod_tensor.lod[0] + results = lod_tensor.as_ndarray() + if handle_id < len(paths): + unhandled_paths = paths[handle_id:] + unhandled_paths_num = len(unhandled_paths) + else: + unhandled_paths_num = 0 + + output = [] + for index in range(len(lod) - 1): + output_i = {'data': []} + if index < unhandled_paths_num: + org_img_path = unhandled_paths[index] + org_img = Image.open(org_img_path) + output_i['path'] = org_img_path + else: + org_img = images[index - unhandled_paths_num] + org_img = org_img.astype(np.uint8) + org_img = Image.fromarray(org_img[:, :, ::-1]) + if visualization: + org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format((handle_id + index))) + org_img.save(org_img_path) + org_img_height = org_img.height + org_img_width = org_img.width + result_i = results[lod[index]:lod[index + 1]] + for row in result_i: + if len(row) != 6: + continue + if row[1] < score_thresh: + continue + category_id = int(row[0]) + confidence = row[1] + bbox = row[2:] + bbox[0] = bbox[0] * org_img_width + bbox[1] = bbox[1] * org_img_height + bbox[2] = bbox[2] * org_img_width + bbox[3] = bbox[3] * org_img_height + dt = {} + dt['label'] = label_names[category_id] + dt['confidence'] = float(confidence) + dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height) + output_i['data'].append(dt) + + output.append(output_i) + if visualization: + output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir) + + return output diff --git a/modules/image/object_detection/ssd_vgg16_512_coco2017/vgg.py b/modules/image/object_detection/ssd_vgg16_512_coco2017/vgg.py new file mode 100644 index 0000000000000000000000000000000000000000..d950c6b553d9af29086ba6f942d005920e74c296 --- /dev/null +++ b/modules/image/object_detection/ssd_vgg16_512_coco2017/vgg.py @@ -0,0 +1,184 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr + +__all__ = ['VGG'] + + +class VGG(object): + """ + VGG, see https://arxiv.org/abs/1409.1556 + + Args: + depth (int): the VGG net depth (16 or 19) + normalizations (list): params list of init scale in l2 norm, skip init + scale if param is -1. 
+ with_extra_blocks (bool): whether or not extra blocks should be added + extra_block_filters (list): in each extra block, params: + [in_channel, out_channel, padding_size, stride_size, filter_size] + class_dim (int): number of class while classification + """ + + def __init__(self, + depth=16, + with_extra_blocks=False, + normalizations=[20., -1, -1, -1, -1, -1], + extra_block_filters=[[256, 512, 1, 2, 3], [128, 256, 1, 2, 3], [128, 256, 0, 1, 3], + [128, 256, 0, 1, 3]], + class_dim=1000): + assert depth in [16, 19], "depth {} not in [16, 19]" + self.depth = depth + self.depth_cfg = {16: [2, 2, 3, 3, 3], 19: [2, 2, 4, 4, 4]} + self.with_extra_blocks = with_extra_blocks + self.normalizations = normalizations + self.extra_block_filters = extra_block_filters + self.class_dim = class_dim + + def __call__(self, input): + layers = [] + layers += self._vgg_block(input) + + if not self.with_extra_blocks: + return layers[-1] + + layers += self._add_extras_block(layers[-1]) + norm_cfg = self.normalizations + for k, v in enumerate(layers): + if not norm_cfg[k] == -1: + layers[k] = self._l2_norm_scale(v, init_scale=norm_cfg[k]) + + return layers + + def _vgg_block(self, input): + nums = self.depth_cfg[self.depth] + vgg_base = [64, 128, 256, 512, 512] + conv = input + res_layer = [] + layers = [] + for k, v in enumerate(vgg_base): + conv = self._conv_block(conv, v, nums[k], name="conv{}_".format(k + 1)) + layers.append(conv) + if self.with_extra_blocks: + if k == 4: + conv = self._pooling_block(conv, 3, 1, pool_padding=1) + else: + conv = self._pooling_block(conv, 2, 2) + else: + conv = self._pooling_block(conv, 2, 2) + if not self.with_extra_blocks: + fc_dim = 4096 + fc_name = ["fc6", "fc7", "fc8"] + fc1 = fluid.layers.fc( + input=conv, + size=fc_dim, + act='relu', + param_attr=fluid.param_attr.ParamAttr(name=fc_name[0] + "_weights"), + bias_attr=fluid.param_attr.ParamAttr(name=fc_name[0] + "_offset")) + fc2 = fluid.layers.fc( + input=fc1, + size=fc_dim, + act='relu', + param_attr=fluid.param_attr.ParamAttr(name=fc_name[1] + "_weights"), + bias_attr=fluid.param_attr.ParamAttr(name=fc_name[1] + "_offset")) + out = fluid.layers.fc( + input=fc2, + size=self.class_dim, + param_attr=fluid.param_attr.ParamAttr(name=fc_name[2] + "_weights"), + bias_attr=fluid.param_attr.ParamAttr(name=fc_name[2] + "_offset")) + out = fluid.layers.softmax(out) + res_layer.append(out) + return [out] + else: + fc6 = self._conv_layer(conv, 1024, 3, 1, 6, dilation=6, name="fc6") + fc7 = self._conv_layer(fc6, 1024, 1, 1, 0, name="fc7") + return [layers[3], fc7] + + def _add_extras_block(self, input): + cfg = self.extra_block_filters + conv = input + layers = [] + for k, v in enumerate(cfg): + assert len(v) == 5, "extra_block_filters size not fix" + conv = self._extra_block(conv, v[0], v[1], v[2], v[3], v[4], name="conv{}_".format(6 + k)) + layers.append(conv) + + return layers + + def _conv_block(self, input, num_filter, groups, name=None): + conv = input + for i in range(groups): + conv = self._conv_layer( + input=conv, + num_filters=num_filter, + filter_size=3, + stride=1, + padding=1, + act='relu', + name=name + str(i + 1)) + return conv + + def _extra_block(self, input, num_filters1, num_filters2, padding_size, stride_size, filter_size, name=None): + # 1x1 conv + conv_1 = self._conv_layer( + input=input, num_filters=int(num_filters1), filter_size=1, stride=1, act='relu', padding=0, name=name + "1") + + # 3x3 conv + conv_2 = self._conv_layer( + input=conv_1, + num_filters=int(num_filters2), + filter_size=filter_size, + 
stride=stride_size, + act='relu', + padding=padding_size, + name=name + "2") + return conv_2 + + def _conv_layer(self, + input, + num_filters, + filter_size, + stride, + padding, + dilation=1, + act='relu', + use_cudnn=True, + name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=padding, + dilation=dilation, + act=act, + use_cudnn=use_cudnn, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=ParamAttr(name=name + "_biases") if self.with_extra_blocks else False, + name=name + '.conv2d.output.1') + return conv + + def _pooling_block(self, conv, pool_size, pool_stride, pool_padding=0, ceil_mode=True): + pool = fluid.layers.pool2d( + input=conv, + pool_size=pool_size, + pool_type='max', + pool_stride=pool_stride, + pool_padding=pool_padding, + ceil_mode=ceil_mode) + return pool + + def _l2_norm_scale(self, input, init_scale=1.0, channel_shared=False): + from paddle.fluid.layer_helper import LayerHelper + from paddle.fluid.initializer import Constant + helper = LayerHelper("Scale") + l2_norm = fluid.layers.l2_normalize(input, axis=1) # l2 norm along channel + shape = [1] if channel_shared else [input.shape[1]] + scale = helper.create_parameter( + attr=helper.param_attr, shape=shape, dtype=input.dtype, default_initializer=Constant(init_scale)) + out = fluid.layers.elementwise_mul( + x=l2_norm, y=scale, axis=-1 if channel_shared else 1, name="conv4_3_norm_scale") + return out diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/README.md b/modules/image/object_detection/yolov3_darknet53_coco2017/README.md similarity index 100% rename from hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/README.md rename to modules/image/object_detection/yolov3_darknet53_coco2017/README.md diff --git a/hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/__init__.py b/modules/image/object_detection/yolov3_darknet53_coco2017/__init__.py similarity index 100% rename from hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/__init__.py rename to modules/image/object_detection/yolov3_darknet53_coco2017/__init__.py diff --git a/modules/image/object_detection/yolov3_darknet53_coco2017/darknet.py b/modules/image/object_detection/yolov3_darknet53_coco2017/darknet.py new file mode 100644 index 0000000000000000000000000000000000000000..fe925fb4c59cb7791eb08fedf143720287c1d424 --- /dev/null +++ b/modules/image/object_detection/yolov3_darknet53_coco2017/darknet.py @@ -0,0 +1,121 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import six +import math + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.regularizer import L2Decay + +__all__ = ['DarkNet'] + + +class DarkNet(object): + """DarkNet, see https://pjreddie.com/darknet/yolo/ + + Args: + depth (int): network depth, currently only darknet 53 is supported + norm_type (str): normalization type, 'bn' and 'sync_bn' are supported + norm_decay (float): weight decay for normalization layer weights + get_prediction (bool): whether to get prediction + class_dim (int): number of class while classification + """ + + def __init__(self, + depth=53, + norm_type='sync_bn', + norm_decay=0., + weight_prefix_name='', + get_prediction=False, + class_dim=1000): + assert depth in [53], "unsupported depth value" + self.depth = depth + self.norm_type = norm_type + self.norm_decay = norm_decay + 
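+        # depth_cfg maps network depth to (per-stage block counts, block builder);
+        # DarkNet-53 stacks 1, 2, 8, 8 and 4 residual basic blocks over five stages.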
self.depth_cfg = {53: ([1, 2, 8, 8, 4], self.basicblock)} + self.prefix_name = weight_prefix_name + self.class_dim = class_dim + self.get_prediction = get_prediction + + def _conv_norm(self, input, ch_out, filter_size, stride, padding, act='leaky', name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=ch_out, + filter_size=filter_size, + stride=stride, + padding=padding, + act=None, + param_attr=ParamAttr(name=name + ".conv.weights"), + bias_attr=False) + + bn_name = name + ".bn" + bn_param_attr = ParamAttr(regularizer=L2Decay(float(self.norm_decay)), name=bn_name + '.scale') + bn_bias_attr = ParamAttr(regularizer=L2Decay(float(self.norm_decay)), name=bn_name + '.offset') + + out = fluid.layers.batch_norm( + input=conv, + act=None, + param_attr=bn_param_attr, + bias_attr=bn_bias_attr, + moving_mean_name=bn_name + '.mean', + moving_variance_name=bn_name + '.var') + + # leaky relu here has `alpha` as 0.1, can not be set by + # `act` param in fluid.layers.batch_norm above. + if act == 'leaky': + out = fluid.layers.leaky_relu(x=out, alpha=0.1) + + return out + + def _downsample(self, input, ch_out, filter_size=3, stride=2, padding=1, name=None): + return self._conv_norm(input, ch_out=ch_out, filter_size=filter_size, stride=stride, padding=padding, name=name) + + def basicblock(self, input, ch_out, name=None): + conv1 = self._conv_norm(input, ch_out=ch_out, filter_size=1, stride=1, padding=0, name=name + ".0") + conv2 = self._conv_norm(conv1, ch_out=ch_out * 2, filter_size=3, stride=1, padding=1, name=name + ".1") + out = fluid.layers.elementwise_add(x=input, y=conv2, act=None) + return out + + def layer_warp(self, block_func, input, ch_out, count, name=None): + out = block_func(input, ch_out=ch_out, name='{}.0'.format(name)) + for j in six.moves.xrange(1, count): + out = block_func(out, ch_out=ch_out, name='{}.{}'.format(name, j)) + return out + + def __call__(self, input): + """ + Get the backbone of DarkNet, that is output for the 5 stages. 
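+
+        Returns the list of feature maps from the five stages when
+        get_prediction is False, otherwise the classification softmax output.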
+ """ + stages, block_func = self.depth_cfg[self.depth] + stages = stages[0:5] + conv = self._conv_norm( + input=input, ch_out=32, filter_size=3, stride=1, padding=1, name=self.prefix_name + "yolo_input") + downsample_ = self._downsample( + input=conv, ch_out=conv.shape[1] * 2, name=self.prefix_name + "yolo_input.downsample") + blocks = [] + for i, stage in enumerate(stages): + block = self.layer_warp( + block_func=block_func, + input=downsample_, + ch_out=32 * 2**i, + count=stage, + name=self.prefix_name + "stage.{}".format(i)) + blocks.append(block) + if i < len(stages) - 1: # do not downsaple in the last stage + downsample_ = self._downsample( + input=block, ch_out=block.shape[1] * 2, name=self.prefix_name + "stage.{}.downsample".format(i)) + if self.get_prediction: + pool = fluid.layers.pool2d(input=block, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc( + input=pool, + size=self.class_dim, + param_attr=ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv), name='fc_weights'), + bias_attr=ParamAttr(name='fc_offset')) + out = fluid.layers.softmax(out) + return out + else: + return blocks diff --git a/modules/image/object_detection/yolov3_darknet53_coco2017/data_feed.py b/modules/image/object_detection/yolov3_darknet53_coco2017/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..28abc06a2bd25028d13bbc4ea724c111ca966477 --- /dev/null +++ b/modules/image/object_detection/yolov3_darknet53_coco2017/data_feed.py @@ -0,0 +1,69 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import os + +import cv2 +import numpy as np + +__all__ = ['reader'] + + +def reader(paths=[], images=None): + """ + data generator + + Args: + paths (list[str]): paths to images. + images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] + + Yield: + res (list): preprocessed image and the size of original image. + """ + img_list = [] + if paths: + assert type(paths) is list, "type(paths) is not list." 
+ for img_path in paths: + assert os.path.isfile(img_path), "The {} isn't a valid file path.".format(img_path) + img = cv2.imread(img_path).astype('float32') + img_list.append(img) + if images is not None: + for img in images: + img_list.append(img) + + for im in img_list: + # im_size + im_shape = im.shape + im_size = np.array([im_shape[0], im_shape[1]], dtype=np.int32) + + # decode image + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + + # resize image + target_size = 608 + im_size_min = np.min(im_shape[0:2]) + im_size_max = np.max(im_shape[0:2]) + if float(im_size_min) == 0: + raise ZeroDivisionError('min size of image is 0') + + im_scale_x = float(target_size) / float(im_shape[1]) + im_scale_y = float(target_size) / float(im_shape[0]) + im = cv2.resize(im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=2) + + # normalize image + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + im = im.astype(np.float32, copy=False) + mean = np.array(mean)[np.newaxis, np.newaxis, :] + std = np.array(std)[np.newaxis, np.newaxis, :] + im = im / 255.0 + im -= mean + im /= std + + # permute + im = np.swapaxes(im, 1, 2) + im = np.swapaxes(im, 1, 0) + + yield [im, im_size] diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/label_file.txt b/modules/image/object_detection/yolov3_darknet53_coco2017/label_file.txt similarity index 100% rename from hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/label_file.txt rename to modules/image/object_detection/yolov3_darknet53_coco2017/label_file.txt diff --git a/modules/image/object_detection/yolov3_darknet53_coco2017/module.py b/modules/image/object_detection/yolov3_darknet53_coco2017/module.py new file mode 100644 index 0000000000000000000000000000000000000000..7886ef4e6af81aeb47d4d710af86dcfcc4d5ad03 --- /dev/null +++ b/modules/image/object_detection/yolov3_darknet53_coco2017/module.py @@ -0,0 +1,269 @@ +# coding=utf-8 +from __future__ import absolute_import + +import ast +import argparse +import os +from functools import partial + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving +from paddlehub.common.paddle_helper import add_vars_prefix + +from yolov3_darknet53_coco2017.darknet import DarkNet +from yolov3_darknet53_coco2017.processor import load_label_info, postprocess, base64_to_cv2 +from yolov3_darknet53_coco2017.data_feed import reader +from yolov3_darknet53_coco2017.yolo_head import MultiClassNMS, YOLOv3Head + + +@moduleinfo( + name="yolov3_darknet53_coco2017", + version="1.1.1", + type="CV/object_detection", + summary="Baidu's YOLOv3 model for object detection, with backbone DarkNet53, trained with dataset coco2017.", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class YOLOv3DarkNet53Coco2017(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, "yolov3_darknet53_model") + self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) + self._set_config() + + def _set_config(self): + """ + predictor config setting. 
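+
+        A CPU predictor is always created; a GPU predictor is created in
+        addition when CUDA_VISIBLE_DEVICES names a usable device.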
+ """ + cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + cpu_config.switch_ir_optim(False) + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def context(self, trainable=True, pretrained=True, get_prediction=False): + """ + Distill the Head Features, so as to perform transfer learning. + + Args: + trainable (bool): whether to set parameters trainable. + pretrained (bool): whether to load default pretrained model. + get_prediction (bool): whether to get prediction. + + Returns: + inputs(dict): the input variables. + outputs(dict): the output variables. + context_prog (Program): the program to execute transfer learning. + """ + context_prog = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(context_prog, startup_program): + with fluid.unique_name.guard(): + # image + image = fluid.layers.data(name='image', shape=[3, 608, 608], dtype='float32') + # backbone + backbone = DarkNet(norm_type='bn', norm_decay=0., depth=53) + # body_feats + body_feats = backbone(image) + # im_size + im_size = fluid.layers.data(name='im_size', shape=[2], dtype='int32') + # yolo_head + yolo_head = YOLOv3Head(num_classes=80) + # head_features + head_features, body_features = yolo_head._get_outputs(body_feats, is_train=trainable) + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + # var_prefix + var_prefix = '@HUB_{}@'.format(self.name) + # name of inputs + inputs = {'image': var_prefix + image.name, 'im_size': var_prefix + im_size.name} + # name of outputs + if get_prediction: + bbox_out = yolo_head.get_prediction(head_features, im_size) + outputs = {'bbox_out': [var_prefix + bbox_out.name]} + else: + outputs = { + 'head_features': [var_prefix + var.name for var in head_features], + 'body_features': [var_prefix + var.name for var in body_features] + } + # add_vars_prefix + add_vars_prefix(context_prog, var_prefix) + add_vars_prefix(fluid.default_startup_program(), var_prefix) + # inputs + inputs = {key: context_prog.global_block().vars[value] for key, value in inputs.items()} + # outputs + outputs = { + key: [context_prog.global_block().vars[varname] for varname in value] + for key, value in outputs.items() + } + # trainable + for param in context_prog.global_block().iter_parameters(): + param.trainable = trainable + # pretrained + if pretrained: + + def _if_exist(var): + return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist) + else: + exe.run(startup_program) + + return inputs, outputs, context_prog + + def object_detection(self, + paths=None, + images=None, + data=None, + batch_size=1, + use_gpu=False, + output_dir='detection_result', + score_thresh=0.5, + visualization=True): + """API of Object Detection. + + Args: + paths (list[str]): The paths of images. + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + output_dir (str): The path to store output images. 
+            visualization (bool): Whether to save image or not.
+            score_thresh (float): threshold for object detection.
+
+        Returns:
+            res (list[dict]): The result of COCO2017 detection. Keys include 'data' and 'save_path', and the corresponding values are:
+                data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label' and 'confidence', and the corresponding values are:
+                    left (float): The X coordinate of the upper left corner of the bounding box;
+                    top (float): The Y coordinate of the upper left corner of the bounding box;
+                    right (float): The X coordinate of the lower right corner of the bounding box;
+                    bottom (float): The Y coordinate of the lower right corner of the bounding box;
+                    label (str): The label of detection result;
+                    confidence (float): The confidence of detection result.
+                save_path (str, optional): The path to save output images.
+        """
+        if use_gpu:
+            try:
+                _places = os.environ["CUDA_VISIBLE_DEVICES"]
+                int(_places[0])
+            except:
+                raise RuntimeError(
+                    "Environment variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as the id of the visible CUDA device."
+                )
+
+        paths = paths if paths else list()
+        if data and 'image' in data:
+            paths += data['image']
+
+        data_reader = partial(reader, paths, images)
+        batch_reader = fluid.io.batch(data_reader, batch_size=batch_size)
+        res = []
+        for iter_id, feed_data in enumerate(batch_reader()):
+            feed_data = np.array(feed_data)
+            image_tensor = PaddleTensor(np.array(list(feed_data[:, 0])))
+            im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1])))
+            if use_gpu:
+                data_out = self.gpu_predictor.run([image_tensor, im_size_tensor])
+            else:
+                data_out = self.cpu_predictor.run([image_tensor, im_size_tensor])
+
+            output = postprocess(
+                paths=paths,
+                images=images,
+                data_out=data_out,
+                score_thresh=score_thresh,
+                label_names=self.label_names,
+                output_dir=output_dir,
+                handle_id=iter_id * batch_size,
+                visualization=visualization)
+            res.extend(output)
+        return res
+
+    def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True):
+        if combined:
+            model_filename = "__model__" if not model_filename else model_filename
+            params_filename = "__params__" if not params_filename else params_filename
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+
+        program, feeded_var_names, target_vars = fluid.io.load_inference_model(
+            dirname=self.default_pretrained_model_path, executor=exe)
+
+        fluid.io.save_inference_model(
+            dirname=dirname,
+            main_program=program,
+            executor=exe,
+            feeded_var_names=feeded_var_names,
+            target_vars=target_vars,
+            model_filename=model_filename,
+            params_filename=params_filename)
+
+    @serving
+    def serving_method(self, images, **kwargs):
+        """
+        Run as a service.
+        """
+        images_decode = [base64_to_cv2(image) for image in images]
+        results = self.object_detection(images=images_decode, **kwargs)
+        return results
+
+    @runnable
+    def run_cmd(self, argvs):
+        """
+        Run as a command.
+        """
+        self.parser = argparse.ArgumentParser(
+            description="Run the {} module.".format(self.name),
+            prog='hub run {}'.format(self.name),
+            usage='%(prog)s',
+            add_help=True)
+        self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required")
+        self.arg_config_group = self.parser.add_argument_group(
+            title="Config options", description="Run configuration for controlling module behavior, not required.")
+        self.add_module_config_arg()
+        self.add_module_input_arg()
+        args = self.parser.parse_args(argvs)
+        results = self.object_detection(
+            paths=[args.input_path],
+            batch_size=args.batch_size,
+            use_gpu=args.use_gpu,
+            output_dir=args.output_dir,
+            visualization=args.visualization,
+            score_thresh=args.score_thresh)
+        return results
+
+    def add_module_config_arg(self):
+        """
+        Add the command config options.
+        """
+        self.arg_config_group.add_argument(
+            '--use_gpu', type=ast.literal_eval, default=False, help="whether to use GPU or not")
+        self.arg_config_group.add_argument(
+            '--output_dir', type=str, default='detection_result', help="The directory to save output images.")
+        self.arg_config_group.add_argument(
+            '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.")
+
+    def add_module_input_arg(self):
+        """
+        Add the command input options.
+        """
+        self.arg_input_group.add_argument('--input_path', type=str, help="path to image.")
+        self.arg_input_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.")
+        self.arg_input_group.add_argument(
+            '--score_thresh', type=ast.literal_eval, default=0.5, help="threshold for object detection.")
diff --git a/modules/image/object_detection/yolov3_darknet53_coco2017/processor.py b/modules/image/object_detection/yolov3_darknet53_coco2017/processor.py
new file mode 100644
index 0000000000000000000000000000000000000000..64049e42b1d4dfa67aff606cae4490710b9cd6cc
--- /dev/null
+++ b/modules/image/object_detection/yolov3_darknet53_coco2017/processor.py
@@ -0,0 +1,163 @@
+# coding=utf-8
+import base64
+import os
+
+import cv2
+import numpy as np
+from PIL import Image, ImageDraw
+
+__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess']
+
+
+def base64_to_cv2(b64str):
+    data = base64.b64decode(b64str.encode('utf8'))
+    # np.fromstring is deprecated for binary input; np.frombuffer is the drop-in replacement.
+    data = np.frombuffer(data, np.uint8)
+    data = cv2.imdecode(data, cv2.IMREAD_COLOR)
+    return data
+
+
+def check_dir(dir_path):
+    if not os.path.exists(dir_path):
+        os.makedirs(dir_path)
+    elif os.path.isfile(dir_path):
+        os.remove(dir_path)
+        os.makedirs(dir_path)
+
+
+def get_save_image_name(img, output_dir, image_path):
+    """Get save image name from source image path.
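+
+    The extension is taken from the source file name when it has one, and is
+    otherwise inferred from the PIL image format or mode.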
+    """
+    image_name = os.path.split(image_path)[-1]
+    name, ext = os.path.splitext(image_name)
+
+    if ext == '':
+        if img.format == 'PNG':
+            ext = '.png'
+        elif img.format == 'JPEG':
+            ext = '.jpg'
+        elif img.format == 'BMP':
+            ext = '.bmp'
+        else:
+            if img.mode == "RGB" or img.mode == "L":
+                ext = ".jpg"
+            elif img.mode == "RGBA" or img.mode == "P":
+                ext = '.png'
+
+    return os.path.join(output_dir, "{}".format(name)) + ext
+
+
+def draw_bounding_box_on_image(image_path, data_list, save_dir):
+    image = Image.open(image_path)
+    draw = ImageDraw.Draw(image)
+    for data in data_list:
+        left, right, top, bottom = data['left'], data['right'], data['top'], data['bottom']
+        # draw bbox
+        draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=2, fill='red')
+        # draw label
+        if image.mode == 'RGB':
+            text = data['label'] + ": %.2f%%" % (100 * data['confidence'])
+            textsize_width, textsize_height = draw.textsize(text=text)
+            draw.rectangle(
+                xy=(left, top - (textsize_height + 5), left + textsize_width + 10, top), fill=(255, 255, 255))
+            draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0))
+
+    save_name = get_save_image_name(image, save_dir, image_path)
+    if os.path.exists(save_name):
+        os.remove(save_name)
+
+    image.save(save_name)
+    return save_name
+
+
+def clip_bbox(bbox, img_width, img_height):
+    xmin = max(min(bbox[0], img_width), 0.)
+    ymin = max(min(bbox[1], img_height), 0.)
+    xmax = max(min(bbox[2], img_width), 0.)
+    ymax = max(min(bbox[3], img_height), 0.)
+    return float(xmin), float(ymin), float(xmax), float(ymax)
+
+
+def load_label_info(file_path):
+    with open(file_path, 'r') as fr:
+        text = fr.readlines()
+        label_names = []
+        for info in text:
+            label_names.append(info.strip())
+        return label_names
+
+
+def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True):
+    """
+    postprocess the lod_tensor produced by fluid.Executor.run
+
+    Args:
+        paths (list[str]): The paths of images.
+        images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
+        data_out (lod_tensor): data output of predictor.
+        output_dir (str): The path to store output images.
+        visualization (bool): Whether to save image or not.
+        score_thresh (float): the confidence threshold; boxes scoring below it are dropped.
+        label_names (list[str]): label names.
+        handle_id (int): The number of images that have been handled.
+
+    Returns:
+        res (list[dict]): The result of detection. Keys include 'data' and 'save_path', and the corresponding values are:
+            data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label' and 'confidence', and the corresponding values are:
+                left (float): The X coordinate of the upper left corner of the bounding box;
+                top (float): The Y coordinate of the upper left corner of the bounding box;
+                right (float): The X coordinate of the lower right corner of the bounding box;
+                bottom (float): The Y coordinate of the lower right corner of the bounding box;
+                label (str): The label of detection result;
+                confidence (float): The confidence of detection result.
+            save_path (str): The path to save output images.
+    """
+    lod_tensor = data_out[0]
+    lod = lod_tensor.lod[0]
+    results = lod_tensor.as_ndarray()
+
+    check_dir(output_dir)
+
+    assert type(paths) is list, "type(paths) is not list."
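+    # Results are ordered as [paths..., images...]; the first handle_id path
+    # entries were consumed by earlier batches, so compute how many of this
+    # batch's results still correspond to file paths.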
+ if handle_id < len(paths): + unhandled_paths = paths[handle_id:] + unhandled_paths_num = len(unhandled_paths) + else: + unhandled_paths_num = 0 + + output = list() + for index in range(len(lod) - 1): + output_i = {'data': []} + if index < unhandled_paths_num: + org_img_path = unhandled_paths[index] + org_img = Image.open(org_img_path) + else: + org_img = images[index - unhandled_paths_num] + org_img = org_img.astype(np.uint8) + org_img = Image.fromarray(org_img[:, :, ::-1]) + if visualization: + org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format((handle_id + index))) + org_img.save(org_img_path) + org_img_height = org_img.height + org_img_width = org_img.width + result_i = results[lod[index]:lod[index + 1]] + for row in result_i: + if len(row) != 6: + continue + if row[1] < score_thresh: + continue + category_id = int(row[0]) + confidence = row[1] + bbox = row[2:] + dt = {} + dt['label'] = label_names[category_id] + dt['confidence'] = float(confidence) + dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height) + output_i['data'].append(dt) + + output.append(output_i) + if visualization: + output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir) + + return output diff --git a/modules/image/object_detection/yolov3_darknet53_coco2017/yolo_head.py b/modules/image/object_detection/yolov3_darknet53_coco2017/yolo_head.py new file mode 100644 index 0000000000000000000000000000000000000000..cfe796c2edebedbff19f302b24533849ee09c2e3 --- /dev/null +++ b/modules/image/object_detection/yolov3_darknet53_coco2017/yolo_head.py @@ -0,0 +1,231 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import OrderedDict + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.regularizer import L2Decay + +__all__ = ['MultiClassNMS', 'YOLOv3Head'] + + +class MultiClassNMS(object): + # __op__ = fluid.layers.multiclass_nms + def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k, normalized, score_threshold): + super(MultiClassNMS, self).__init__() + self.background_label = background_label + self.keep_top_k = keep_top_k + self.nms_threshold = nms_threshold + self.nms_top_k = nms_top_k + self.normalized = normalized + self.score_threshold = score_threshold + + +class YOLOv3Head(object): + """Head block for YOLOv3 network + + Args: + norm_decay (float): weight decay for normalization layer weights + num_classes (int): number of output classes + ignore_thresh (float): threshold to ignore confidence loss + label_smooth (bool): whether to use label smoothing + anchors (list): anchors + anchor_masks (list): anchor masks + nms (object): an instance of `MultiClassNMS` + """ + + def __init__(self, + norm_decay=0., + num_classes=80, + ignore_thresh=0.7, + label_smooth=True, + anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], + [373, 326]], + anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], + nms=MultiClassNMS( + background_label=-1, + keep_top_k=100, + nms_threshold=0.45, + nms_top_k=1000, + normalized=True, + score_threshold=0.01), + weight_prefix_name=''): + self.norm_decay = norm_decay + self.num_classes = num_classes + self.ignore_thresh = ignore_thresh + self.label_smooth = label_smooth + self.anchor_masks = anchor_masks + self._parse_anchors(anchors) + self.nms = nms + self.prefix_name = weight_prefix_name + + def _conv_bn(self, input, 
ch_out, filter_size, stride, padding, act='leaky', is_test=True, name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=ch_out, + filter_size=filter_size, + stride=stride, + padding=padding, + act=None, + param_attr=ParamAttr(name=name + ".conv.weights"), + bias_attr=False) + + bn_name = name + ".bn" + bn_param_attr = ParamAttr(regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale') + bn_bias_attr = ParamAttr(regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset') + out = fluid.layers.batch_norm( + input=conv, + act=None, + is_test=is_test, + param_attr=bn_param_attr, + bias_attr=bn_bias_attr, + moving_mean_name=bn_name + '.mean', + moving_variance_name=bn_name + '.var') + + if act == 'leaky': + out = fluid.layers.leaky_relu(x=out, alpha=0.1) + return out + + def _detection_block(self, input, channel, is_test=True, name=None): + assert channel % 2 == 0, \ + "channel {} cannot be divided by 2 in detection block {}" \ + .format(channel, name) + + conv = input + for j in range(2): + conv = self._conv_bn( + conv, channel, filter_size=1, stride=1, padding=0, is_test=is_test, name='{}.{}.0'.format(name, j)) + conv = self._conv_bn( + conv, channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test, name='{}.{}.1'.format(name, j)) + route = self._conv_bn( + conv, channel, filter_size=1, stride=1, padding=0, is_test=is_test, name='{}.2'.format(name)) + tip = self._conv_bn( + route, channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test, name='{}.tip'.format(name)) + return route, tip + + def _upsample(self, input, scale=2, name=None): + out = fluid.layers.resize_nearest(input=input, scale=float(scale), name=name) + return out + + def _parse_anchors(self, anchors): + """ + Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors + + """ + self.anchors = [] + self.mask_anchors = [] + + assert len(anchors) > 0, "ANCHORS not set." + assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." 
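+        # Flatten [[w, h], ...] into [w0, h0, w1, h1, ...], and collect the
+        # flattened anchor pairs selected by each mask into mask_anchors,
+        # one list per detection scale.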
+ + for anchor in anchors: + assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) + self.anchors.extend(anchor) + + anchor_num = len(anchors) + for masks in self.anchor_masks: + self.mask_anchors.append([]) + for mask in masks: + assert mask < anchor_num, "anchor mask index overflow" + self.mask_anchors[-1].extend(anchors[mask]) + + def _get_outputs(self, input, is_train=True): + """ + Get YOLOv3 head output + + Args: + input (list): List of Variables, output of backbone stages + is_train (bool): whether in train or test mode + + Returns: + outputs (list): Variables of each output layer + """ + + outputs = [] + + # get last out_layer_num blocks in reverse order + out_layer_num = len(self.anchor_masks) + if isinstance(input, OrderedDict): + blocks = list(input.values())[-1:-out_layer_num - 1:-1] + else: + blocks = input[-1:-out_layer_num - 1:-1] + route = None + for i, block in enumerate(blocks): + if i > 0: # perform concat in first 2 detection_block + block = fluid.layers.concat(input=[route, block], axis=1) + route, tip = self._detection_block( + block, channel=512 // (2**i), is_test=(not is_train), name=self.prefix_name + "yolo_block.{}".format(i)) + + # out channel number = mask_num * (5 + class_num) + num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) + block_out = fluid.layers.conv2d( + input=tip, + num_filters=num_filters, + filter_size=1, + stride=1, + padding=0, + act=None, + param_attr=ParamAttr(name=self.prefix_name + "yolo_output.{}.conv.weights".format(i)), + bias_attr=ParamAttr( + regularizer=L2Decay(0.), name=self.prefix_name + "yolo_output.{}.conv.bias".format(i))) + outputs.append(block_out) + + if i < len(blocks) - 1: + # do not perform upsample in the last detection_block + route = self._conv_bn( + input=route, + ch_out=256 // (2**i), + filter_size=1, + stride=1, + padding=0, + is_test=(not is_train), + name=self.prefix_name + "yolo_transition.{}".format(i)) + # upsample + route = self._upsample(route) + + return outputs, blocks + + def get_prediction(self, outputs, im_size): + """ + Get prediction result of YOLOv3 network + + Args: + outputs (list): list of Variables, return from _get_outputs + im_size (Variable): Variable of size([h, w]) of each image + + Returns: + pred (Variable): The prediction result after non-max suppress. 
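+                Each row is [label, confidence, xmin, ymin, xmax, ymax],
+                which is the layout processor.postprocess parses.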
+
+        """
+        boxes = []
+        scores = []
+        downsample = 32
+        for i, output in enumerate(outputs):
+            box, score = fluid.layers.yolo_box(
+                x=output,
+                img_size=im_size,
+                anchors=self.mask_anchors[i],
+                class_num=self.num_classes,
+                conf_thresh=self.nms.score_threshold,
+                downsample_ratio=downsample,
+                name=self.prefix_name + "yolo_box" + str(i))
+            boxes.append(box)
+            scores.append(fluid.layers.transpose(score, perm=[0, 2, 1]))
+
+            downsample //= 2
+
+        yolo_boxes = fluid.layers.concat(boxes, axis=1)
+        yolo_scores = fluid.layers.concat(scores, axis=2)
+        pred = fluid.layers.multiclass_nms(
+            bboxes=yolo_boxes,
+            scores=yolo_scores,
+            score_threshold=self.nms.score_threshold,
+            nms_top_k=self.nms.nms_top_k,
+            keep_top_k=self.nms.keep_top_k,
+            nms_threshold=self.nms.nms_threshold,
+            background_label=self.nms.background_label,
+            normalized=self.nms.normalized,
+            name="multiclass_nms")
+        return pred
diff --git a/modules/image/object_detection/yolov3_darknet53_pascalvoc/module.py b/modules/image/object_detection/yolov3_darknet53_pascalvoc/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ec816e51989327ad8006b02e878fb86ab235c31
--- /dev/null
+++ b/modules/image/object_detection/yolov3_darknet53_pascalvoc/module.py
@@ -0,0 +1,325 @@
+import os
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn.initializer import Normal, Constant
+from paddle.regularizer import L2Decay
+from paddlehub.module.cv_module import Yolov3Module
+import paddlehub.process.detect_transforms as T
+from paddlehub.module.module import moduleinfo
+
+
+class ConvBNLayer(nn.Layer):
+    """Basic block for Darknet"""
+
+    def __init__(self,
+                 ch_in: int,
+                 ch_out: int,
+                 filter_size: int = 3,
+                 stride: int = 1,
+                 groups: int = 1,
+                 padding: int = 0,
+                 act: str = 'leaky',
+                 is_test: bool = False):
+        super(ConvBNLayer, self).__init__()
+
+        self.conv = nn.Conv2d(
+            ch_in,
+            ch_out,
+            filter_size,
+            padding=padding,
+            stride=stride,
+            groups=groups,
+            weight_attr=paddle.ParamAttr(initializer=Normal(0., 0.02)),
+            bias_attr=False)
+
+        self.batch_norm = nn.BatchNorm(
+            num_channels=ch_out,
+            is_test=is_test,
+            param_attr=paddle.ParamAttr(initializer=Normal(0., 0.02), regularizer=L2Decay(0.)))
+        self.act = act
+
+    def forward(self, inputs: paddle.Tensor) -> paddle.Tensor:
+        out = self.conv(inputs)
+        out = self.batch_norm(out)
+        if self.act == "leaky":
+            out = F.leaky_relu(x=out, negative_slope=0.1)
+        return out
+
+
+class DownSample(nn.Layer):
+    """Downsample block for Darknet"""
+
+    def __init__(self,
+                 ch_in: int,
+                 ch_out: int,
+                 filter_size: int = 3,
+                 stride: int = 2,
+                 padding: int = 1,
+                 is_test: bool = False):
+        super(DownSample, self).__init__()
+
+        self.conv_bn_layer = ConvBNLayer(
+            ch_in=ch_in, ch_out=ch_out, filter_size=filter_size, stride=stride, padding=padding, is_test=is_test)
+        self.ch_out = ch_out
+
+    def forward(self, inputs: paddle.Tensor) -> paddle.Tensor:
+        out = self.conv_bn_layer(inputs)
+        return out
+
+
+class BasicBlock(nn.Layer):
+    """Basic residual block for Darknet"""
+
+    def __init__(self, ch_in: int, ch_out: int, is_test: bool = False):
+        super(BasicBlock, self).__init__()
+
+        self.conv1 = ConvBNLayer(ch_in=ch_in, ch_out=ch_out, filter_size=1, stride=1, padding=0, is_test=is_test)
+        self.conv2 = ConvBNLayer(ch_in=ch_out, ch_out=ch_out * 2, filter_size=3, stride=1, padding=1, is_test=is_test)
+
+    def forward(self, inputs: paddle.Tensor) -> paddle.Tensor:
+        conv1 = self.conv1(inputs)
+        conv2 = self.conv2(conv1)
+        out = paddle.elementwise_add(x=inputs,
y=conv2, act=None) + return out + + +class LayerWarp(nn.Layer): + """Warp layer composed by basic residual blocks""" + + def __init__(self, ch_in: int, ch_out: int, count: int, is_test: bool = False): + super(LayerWarp, self).__init__() + self.basicblock0 = BasicBlock(ch_in, ch_out, is_test=is_test) + self.res_out_list = [] + for i in range(1, count): + res_out = self.add_sublayer("basic_block_%d" % (i), BasicBlock(ch_out * 2, ch_out, is_test=is_test)) + self.res_out_list.append(res_out) + self.ch_out = ch_out + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.basicblock0(inputs) + for basic_block_i in self.res_out_list: + y = basic_block_i(y) + return y + + +class DarkNet53_conv_body(nn.Layer): + """Darknet53 + Args: + ch_in(int): Input channels, default is 3. + is_test (bool): Set the test mode, default is True. + """ + + def __init__(self, ch_in: int = 3, is_test: bool = False): + super(DarkNet53_conv_body, self).__init__() + self.stages = [1, 2, 8, 8, 4] + self.stages = self.stages[0:5] + + self.conv0 = ConvBNLayer(ch_in=ch_in, ch_out=32, filter_size=3, stride=1, padding=1, is_test=is_test) + + self.downsample0 = DownSample(ch_in=32, ch_out=32 * 2, is_test=is_test) + self.darknet53_conv_block_list = [] + self.downsample_list = [] + ch_in = [64, 128, 256, 512, 1024] + + for i, stage in enumerate(self.stages): + conv_block = self.add_sublayer("stage_%d" % (i), + LayerWarp(int(ch_in[i]), 32 * (2**i), stage, is_test=is_test)) + self.darknet53_conv_block_list.append(conv_block) + + for i in range(len(self.stages) - 1): + downsample = self.add_sublayer( + "stage_%d_downsample" % i, DownSample( + ch_in=32 * (2**(i + 1)), ch_out=32 * (2**(i + 2)), is_test=is_test)) + self.downsample_list.append(downsample) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + out = self.conv0(inputs) + out = self.downsample0(out) + blocks = [] + for i, conv_block_i in enumerate(self.darknet53_conv_block_list): + out = conv_block_i(out) + blocks.append(out) + if i < len(self.stages) - 1: + out = self.downsample_list[i](out) + return blocks[-1:-4:-1] + + +class YoloDetectionBlock(nn.Layer): + """Basic block for Yolov3""" + + def __init__(self, ch_in: int, channel: int, is_test: bool = True): + super(YoloDetectionBlock, self).__init__() + + assert channel % 2 == 0, \ + "channel {} cannot be divided by 2".format(channel) + + self.conv0 = ConvBNLayer(ch_in=ch_in, ch_out=channel, filter_size=1, stride=1, padding=0, is_test=is_test) + self.conv1 = ConvBNLayer(ch_in=channel, ch_out=channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test) + self.conv2 = ConvBNLayer(ch_in=channel * 2, ch_out=channel, filter_size=1, stride=1, padding=0, is_test=is_test) + self.conv3 = ConvBNLayer(ch_in=channel, ch_out=channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test) + self.route = ConvBNLayer(ch_in=channel * 2, ch_out=channel, filter_size=1, stride=1, padding=0, is_test=is_test) + self.tip = ConvBNLayer(ch_in=channel, ch_out=channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test) + + def forward(self, inputs): + out = self.conv0(inputs) + out = self.conv1(out) + out = self.conv2(out) + out = self.conv3(out) + route = self.route(out) + tip = self.tip(route) + return route, tip + + +class Upsample(nn.Layer): + """Upsample block for Yolov3""" + + def __init__(self, scale: int = 2): + super(Upsample, self).__init__() + self.scale = scale + + def forward(self, inputs: paddle.Tensor): + shape_nchw = paddle.to_tensor(inputs.shape) + shape_hw = paddle.slice(shape_nchw, 
axes=[0], starts=[2], ends=[4]) + shape_hw.stop_gradient = True + in_shape = paddle.cast(shape_hw, dtype='int32') + out_shape = in_shape * self.scale + out_shape.stop_gradient = True + out = F.resize_nearest(input=inputs, scale=self.scale, actual_shape=out_shape) + return out + + +@moduleinfo( + name="yolov3_darknet53_pascalvoc", + type="CV/image_editing", + author="paddlepaddle", + author_email="", + summary="Yolov3 is a detection model, this module is trained with VOC dataset.", + version="1.0.0", + meta=Yolov3Module) +class YOLOv3(nn.Layer): + """YOLOV3 for detection + + Args: + ch_in(int): Input channels, default is 3. + class_num(int): Categories for detection,if dataset is voc, class_num is 20. + ignore_thresh(float): The ignore threshold to ignore confidence loss. + valid_thresh(float): Threshold to filter out bounding boxes with low confidence score. + nms_topk(int): Maximum number of detections to be kept according to the confidences after the filtering + detections based on score_threshold. + nms_posk(int): Number of total bboxes to be kept per image after NMS step. -1 means keeping all bboxes after NMS + step. + nms_thresh (float): The threshold to be used in NMS. Default: 0.3. + is_train (bool): Set the train mode, default is True. + load_checkpoint(str): Whether to load checkpoint. + """ + + def __init__(self, + ch_in: int = 3, + class_num: int = 20, + ignore_thresh: float = 0.7, + valid_thresh: float = 0.005, + nms_topk: int = 400, + nms_posk: int = 100, + nms_thresh: float = 0.45, + is_train: bool = True, + load_checkpoint: str = None): + super(YOLOv3, self).__init__() + + self.is_train = is_train + self.block = DarkNet53_conv_body(ch_in=ch_in, is_test=not self.is_train) + self.block_outputs = [] + self.yolo_blocks = [] + self.route_blocks_2 = [] + self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] + self.anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326] + self.class_num = class_num + self.ignore_thresh = ignore_thresh + self.valid_thresh = valid_thresh + self.nms_topk = nms_topk + self.nms_posk = nms_posk + self.nms_thresh = nms_thresh + ch_in_list = [1024, 768, 384] + + for i in range(3): + yolo_block = self.add_sublayer( + "yolo_detecton_block_%d" % (i), + YoloDetectionBlock(ch_in_list[i], channel=512 // (2**i), is_test=not self.is_train)) + self.yolo_blocks.append(yolo_block) + + num_filters = len(self.anchor_masks[i]) * (self.class_num + 5) + block_out = self.add_sublayer( + "block_out_%d" % (i), + nn.Conv2d( + 1024 // (2**i), + num_filters, + 1, + stride=1, + padding=0, + weight_attr=paddle.ParamAttr(initializer=Normal(0., 0.02)), + bias_attr=paddle.ParamAttr(initializer=Constant(0.0), regularizer=L2Decay(0.)))) + self.block_outputs.append(block_out) + + if i < 2: + route = self.add_sublayer( + "route2_%d" % i, + ConvBNLayer( + ch_in=512 // (2**i), + ch_out=256 // (2**i), + filter_size=1, + stride=1, + padding=0, + is_test=(not self.is_train))) + self.route_blocks_2.append(route) + self.upsample = Upsample() + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint)[0] + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'yolov3_darknet53_voc.pdparams') + if not os.path.exists(checkpoint): + os.system( + 'wget https://paddlehub.bj.bcebos.com/dygraph/detection/yolov3_darknet53_voc.pdparams -O ' \ + + checkpoint) + model_dict = paddle.load(checkpoint)[0] + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + def 
transform(self, img): + if self.is_train: + transform = T.Compose([ + T.RandomDistort(), + T.RandomExpand(fill=[0.485, 0.456, 0.406]), + T.RandomCrop(), + T.Resize(target_size=416), + T.RandomFlip(), + T.ShuffleBox(), + T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + else: + transform = T.Compose([ + T.Resize(target_size=416, interp='CUBIC'), + T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return transform(img) + + def forward(self, inputs: paddle.Tensor): + outputs = [] + blocks = self.block(inputs) + route = None + for i, block in enumerate(blocks): + if i > 0: + block = paddle.concat([route, block], axis=1) + route, tip = self.yolo_blocks[i](block) + block_out = self.block_outputs[i](tip) + outputs.append(block_out) + if i < 2: + route = self.route_blocks_2[i](route) + route = self.upsample(route) + + return outputs diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/README.md b/modules/image/object_detection/yolov3_darknet53_pedestrian/README.md similarity index 100% rename from hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/README.md rename to modules/image/object_detection/yolov3_darknet53_pedestrian/README.md diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/__init__.py b/modules/image/object_detection/yolov3_darknet53_pedestrian/__init__.py similarity index 100% rename from hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/__init__.py rename to modules/image/object_detection/yolov3_darknet53_pedestrian/__init__.py diff --git a/modules/image/object_detection/yolov3_darknet53_pedestrian/darknet.py b/modules/image/object_detection/yolov3_darknet53_pedestrian/darknet.py new file mode 100644 index 0000000000000000000000000000000000000000..58c5b090172b042d7df701c6691b3c1e867c1b23 --- /dev/null +++ b/modules/image/object_detection/yolov3_darknet53_pedestrian/darknet.py @@ -0,0 +1,123 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import six +import math + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.regularizer import L2Decay + +__all__ = ['DarkNet'] + + +class DarkNet(object): + """DarkNet, see https://pjreddie.com/darknet/yolo/ + Args: + depth (int): network depth, currently only darknet 53 is supported + norm_type (str): normalization type, 'bn' and 'sync_bn' are supported + norm_decay (float): weight decay for normalization layer weights + get_prediction (bool): whether to get prediction + class_dim (int): number of class while classification + """ + + def __init__(self, + depth=53, + norm_type='sync_bn', + norm_decay=0., + weight_prefix_name='', + get_prediction=False, + class_dim=1000): + assert depth in [53], "unsupported depth value" + self.depth = depth + self.norm_type = norm_type + self.norm_decay = norm_decay + self.depth_cfg = {53: ([1, 2, 8, 8, 4], self.basicblock)} + self.prefix_name = weight_prefix_name + self.class_dim = class_dim + self.get_prediction = get_prediction + + def _conv_norm(self, input, ch_out, filter_size, stride, padding, act='leaky', name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=ch_out, + filter_size=filter_size, + stride=stride, + padding=padding, + act=None, + param_attr=ParamAttr(name=name + ".conv.weights"), + bias_attr=False) + + bn_name = name + ".bn" + bn_param_attr = ParamAttr(regularizer=L2Decay(float(self.norm_decay)), name=bn_name + 
+ '.scale')
+        bn_bias_attr = ParamAttr(regularizer=L2Decay(float(self.norm_decay)), name=bn_name + '.offset')
+
+        out = fluid.layers.batch_norm(
+            input=conv,
+            act=None,
+            param_attr=bn_param_attr,
+            bias_attr=bn_bias_attr,
+            moving_mean_name=bn_name + '.mean',
+            moving_variance_name=bn_name + '.var')
+
+        # leaky relu here has `alpha` as 0.1, which cannot be set by the
+        # `act` param in fluid.layers.batch_norm above.
+        if act == 'leaky':
+            out = fluid.layers.leaky_relu(x=out, alpha=0.1)
+
+        return out
+
+    def _downsample(self, input, ch_out, filter_size=3, stride=2, padding=1, name=None):
+        return self._conv_norm(input, ch_out=ch_out, filter_size=filter_size, stride=stride, padding=padding, name=name)
+
+    def basicblock(self, input, ch_out, name=None):
+        conv1 = self._conv_norm(input, ch_out=ch_out, filter_size=1, stride=1, padding=0, name=name + ".0")
+        conv2 = self._conv_norm(conv1, ch_out=ch_out * 2, filter_size=3, stride=1, padding=1, name=name + ".1")
+        out = fluid.layers.elementwise_add(x=input, y=conv2, act=None)
+        return out
+
+    def layer_warp(self, block_func, input, ch_out, count, name=None):
+        out = block_func(input, ch_out=ch_out, name='{}.0'.format(name))
+        for j in six.moves.xrange(1, count):
+            out = block_func(out, ch_out=ch_out, name='{}.{}'.format(name, j))
+        return out
+
+    def __call__(self, input):
+        """Get the backbone of DarkNet, that is, the outputs of its 5 stages.
+
+        :param input: Variable of input image
+        :type input: Variable
+        :Returns: The last variables of each stage.
+        """
+        stages, block_func = self.depth_cfg[self.depth]
+        stages = stages[0:5]
+        conv = self._conv_norm(
+            input=input, ch_out=32, filter_size=3, stride=1, padding=1, name=self.prefix_name + "yolo_input")
+        downsample_ = self._downsample(
+            input=conv, ch_out=conv.shape[1] * 2, name=self.prefix_name + "yolo_input.downsample")
+        blocks = []
+        for i, stage in enumerate(stages):
+            block = self.layer_warp(
+                block_func=block_func,
+                input=downsample_,
+                ch_out=32 * 2**i,
+                count=stage,
+                name=self.prefix_name + "stage.{}".format(i))
+            blocks.append(block)
+            if i < len(stages) - 1:  # do not downsample in the last stage
+                downsample_ = self._downsample(
+                    input=block, ch_out=block.shape[1] * 2, name=self.prefix_name + "stage.{}.downsample".format(i))
+        if self.get_prediction:
+            pool = fluid.layers.pool2d(input=block, pool_type='avg', global_pooling=True)
+            stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
+            out = fluid.layers.fc(
+                input=pool,
+                size=self.class_dim,
+                param_attr=ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv), name='fc_weights'),
+                bias_attr=ParamAttr(name='fc_offset'))
+            out = fluid.layers.softmax(out)
+            return out
+        else:
+            return blocks
diff --git a/modules/image/object_detection/yolov3_darknet53_pedestrian/data_feed.py b/modules/image/object_detection/yolov3_darknet53_pedestrian/data_feed.py
new file mode 100644
index 0000000000000000000000000000000000000000..28abc06a2bd25028d13bbc4ea724c111ca966477
--- /dev/null
+++ b/modules/image/object_detection/yolov3_darknet53_pedestrian/data_feed.py
@@ -0,0 +1,69 @@
+# coding=utf-8
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import division
+
+import os
+
+import cv2
+import numpy as np
+
+__all__ = ['reader']
+
+
+def reader(paths=[], images=None):
+    """
+    data generator
+
+    Args:
+        paths (list[str]): paths to images.
+        images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]
+
+    Yields:
+        res (list): preprocessed image and the size of original image.
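+            Each element is [im, im_size]: im is a normalized float32 CHW
+            image resized to 608x608, im_size the original (height, width).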
+ """ + img_list = [] + if paths: + assert type(paths) is list, "type(paths) is not list." + for img_path in paths: + assert os.path.isfile(img_path), "The {} isn't a valid file path.".format(img_path) + img = cv2.imread(img_path).astype('float32') + img_list.append(img) + if images is not None: + for img in images: + img_list.append(img) + + for im in img_list: + # im_size + im_shape = im.shape + im_size = np.array([im_shape[0], im_shape[1]], dtype=np.int32) + + # decode image + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + + # resize image + target_size = 608 + im_size_min = np.min(im_shape[0:2]) + im_size_max = np.max(im_shape[0:2]) + if float(im_size_min) == 0: + raise ZeroDivisionError('min size of image is 0') + + im_scale_x = float(target_size) / float(im_shape[1]) + im_scale_y = float(target_size) / float(im_shape[0]) + im = cv2.resize(im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=2) + + # normalize image + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + im = im.astype(np.float32, copy=False) + mean = np.array(mean)[np.newaxis, np.newaxis, :] + std = np.array(std)[np.newaxis, np.newaxis, :] + im = im / 255.0 + im -= mean + im /= std + + # permute + im = np.swapaxes(im, 1, 2) + im = np.swapaxes(im, 1, 0) + + yield [im, im_size] diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/label_file.txt b/modules/image/object_detection/yolov3_darknet53_pedestrian/label_file.txt similarity index 100% rename from hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/label_file.txt rename to modules/image/object_detection/yolov3_darknet53_pedestrian/label_file.txt diff --git a/modules/image/object_detection/yolov3_darknet53_pedestrian/module.py b/modules/image/object_detection/yolov3_darknet53_pedestrian/module.py new file mode 100644 index 0000000000000000000000000000000000000000..4b9b557608fe462ee30a27dc55294baa1ef49fbf --- /dev/null +++ b/modules/image/object_detection/yolov3_darknet53_pedestrian/module.py @@ -0,0 +1,282 @@ +# coding=utf-8 +from __future__ import absolute_import + +import ast +import argparse +import os +from functools import partial + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving +from paddlehub.common.paddle_helper import add_vars_prefix + +from yolov3_darknet53_pedestrian.darknet import DarkNet +from yolov3_darknet53_pedestrian.processor import load_label_info, postprocess, base64_to_cv2 +from yolov3_darknet53_pedestrian.data_feed import reader +from yolov3_darknet53_pedestrian.yolo_head import MultiClassNMS, YOLOv3Head + + +@moduleinfo( + name="yolov3_darknet53_pedestrian", + version="1.0.1", + type="CV/object_detection", + summary="Baidu's YOLOv3 model for pedestrian detection, with backbone DarkNet53.", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class YOLOv3DarkNet53Pedestrian(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, "yolov3_darknet53_pedestrian_model") + self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) + self._set_config() + + def _set_config(self): + """ + predictor config setting. 
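+
+        Builds a CPU AnalysisConfig predictor unconditionally, and a GPU
+        predictor as well when CUDA_VISIBLE_DEVICES is set.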
+ """ + cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + cpu_config.switch_ir_optim(False) + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def context(self, trainable=True, pretrained=True, get_prediction=False): + """ + Distill the Head Features, so as to perform transfer learning. + + Args: + trainable (bool): whether to set parameters trainable. + pretrained (bool): whether to load default pretrained model. + get_prediction (bool): whether to get prediction. + + Returns: + inputs(dict): the input variables. + outputs(dict): the output variables. + context_prog (Program): the program to execute transfer learning. + """ + context_prog = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(context_prog, startup_program): + with fluid.unique_name.guard(): + # image + image = fluid.layers.data(name='image', shape=[3, 608, 608], dtype='float32') + # backbone + backbone = DarkNet(norm_type='sync_bn', norm_decay=0., depth=53) + # body_feats + body_feats = backbone(image) + # im_size + im_size = fluid.layers.data(name='im_size', shape=[2], dtype='int32') + # yolo_head + yolo_head = YOLOv3Head( + anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], + anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], + [373, 326]], + norm_decay=0., + num_classes=1, + ignore_thresh=0.7, + label_smooth=True, + nms=MultiClassNMS( + background_label=-1, + keep_top_k=100, + nms_threshold=0.45, + nms_top_k=1000, + normalized=False, + score_threshold=0.01)) + # head_features + head_features, body_features = yolo_head._get_outputs(body_feats, is_train=trainable) + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + # var_prefix + var_prefix = '@HUB_{}@'.format(self.name) + # name of inputs + inputs = {'image': var_prefix + image.name, 'im_size': var_prefix + im_size.name} + # name of outputs + if get_prediction: + bbox_out = yolo_head.get_prediction(head_features, im_size) + outputs = {'bbox_out': [var_prefix + bbox_out.name]} + else: + outputs = { + 'head_features': [var_prefix + var.name for var in head_features], + 'body_features': [var_prefix + var.name for var in body_features] + } + # add_vars_prefix + add_vars_prefix(context_prog, var_prefix) + add_vars_prefix(fluid.default_startup_program(), var_prefix) + # inputs + inputs = {key: context_prog.global_block().vars[value] for key, value in inputs.items()} + # outputs + outputs = { + key: [context_prog.global_block().vars[varname] for varname in value] + for key, value in outputs.items() + } + # trainable + for param in context_prog.global_block().iter_parameters(): + param.trainable = trainable + # pretrained + if pretrained: + + def _if_exist(var): + return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist) + else: + exe.run(startup_program) + + return inputs, outputs, context_prog + + def object_detection(self, + paths=None, + images=None, + batch_size=1, + use_gpu=False, + 
+                         output_dir='yolov3_pedestrian_detect_output',
+                         score_thresh=0.2,
+                         visualization=True):
+        """API of Object Detection.
+
+        Args:
+            paths (list[str]): The paths of images.
+            images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
+            batch_size (int): batch size.
+            use_gpu (bool): Whether to use gpu.
+            output_dir (str): The path to store output images.
+            visualization (bool): Whether to save image or not.
+            score_thresh (float): threshold for object detection.
+
+        Returns:
+            res (list[dict]): The result of pedestrian detection. Keys include 'data' and 'save_path', and the corresponding values are:
+                data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label' and 'confidence', and the corresponding values are:
+                    left (float): The X coordinate of the upper left corner of the bounding box;
+                    top (float): The Y coordinate of the upper left corner of the bounding box;
+                    right (float): The X coordinate of the lower right corner of the bounding box;
+                    bottom (float): The Y coordinate of the lower right corner of the bounding box;
+                    label (str): The label of detection result;
+                    confidence (float): The confidence of detection result.
+                save_path (str, optional): The path to save output images.
+        """
+        if use_gpu:
+            try:
+                _places = os.environ["CUDA_VISIBLE_DEVICES"]
+                int(_places[0])
+            except:
+                raise RuntimeError(
+                    "Environment variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as the id of the visible CUDA device."
+                )
+
+        paths = paths if paths else list()
+        data_reader = partial(reader, paths, images)
+        batch_reader = fluid.io.batch(data_reader, batch_size=batch_size)
+        res = []
+        for iter_id, feed_data in enumerate(batch_reader()):
+            feed_data = np.array(feed_data)
+            image_tensor = PaddleTensor(np.array(list(feed_data[:, 0])))
+            im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1])))
+            if use_gpu:
+                data_out = self.gpu_predictor.run([image_tensor, im_size_tensor])
+            else:
+                data_out = self.cpu_predictor.run([image_tensor, im_size_tensor])
+
+            output = postprocess(
+                paths=paths,
+                images=images,
+                data_out=data_out,
+                score_thresh=score_thresh,
+                label_names=self.label_names,
+                output_dir=output_dir,
+                handle_id=iter_id * batch_size,
+                visualization=visualization)
+            res.extend(output)
+        return res
+
+    def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True):
+        if combined:
+            model_filename = "__model__" if not model_filename else model_filename
+            params_filename = "__params__" if not params_filename else params_filename
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+
+        program, feeded_var_names, target_vars = fluid.io.load_inference_model(
+            dirname=self.default_pretrained_model_path, executor=exe)
+
+        fluid.io.save_inference_model(
+            dirname=dirname,
+            main_program=program,
+            executor=exe,
+            feeded_var_names=feeded_var_names,
+            target_vars=target_vars,
+            model_filename=model_filename,
+            params_filename=params_filename)
+
+    @serving
+    def serving_method(self, images, **kwargs):
+        """
+        Run as a service.
+        """
+        images_decode = [base64_to_cv2(image) for image in images]
+        results = self.object_detection(images=images_decode, **kwargs)
+        return results
+
+    @runnable
+    def run_cmd(self, argvs):
+        """
+        Run as a command.
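+
+        For example: hub run yolov3_darknet53_pedestrian --input_path IMAGE_PATH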
+        """
+        self.parser = argparse.ArgumentParser(
+            description="Run the {} module.".format(self.name),
+            prog='hub run {}'.format(self.name),
+            usage='%(prog)s',
+            add_help=True)
+        self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required")
+        self.arg_config_group = self.parser.add_argument_group(
+            title="Config options", description="Run configuration for controlling module behavior, not required.")
+        self.add_module_config_arg()
+        self.add_module_input_arg()
+        args = self.parser.parse_args(argvs)
+        results = self.object_detection(
+            paths=[args.input_path],
+            batch_size=args.batch_size,
+            use_gpu=args.use_gpu,
+            output_dir=args.output_dir,
+            visualization=args.visualization,
+            score_thresh=args.score_thresh)
+        return results
+
+    def add_module_config_arg(self):
+        """
+        Add the command config options.
+        """
+        self.arg_config_group.add_argument(
+            '--use_gpu', type=ast.literal_eval, default=False, help="whether to use GPU or not")
+        self.arg_config_group.add_argument(
+            '--output_dir',
+            type=str,
+            default='yolov3_pedestrian_detect_output',
+            help="The directory to save output images.")
+        self.arg_config_group.add_argument(
+            '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.")
+
+    def add_module_input_arg(self):
+        """
+        Add the command input options.
+        """
+        self.arg_input_group.add_argument('--input_path', type=str, help="path to image.")
+        self.arg_input_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.")
+        self.arg_input_group.add_argument(
+            '--score_thresh', type=ast.literal_eval, default=0.2, help="threshold for object detection.")
diff --git a/modules/image/object_detection/yolov3_darknet53_pedestrian/processor.py b/modules/image/object_detection/yolov3_darknet53_pedestrian/processor.py
new file mode 100644
index 0000000000000000000000000000000000000000..64049e42b1d4dfa67aff606cae4490710b9cd6cc
--- /dev/null
+++ b/modules/image/object_detection/yolov3_darknet53_pedestrian/processor.py
@@ -0,0 +1,163 @@
+# coding=utf-8
+import base64
+import os
+
+import cv2
+import numpy as np
+from PIL import Image, ImageDraw
+
+__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess']
+
+
+def base64_to_cv2(b64str):
+    data = base64.b64decode(b64str.encode('utf8'))
+    # np.fromstring is deprecated for binary input; np.frombuffer is the drop-in replacement.
+    data = np.frombuffer(data, np.uint8)
+    data = cv2.imdecode(data, cv2.IMREAD_COLOR)
+    return data
+
+
+def check_dir(dir_path):
+    if not os.path.exists(dir_path):
+        os.makedirs(dir_path)
+    elif os.path.isfile(dir_path):
+        os.remove(dir_path)
+        os.makedirs(dir_path)
+
+
+def get_save_image_name(img, output_dir, image_path):
+    """Get save image name from source image path.
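+
+    The extension is taken from the source file name when it has one, and is
+    otherwise inferred from the PIL image format or mode.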
+    """
+    image_name = os.path.split(image_path)[-1]
+    name, ext = os.path.splitext(image_name)
+
+    if ext == '':
+        if img.format == 'PNG':
+            ext = '.png'
+        elif img.format == 'JPEG':
+            ext = '.jpg'
+        elif img.format == 'BMP':
+            ext = '.bmp'
+        else:
+            if img.mode == "RGB" or img.mode == "L":
+                ext = ".jpg"
+            elif img.mode == "RGBA" or img.mode == "P":
+                ext = '.png'
+
+    return os.path.join(output_dir, "{}".format(name)) + ext
+
+
+def draw_bounding_box_on_image(image_path, data_list, save_dir):
+    image = Image.open(image_path)
+    draw = ImageDraw.Draw(image)
+    for data in data_list:
+        left, right, top, bottom = data['left'], data['right'], data['top'], data['bottom']
+        # draw bbox
+        draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=2, fill='red')
+        # draw label
+        if image.mode == 'RGB':
+            text = data['label'] + ": %.2f%%" % (100 * data['confidence'])
+            textsize_width, textsize_height = draw.textsize(text=text)
+            draw.rectangle(
+                xy=(left, top - (textsize_height + 5), left + textsize_width + 10, top), fill=(255, 255, 255))
+            draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0))
+
+    save_name = get_save_image_name(image, save_dir, image_path)
+    if os.path.exists(save_name):
+        os.remove(save_name)
+
+    image.save(save_name)
+    return save_name
+
+
+def clip_bbox(bbox, img_width, img_height):
+    xmin = max(min(bbox[0], img_width), 0.)
+    ymin = max(min(bbox[1], img_height), 0.)
+    xmax = max(min(bbox[2], img_width), 0.)
+    ymax = max(min(bbox[3], img_height), 0.)
+    return float(xmin), float(ymin), float(xmax), float(ymax)
+
+
+def load_label_info(file_path):
+    with open(file_path, 'r') as fr:
+        text = fr.readlines()
+        label_names = []
+        for info in text:
+            label_names.append(info.strip())
+        return label_names
+
+
+def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True):
+    """
+    postprocess the lod_tensor produced by fluid.Executor.run
+
+    Args:
+        paths (list[str]): The paths of images.
+        images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
+        data_out (lod_tensor): data output of predictor.
+        output_dir (str): The path to store output images.
+        visualization (bool): Whether to save image or not.
+        score_thresh (float): the confidence threshold; boxes scoring below it are dropped.
+        label_names (list[str]): label names.
+        handle_id (int): The number of images that have been handled.
+
+    Returns:
+        res (list[dict]): The result of pedestrian detection. Keys include 'data' and 'save_path', and the corresponding values are:
+            data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label' and 'confidence', and the corresponding values are:
+                left (float): The X coordinate of the upper left corner of the bounding box;
+                top (float): The Y coordinate of the upper left corner of the bounding box;
+                right (float): The X coordinate of the lower right corner of the bounding box;
+                bottom (float): The Y coordinate of the lower right corner of the bounding box;
+                label (str): The label of detection result;
+                confidence (float): The confidence of detection result.
+            save_path (str): The path to save output images.
+    """
+    lod_tensor = data_out[0]
+    lod = lod_tensor.lod[0]
+    results = lod_tensor.as_ndarray()
+
+    check_dir(output_dir)
+
+    assert type(paths) is list, "type(paths) is not list."
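+    # Results are ordered as [paths..., images...]; the first handle_id path
+    # entries were consumed by earlier batches, so compute how many of this
+    # batch's results still correspond to file paths.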
+ if handle_id < len(paths): + unhandled_paths = paths[handle_id:] + unhandled_paths_num = len(unhandled_paths) + else: + unhandled_paths_num = 0 + + output = list() + for index in range(len(lod) - 1): + output_i = {'data': []} + if index < unhandled_paths_num: + org_img_path = unhandled_paths[index] + org_img = Image.open(org_img_path) + else: + org_img = images[index - unhandled_paths_num] + org_img = org_img.astype(np.uint8) + org_img = Image.fromarray(org_img[:, :, ::-1]) + if visualization: + org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format((handle_id + index))) + org_img.save(org_img_path) + org_img_height = org_img.height + org_img_width = org_img.width + result_i = results[lod[index]:lod[index + 1]] + for row in result_i: + if len(row) != 6: + continue + if row[1] < score_thresh: + continue + category_id = int(row[0]) + confidence = row[1] + bbox = row[2:] + dt = {} + dt['label'] = label_names[category_id] + dt['confidence'] = float(confidence) + dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height) + output_i['data'].append(dt) + + output.append(output_i) + if visualization: + output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir) + + return output diff --git a/modules/image/object_detection/yolov3_darknet53_pedestrian/yolo_head.py b/modules/image/object_detection/yolov3_darknet53_pedestrian/yolo_head.py new file mode 100644 index 0000000000000000000000000000000000000000..cfe796c2edebedbff19f302b24533849ee09c2e3 --- /dev/null +++ b/modules/image/object_detection/yolov3_darknet53_pedestrian/yolo_head.py @@ -0,0 +1,231 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import OrderedDict + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.regularizer import L2Decay + +__all__ = ['MultiClassNMS', 'YOLOv3Head'] + + +class MultiClassNMS(object): + # __op__ = fluid.layers.multiclass_nms + def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k, normalized, score_threshold): + super(MultiClassNMS, self).__init__() + self.background_label = background_label + self.keep_top_k = keep_top_k + self.nms_threshold = nms_threshold + self.nms_top_k = nms_top_k + self.normalized = normalized + self.score_threshold = score_threshold + + +class YOLOv3Head(object): + """Head block for YOLOv3 network + + Args: + norm_decay (float): weight decay for normalization layer weights + num_classes (int): number of output classes + ignore_thresh (float): threshold to ignore confidence loss + label_smooth (bool): whether to use label smoothing + anchors (list): anchors + anchor_masks (list): anchor masks + nms (object): an instance of `MultiClassNMS` + """ + + def __init__(self, + norm_decay=0., + num_classes=80, + ignore_thresh=0.7, + label_smooth=True, + anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], + [373, 326]], + anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], + nms=MultiClassNMS( + background_label=-1, + keep_top_k=100, + nms_threshold=0.45, + nms_top_k=1000, + normalized=True, + score_threshold=0.01), + weight_prefix_name=''): + self.norm_decay = norm_decay + self.num_classes = num_classes + self.ignore_thresh = ignore_thresh + self.label_smooth = label_smooth + self.anchor_masks = anchor_masks + self._parse_anchors(anchors) + self.nms = nms + self.prefix_name = weight_prefix_name + + def _conv_bn(self, 
input, ch_out, filter_size, stride, padding, act='leaky', is_test=True, name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=ch_out, + filter_size=filter_size, + stride=stride, + padding=padding, + act=None, + param_attr=ParamAttr(name=name + ".conv.weights"), + bias_attr=False) + + bn_name = name + ".bn" + bn_param_attr = ParamAttr(regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale') + bn_bias_attr = ParamAttr(regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset') + out = fluid.layers.batch_norm( + input=conv, + act=None, + is_test=is_test, + param_attr=bn_param_attr, + bias_attr=bn_bias_attr, + moving_mean_name=bn_name + '.mean', + moving_variance_name=bn_name + '.var') + + if act == 'leaky': + out = fluid.layers.leaky_relu(x=out, alpha=0.1) + return out + + def _detection_block(self, input, channel, is_test=True, name=None): + assert channel % 2 == 0, \ + "channel {} cannot be divided by 2 in detection block {}" \ + .format(channel, name) + + conv = input + for j in range(2): + conv = self._conv_bn( + conv, channel, filter_size=1, stride=1, padding=0, is_test=is_test, name='{}.{}.0'.format(name, j)) + conv = self._conv_bn( + conv, channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test, name='{}.{}.1'.format(name, j)) + route = self._conv_bn( + conv, channel, filter_size=1, stride=1, padding=0, is_test=is_test, name='{}.2'.format(name)) + tip = self._conv_bn( + route, channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test, name='{}.tip'.format(name)) + return route, tip + + def _upsample(self, input, scale=2, name=None): + out = fluid.layers.resize_nearest(input=input, scale=float(scale), name=name) + return out + + def _parse_anchors(self, anchors): + """ + Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors + + """ + self.anchors = [] + self.mask_anchors = [] + + assert len(anchors) > 0, "ANCHORS not set." + assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." 
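+        # Flatten [[w, h], ...] into [w0, h0, w1, h1, ...], and collect the
+        # flattened anchor pairs selected by each mask into mask_anchors,
+        # one list per detection scale.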
+ + for anchor in anchors: + assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) + self.anchors.extend(anchor) + + anchor_num = len(anchors) + for masks in self.anchor_masks: + self.mask_anchors.append([]) + for mask in masks: + assert mask < anchor_num, "anchor mask index overflow" + self.mask_anchors[-1].extend(anchors[mask]) + + def _get_outputs(self, input, is_train=True): + """ + Get YOLOv3 head output + + Args: + input (list): List of Variables, output of backbone stages + is_train (bool): whether in train or test mode + + Returns: + outputs (list): Variables of each output layer + """ + + outputs = [] + + # get last out_layer_num blocks in reverse order + out_layer_num = len(self.anchor_masks) + if isinstance(input, OrderedDict): + blocks = list(input.values())[-1:-out_layer_num - 1:-1] + else: + blocks = input[-1:-out_layer_num - 1:-1] + route = None + for i, block in enumerate(blocks): + if i > 0: # perform concat in first 2 detection_block + block = fluid.layers.concat(input=[route, block], axis=1) + route, tip = self._detection_block( + block, channel=512 // (2**i), is_test=(not is_train), name=self.prefix_name + "yolo_block.{}".format(i)) + + # out channel number = mask_num * (5 + class_num) + num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) + block_out = fluid.layers.conv2d( + input=tip, + num_filters=num_filters, + filter_size=1, + stride=1, + padding=0, + act=None, + param_attr=ParamAttr(name=self.prefix_name + "yolo_output.{}.conv.weights".format(i)), + bias_attr=ParamAttr( + regularizer=L2Decay(0.), name=self.prefix_name + "yolo_output.{}.conv.bias".format(i))) + outputs.append(block_out) + + if i < len(blocks) - 1: + # do not perform upsample in the last detection_block + route = self._conv_bn( + input=route, + ch_out=256 // (2**i), + filter_size=1, + stride=1, + padding=0, + is_test=(not is_train), + name=self.prefix_name + "yolo_transition.{}".format(i)) + # upsample + route = self._upsample(route) + + return outputs, blocks + + def get_prediction(self, outputs, im_size): + """ + Get prediction result of YOLOv3 network + + Args: + outputs (list): list of Variables, return from _get_outputs + im_size (Variable): Variable of size([h, w]) of each image + + Returns: + pred (Variable): The prediction result after non-max suppress. 
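+                Each row is [label, confidence, xmin, ymin, xmax, ymax],
+                which is the layout processor.postprocess parses.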
+ + """ + boxes = [] + scores = [] + downsample = 32 + for i, output in enumerate(outputs): + box, score = fluid.layers.yolo_box( + x=output, + img_size=im_size, + anchors=self.mask_anchors[i], + class_num=self.num_classes, + conf_thresh=self.nms.score_threshold, + downsample_ratio=downsample, + name=self.prefix_name + "yolo_box" + str(i)) + boxes.append(box) + scores.append(fluid.layers.transpose(score, perm=[0, 2, 1])) + + downsample //= 2 + + yolo_boxes = fluid.layers.concat(boxes, axis=1) + yolo_scores = fluid.layers.concat(scores, axis=2) + pred = fluid.layers.multiclass_nms( + bboxes=yolo_boxes, + scores=yolo_scores, + score_threshold=self.nms.score_threshold, + nms_top_k=self.nms.nms_top_k, + keep_top_k=self.nms.keep_top_k, + nms_threshold=self.nms.nms_threshold, + background_label=self.nms.background_label, + normalized=self.nms.normalized, + name="multiclass_nms") + return pred diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/README.md b/modules/image/object_detection/yolov3_darknet53_vehicles/README.md similarity index 100% rename from hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/README.md rename to modules/image/object_detection/yolov3_darknet53_vehicles/README.md diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/__init__.py b/modules/image/object_detection/yolov3_darknet53_vehicles/__init__.py similarity index 100% rename from hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/__init__.py rename to modules/image/object_detection/yolov3_darknet53_vehicles/__init__.py diff --git a/modules/image/object_detection/yolov3_darknet53_vehicles/darknet.py b/modules/image/object_detection/yolov3_darknet53_vehicles/darknet.py new file mode 100644 index 0000000000000000000000000000000000000000..58c5b090172b042d7df701c6691b3c1e867c1b23 --- /dev/null +++ b/modules/image/object_detection/yolov3_darknet53_vehicles/darknet.py @@ -0,0 +1,123 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import six +import math + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.regularizer import L2Decay + +__all__ = ['DarkNet'] + + +class DarkNet(object): + """DarkNet, see https://pjreddie.com/darknet/yolo/ + Args: + depth (int): network depth, currently only darknet 53 is supported + norm_type (str): normalization type, 'bn' and 'sync_bn' are supported + norm_decay (float): weight decay for normalization layer weights + get_prediction (bool): whether to get prediction + class_dim (int): number of class while classification + """ + + def __init__(self, + depth=53, + norm_type='sync_bn', + norm_decay=0., + weight_prefix_name='', + get_prediction=False, + class_dim=1000): + assert depth in [53], "unsupported depth value" + self.depth = depth + self.norm_type = norm_type + self.norm_decay = norm_decay + self.depth_cfg = {53: ([1, 2, 8, 8, 4], self.basicblock)} + self.prefix_name = weight_prefix_name + self.class_dim = class_dim + self.get_prediction = get_prediction + + def _conv_norm(self, input, ch_out, filter_size, stride, padding, act='leaky', name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=ch_out, + filter_size=filter_size, + stride=stride, + padding=padding, + act=None, + param_attr=ParamAttr(name=name + ".conv.weights"), + bias_attr=False) + + bn_name = name + ".bn" + bn_param_attr = ParamAttr(regularizer=L2Decay(float(self.norm_decay)), name=bn_name + 
'.scale') + bn_bias_attr = ParamAttr(regularizer=L2Decay(float(self.norm_decay)), name=bn_name + '.offset') + + out = fluid.layers.batch_norm( + input=conv, + act=None, + param_attr=bn_param_attr, + bias_attr=bn_bias_attr, + moving_mean_name=bn_name + '.mean', + moving_variance_name=bn_name + '.var') + + # leaky relu here has `alpha` as 0.1, can not be set by + # `act` param in fluid.layers.batch_norm above. + if act == 'leaky': + out = fluid.layers.leaky_relu(x=out, alpha=0.1) + + return out + + def _downsample(self, input, ch_out, filter_size=3, stride=2, padding=1, name=None): + return self._conv_norm(input, ch_out=ch_out, filter_size=filter_size, stride=stride, padding=padding, name=name) + + def basicblock(self, input, ch_out, name=None): + conv1 = self._conv_norm(input, ch_out=ch_out, filter_size=1, stride=1, padding=0, name=name + ".0") + conv2 = self._conv_norm(conv1, ch_out=ch_out * 2, filter_size=3, stride=1, padding=1, name=name + ".1") + out = fluid.layers.elementwise_add(x=input, y=conv2, act=None) + return out + + def layer_warp(self, block_func, input, ch_out, count, name=None): + out = block_func(input, ch_out=ch_out, name='{}.0'.format(name)) + for j in six.moves.xrange(1, count): + out = block_func(out, ch_out=ch_out, name='{}.{}'.format(name, j)) + return out + + def __call__(self, input): + """Get the backbone of DarkNet, that is output for the 5 stages. + + :param input: Variable of input image + :type input: Variable + :Returns: The last variables of each stage. + """ + stages, block_func = self.depth_cfg[self.depth] + stages = stages[0:5] + conv = self._conv_norm( + input=input, ch_out=32, filter_size=3, stride=1, padding=1, name=self.prefix_name + "yolo_input") + downsample_ = self._downsample( + input=conv, ch_out=conv.shape[1] * 2, name=self.prefix_name + "yolo_input.downsample") + blocks = [] + for i, stage in enumerate(stages): + block = self.layer_warp( + block_func=block_func, + input=downsample_, + ch_out=32 * 2**i, + count=stage, + name=self.prefix_name + "stage.{}".format(i)) + blocks.append(block) + if i < len(stages) - 1: # do not downsaple in the last stage + downsample_ = self._downsample( + input=block, ch_out=block.shape[1] * 2, name=self.prefix_name + "stage.{}.downsample".format(i)) + if self.get_prediction: + pool = fluid.layers.pool2d(input=block, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc( + input=pool, + size=self.class_dim, + param_attr=ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv), name='fc_weights'), + bias_attr=ParamAttr(name='fc_offset')) + out = fluid.layers.softmax(out) + return out + else: + return blocks diff --git a/modules/image/object_detection/yolov3_darknet53_vehicles/data_feed.py b/modules/image/object_detection/yolov3_darknet53_vehicles/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..28abc06a2bd25028d13bbc4ea724c111ca966477 --- /dev/null +++ b/modules/image/object_detection/yolov3_darknet53_vehicles/data_feed.py @@ -0,0 +1,69 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import os + +import cv2 +import numpy as np + +__all__ = ['reader'] + + +def reader(paths=[], images=None): + """ + data generator + + Args: + paths (list[str]): paths to images. + images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] + + Yield: + res (list): preprocessed image and the size of original image. 
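+
+    Example (illustrative; assumes ./test.jpg exists):
+        for im, im_size in reader(paths=['./test.jpg']):
+            print(im.shape, im_size)  # (3, 608, 608) and [orig_h, orig_w]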
+ """ + img_list = [] + if paths: + assert type(paths) is list, "type(paths) is not list." + for img_path in paths: + assert os.path.isfile(img_path), "The {} isn't a valid file path.".format(img_path) + img = cv2.imread(img_path).astype('float32') + img_list.append(img) + if images is not None: + for img in images: + img_list.append(img) + + for im in img_list: + # im_size + im_shape = im.shape + im_size = np.array([im_shape[0], im_shape[1]], dtype=np.int32) + + # decode image + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + + # resize image + target_size = 608 + im_size_min = np.min(im_shape[0:2]) + im_size_max = np.max(im_shape[0:2]) + if float(im_size_min) == 0: + raise ZeroDivisionError('min size of image is 0') + + im_scale_x = float(target_size) / float(im_shape[1]) + im_scale_y = float(target_size) / float(im_shape[0]) + im = cv2.resize(im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=2) + + # normalize image + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + im = im.astype(np.float32, copy=False) + mean = np.array(mean)[np.newaxis, np.newaxis, :] + std = np.array(std)[np.newaxis, np.newaxis, :] + im = im / 255.0 + im -= mean + im /= std + + # permute + im = np.swapaxes(im, 1, 2) + im = np.swapaxes(im, 1, 0) + + yield [im, im_size] diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/label_file.txt b/modules/image/object_detection/yolov3_darknet53_vehicles/label_file.txt similarity index 100% rename from hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/label_file.txt rename to modules/image/object_detection/yolov3_darknet53_vehicles/label_file.txt diff --git a/modules/image/object_detection/yolov3_darknet53_vehicles/module.py b/modules/image/object_detection/yolov3_darknet53_vehicles/module.py new file mode 100644 index 0000000000000000000000000000000000000000..8e408090a3c5016f62520b1b6480fa5601748a58 --- /dev/null +++ b/modules/image/object_detection/yolov3_darknet53_vehicles/module.py @@ -0,0 +1,282 @@ +# coding=utf-8 +from __future__ import absolute_import + +import ast +import argparse +import os +from functools import partial + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving +from paddlehub.common.paddle_helper import add_vars_prefix + +from yolov3_darknet53_vehicles.darknet import DarkNet +from yolov3_darknet53_vehicles.processor import load_label_info, postprocess, base64_to_cv2 +from yolov3_darknet53_vehicles.data_feed import reader +from yolov3_darknet53_vehicles.yolo_head import MultiClassNMS, YOLOv3Head + + +@moduleinfo( + name="yolov3_darknet53_vehicles", + version="1.0.1", + type="CV/object_detection", + summary="Baidu's YOLOv3 model for vehicles detection, with backbone DarkNet53.", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class YOLOv3DarkNet53Vehicles(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, "yolov3_darknet53_vehicles_model") + self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) + self._set_config() + + def _set_config(self): + """ + predictor config setting. 
+ """ + cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + cpu_config.switch_ir_optim(False) + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def context(self, trainable=True, pretrained=True, get_prediction=False): + """ + Distill the Head Features, so as to perform transfer learning. + + Args: + trainable (bool): whether to set parameters trainable. + pretrained (bool): whether to load default pretrained model. + get_prediction (bool): whether to get prediction. + + Returns: + inputs(dict): the input variables. + outputs(dict): the output variables. + context_prog (Program): the program to execute transfer learning. + """ + context_prog = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(context_prog, startup_program): + with fluid.unique_name.guard(): + # image + image = fluid.layers.data(name='image', shape=[3, 608, 608], dtype='float32') + # backbone + backbone = DarkNet(norm_type='sync_bn', norm_decay=0., depth=53) + # body_feats + body_feats = backbone(image) + # im_size + im_size = fluid.layers.data(name='im_size', shape=[2], dtype='int32') + # yolo_head + yolo_head = YOLOv3Head( + anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], + anchors=[[8, 9], [10, 23], [19, 15], [23, 33], [40, 25], [54, 50], [101, 80], [139, 145], + [253, 224]], + norm_decay=0., + num_classes=6, + ignore_thresh=0.7, + label_smooth=False, + nms=MultiClassNMS( + background_label=-1, + keep_top_k=100, + nms_threshold=0.45, + nms_top_k=400, + normalized=False, + score_threshold=0.005)) + # head_features + head_features, body_features = yolo_head._get_outputs(body_feats, is_train=trainable) + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + # var_prefix + var_prefix = '@HUB_{}@'.format(self.name) + # name of inputs + inputs = {'image': var_prefix + image.name, 'im_size': var_prefix + im_size.name} + # name of outputs + if get_prediction: + bbox_out = yolo_head.get_prediction(head_features, im_size) + outputs = {'bbox_out': [var_prefix + bbox_out.name]} + else: + outputs = { + 'head_features': [var_prefix + var.name for var in head_features], + 'body_features': [var_prefix + var.name for var in body_features] + } + # add_vars_prefix + add_vars_prefix(context_prog, var_prefix) + add_vars_prefix(fluid.default_startup_program(), var_prefix) + # inputs + inputs = {key: context_prog.global_block().vars[value] for key, value in inputs.items()} + # outputs + outputs = { + key: [context_prog.global_block().vars[varname] for varname in value] + for key, value in outputs.items() + } + # trainable + for param in context_prog.global_block().iter_parameters(): + param.trainable = trainable + # pretrained + if pretrained: + + def _if_exist(var): + return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist) + else: + exe.run(startup_program) + + return inputs, outputs, context_prog + + def object_detection(self, + paths=None, + images=None, + batch_size=1, + use_gpu=False, + 
output_dir='yolov3_vehicles_detect_output', + score_thresh=0.2, + visualization=True): + """API of Object Detection. + + Args: + paths (list[str]): The paths of images. + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + output_dir (str): The path to store output images. + visualization (bool): Whether to save image or not. + score_thresh (float): threshold for object detecion. + + Returns: + res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: + data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: + left (float): The X coordinate of the upper left corner of the bounding box; + top (float): The Y coordinate of the upper left corner of the bounding box; + right (float): The X coordinate of the lower right corner of the bounding box; + bottom (float): The Y coordinate of the lower right corner of the bounding box; + label (str): The label of detection result; + confidence (float): The confidence of detection result. + save_path (str, optional): The path to save output images. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." + ) + + paths = paths if paths else list() + data_reader = partial(reader, paths, images) + batch_reader = fluid.io.batch(data_reader, batch_size=batch_size) + res = [] + for iter_id, feed_data in enumerate(batch_reader()): + feed_data = np.array(feed_data) + image_tensor = PaddleTensor(np.array(list(feed_data[:, 0]))) + im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1]))) + if use_gpu: + data_out = self.gpu_predictor.run([image_tensor, im_size_tensor]) + else: + data_out = self.cpu_predictor.run([image_tensor, im_size_tensor]) + + output = postprocess( + paths=paths, + images=images, + data_out=data_out, + score_thresh=score_thresh, + label_names=self.label_names, + output_dir=output_dir, + handle_id=iter_id * batch_size, + visualization=visualization) + res.extend(output) + return res + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.object_detection(images=images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. 
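+
+        Example (illustrative command line):
+            $ hub run yolov3_darknet53_vehicles --input_path test.jpg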
+ """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.object_detection( + paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization, + score_thresh=args.score_thresh) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', + type=str, + default='yolov3_vehicles_detect_output', + help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + self.arg_input_group.add_argument( + '--score_thresh', type=ast.literal_eval, default=0.2, help="threshold for object detecion.") diff --git a/modules/image/object_detection/yolov3_darknet53_vehicles/processor.py b/modules/image/object_detection/yolov3_darknet53_vehicles/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..64049e42b1d4dfa67aff606cae4490710b9cd6cc --- /dev/null +++ b/modules/image/object_detection/yolov3_darknet53_vehicles/processor.py @@ -0,0 +1,163 @@ +# coding=utf-8 +import base64 +import os + +import cv2 +import numpy as np +from PIL import Image, ImageDraw + +__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess'] + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(img, output_dir, image_path): + """Get save image name from source image path. 
+ """ + image_name = os.path.split(image_path)[-1] + name, ext = os.path.splitext(image_name) + + if ext == '': + if img.format == 'PNG': + ext = '.png' + elif img.format == 'JPEG': + ext = '.jpg' + elif img.format == 'BMP': + ext = '.bmp' + else: + if img.mode == "RGB" or img.mode == "L": + ext = ".jpg" + elif img.mode == "RGBA" or img.mode == "P": + ext = '.png' + + return os.path.join(output_dir, "{}".format(name)) + ext + + +def draw_bounding_box_on_image(image_path, data_list, save_dir): + image = Image.open(image_path) + draw = ImageDraw.Draw(image) + for data in data_list: + left, right, top, bottom = data['left'], data['right'], data['top'], data['bottom'] + # draw bbox + draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=2, fill='red') + # draw label + if image.mode == 'RGB': + text = data['label'] + ": %.2f%%" % (100 * data['confidence']) + textsize_width, textsize_height = draw.textsize(text=text) + draw.rectangle( + xy=(left, top - (textsize_height + 5), left + textsize_width + 10, top), fill=(255, 255, 255)) + draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) + + save_name = get_save_image_name(image, save_dir, image_path) + if os.path.exists(save_name): + os.remove(save_name) + + image.save(save_name) + return save_name + + +def clip_bbox(bbox, img_width, img_height): + xmin = max(min(bbox[0], img_width), 0.) + ymin = max(min(bbox[1], img_height), 0.) + xmax = max(min(bbox[2], img_width), 0.) + ymax = max(min(bbox[3], img_height), 0.) + return float(xmin), float(ymin), float(xmax), float(ymax) + + +def load_label_info(file_path): + with open(file_path, 'r') as fr: + text = fr.readlines() + label_names = [] + for info in text: + label_names.append(info.strip()) + return label_names + + +def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True): + """ + postprocess the lod_tensor produced by fluid.Executor.run + + Args: + paths (list[str]): The paths of images. + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + data_out (lod_tensor): data output of predictor. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + output_dir (str): The path to store output images. + visualization (bool): Whether to save image or not. + score_thresh (float): the low limit of bounding box. + label_names (list[str]): label names. + handle_id (int): The number of images that have been handled. + + Returns: + res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: + data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: + left (float): The X coordinate of the upper left corner of the bounding box; + top (float): The Y coordinate of the upper left corner of the bounding box; + right (float): The X coordinate of the lower right corner of the bounding box; + bottom (float): The Y coordinate of the lower right corner of the bounding box; + label (str): The label of detection result; + confidence (float): The confidence of detection result. + save_path (str): The path to save output images. + """ + lod_tensor = data_out[0] + lod = lod_tensor.lod[0] + results = lod_tensor.as_ndarray() + + check_dir(output_dir) + + assert type(paths) is list, "type(paths) is not list." 
+ if handle_id < len(paths): + unhandled_paths = paths[handle_id:] + unhandled_paths_num = len(unhandled_paths) + else: + unhandled_paths_num = 0 + + output = list() + for index in range(len(lod) - 1): + output_i = {'data': []} + if index < unhandled_paths_num: + org_img_path = unhandled_paths[index] + org_img = Image.open(org_img_path) + else: + org_img = images[index - unhandled_paths_num] + org_img = org_img.astype(np.uint8) + org_img = Image.fromarray(org_img[:, :, ::-1]) + if visualization: + org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format((handle_id + index))) + org_img.save(org_img_path) + org_img_height = org_img.height + org_img_width = org_img.width + result_i = results[lod[index]:lod[index + 1]] + for row in result_i: + if len(row) != 6: + continue + if row[1] < score_thresh: + continue + category_id = int(row[0]) + confidence = row[1] + bbox = row[2:] + dt = {} + dt['label'] = label_names[category_id] + dt['confidence'] = float(confidence) + dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height) + output_i['data'].append(dt) + + output.append(output_i) + if visualization: + output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir) + + return output diff --git a/modules/image/object_detection/yolov3_darknet53_vehicles/yolo_head.py b/modules/image/object_detection/yolov3_darknet53_vehicles/yolo_head.py new file mode 100644 index 0000000000000000000000000000000000000000..cfe796c2edebedbff19f302b24533849ee09c2e3 --- /dev/null +++ b/modules/image/object_detection/yolov3_darknet53_vehicles/yolo_head.py @@ -0,0 +1,231 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import OrderedDict + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.regularizer import L2Decay + +__all__ = ['MultiClassNMS', 'YOLOv3Head'] + + +class MultiClassNMS(object): + # __op__ = fluid.layers.multiclass_nms + def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k, normalized, score_threshold): + super(MultiClassNMS, self).__init__() + self.background_label = background_label + self.keep_top_k = keep_top_k + self.nms_threshold = nms_threshold + self.nms_top_k = nms_top_k + self.normalized = normalized + self.score_threshold = score_threshold + + +class YOLOv3Head(object): + """Head block for YOLOv3 network + + Args: + norm_decay (float): weight decay for normalization layer weights + num_classes (int): number of output classes + ignore_thresh (float): threshold to ignore confidence loss + label_smooth (bool): whether to use label smoothing + anchors (list): anchors + anchor_masks (list): anchor masks + nms (object): an instance of `MultiClassNMS` + """ + + def __init__(self, + norm_decay=0., + num_classes=80, + ignore_thresh=0.7, + label_smooth=True, + anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], + [373, 326]], + anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], + nms=MultiClassNMS( + background_label=-1, + keep_top_k=100, + nms_threshold=0.45, + nms_top_k=1000, + normalized=True, + score_threshold=0.01), + weight_prefix_name=''): + self.norm_decay = norm_decay + self.num_classes = num_classes + self.ignore_thresh = ignore_thresh + self.label_smooth = label_smooth + self.anchor_masks = anchor_masks + self._parse_anchors(anchors) + self.nms = nms + self.prefix_name = weight_prefix_name + + def _conv_bn(self, input, 
ch_out, filter_size, stride, padding, act='leaky', is_test=True, name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=ch_out, + filter_size=filter_size, + stride=stride, + padding=padding, + act=None, + param_attr=ParamAttr(name=name + ".conv.weights"), + bias_attr=False) + + bn_name = name + ".bn" + bn_param_attr = ParamAttr(regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale') + bn_bias_attr = ParamAttr(regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset') + out = fluid.layers.batch_norm( + input=conv, + act=None, + is_test=is_test, + param_attr=bn_param_attr, + bias_attr=bn_bias_attr, + moving_mean_name=bn_name + '.mean', + moving_variance_name=bn_name + '.var') + + if act == 'leaky': + out = fluid.layers.leaky_relu(x=out, alpha=0.1) + return out + + def _detection_block(self, input, channel, is_test=True, name=None): + assert channel % 2 == 0, \ + "channel {} cannot be divided by 2 in detection block {}" \ + .format(channel, name) + + conv = input + for j in range(2): + conv = self._conv_bn( + conv, channel, filter_size=1, stride=1, padding=0, is_test=is_test, name='{}.{}.0'.format(name, j)) + conv = self._conv_bn( + conv, channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test, name='{}.{}.1'.format(name, j)) + route = self._conv_bn( + conv, channel, filter_size=1, stride=1, padding=0, is_test=is_test, name='{}.2'.format(name)) + tip = self._conv_bn( + route, channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test, name='{}.tip'.format(name)) + return route, tip + + def _upsample(self, input, scale=2, name=None): + out = fluid.layers.resize_nearest(input=input, scale=float(scale), name=name) + return out + + def _parse_anchors(self, anchors): + """ + Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors + + """ + self.anchors = [] + self.mask_anchors = [] + + assert len(anchors) > 0, "ANCHORS not set." + assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." 
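+        # With the anchors passed in by module.py and the masks
+        # [[6, 7, 8], [3, 4, 5], [0, 1, 2]], mask_anchors[0] becomes
+        # [101, 80, 139, 145, 253, 224], the largest anchors for the
+        # coarsest (32x-stride) output map.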
+ + for anchor in anchors: + assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) + self.anchors.extend(anchor) + + anchor_num = len(anchors) + for masks in self.anchor_masks: + self.mask_anchors.append([]) + for mask in masks: + assert mask < anchor_num, "anchor mask index overflow" + self.mask_anchors[-1].extend(anchors[mask]) + + def _get_outputs(self, input, is_train=True): + """ + Get YOLOv3 head output + + Args: + input (list): List of Variables, output of backbone stages + is_train (bool): whether in train or test mode + + Returns: + outputs (list): Variables of each output layer + """ + + outputs = [] + + # get last out_layer_num blocks in reverse order + out_layer_num = len(self.anchor_masks) + if isinstance(input, OrderedDict): + blocks = list(input.values())[-1:-out_layer_num - 1:-1] + else: + blocks = input[-1:-out_layer_num - 1:-1] + route = None + for i, block in enumerate(blocks): + if i > 0: # perform concat in first 2 detection_block + block = fluid.layers.concat(input=[route, block], axis=1) + route, tip = self._detection_block( + block, channel=512 // (2**i), is_test=(not is_train), name=self.prefix_name + "yolo_block.{}".format(i)) + + # out channel number = mask_num * (5 + class_num) + num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) + block_out = fluid.layers.conv2d( + input=tip, + num_filters=num_filters, + filter_size=1, + stride=1, + padding=0, + act=None, + param_attr=ParamAttr(name=self.prefix_name + "yolo_output.{}.conv.weights".format(i)), + bias_attr=ParamAttr( + regularizer=L2Decay(0.), name=self.prefix_name + "yolo_output.{}.conv.bias".format(i))) + outputs.append(block_out) + + if i < len(blocks) - 1: + # do not perform upsample in the last detection_block + route = self._conv_bn( + input=route, + ch_out=256 // (2**i), + filter_size=1, + stride=1, + padding=0, + is_test=(not is_train), + name=self.prefix_name + "yolo_transition.{}".format(i)) + # upsample + route = self._upsample(route) + + return outputs, blocks + + def get_prediction(self, outputs, im_size): + """ + Get prediction result of YOLOv3 network + + Args: + outputs (list): list of Variables, return from _get_outputs + im_size (Variable): Variable of size([h, w]) of each image + + Returns: + pred (Variable): The prediction result after non-max suppress. 
+ + """ + boxes = [] + scores = [] + downsample = 32 + for i, output in enumerate(outputs): + box, score = fluid.layers.yolo_box( + x=output, + img_size=im_size, + anchors=self.mask_anchors[i], + class_num=self.num_classes, + conf_thresh=self.nms.score_threshold, + downsample_ratio=downsample, + name=self.prefix_name + "yolo_box" + str(i)) + boxes.append(box) + scores.append(fluid.layers.transpose(score, perm=[0, 2, 1])) + + downsample //= 2 + + yolo_boxes = fluid.layers.concat(boxes, axis=1) + yolo_scores = fluid.layers.concat(scores, axis=2) + pred = fluid.layers.multiclass_nms( + bboxes=yolo_boxes, + scores=yolo_scores, + score_threshold=self.nms.score_threshold, + nms_top_k=self.nms.nms_top_k, + keep_top_k=self.nms.keep_top_k, + nms_threshold=self.nms.nms_threshold, + background_label=self.nms.background_label, + normalized=self.nms.normalized, + name="multiclass_nms") + return pred diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_venus/README.md b/modules/image/object_detection/yolov3_darknet53_venus/README.md similarity index 100% rename from hub_module/modules/image/object_detection/yolov3_darknet53_venus/README.md rename to modules/image/object_detection/yolov3_darknet53_venus/README.md diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/__init__.py b/modules/image/object_detection/yolov3_darknet53_venus/__init__.py similarity index 100% rename from hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/__init__.py rename to modules/image/object_detection/yolov3_darknet53_venus/__init__.py diff --git a/modules/image/object_detection/yolov3_darknet53_venus/darknet.py b/modules/image/object_detection/yolov3_darknet53_venus/darknet.py new file mode 100644 index 0000000000000000000000000000000000000000..fe925fb4c59cb7791eb08fedf143720287c1d424 --- /dev/null +++ b/modules/image/object_detection/yolov3_darknet53_venus/darknet.py @@ -0,0 +1,121 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import six +import math + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.regularizer import L2Decay + +__all__ = ['DarkNet'] + + +class DarkNet(object): + """DarkNet, see https://pjreddie.com/darknet/yolo/ + + Args: + depth (int): network depth, currently only darknet 53 is supported + norm_type (str): normalization type, 'bn' and 'sync_bn' are supported + norm_decay (float): weight decay for normalization layer weights + get_prediction (bool): whether to get prediction + class_dim (int): number of class while classification + """ + + def __init__(self, + depth=53, + norm_type='sync_bn', + norm_decay=0., + weight_prefix_name='', + get_prediction=False, + class_dim=1000): + assert depth in [53], "unsupported depth value" + self.depth = depth + self.norm_type = norm_type + self.norm_decay = norm_decay + self.depth_cfg = {53: ([1, 2, 8, 8, 4], self.basicblock)} + self.prefix_name = weight_prefix_name + self.class_dim = class_dim + self.get_prediction = get_prediction + + def _conv_norm(self, input, ch_out, filter_size, stride, padding, act='leaky', name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=ch_out, + filter_size=filter_size, + stride=stride, + padding=padding, + act=None, + param_attr=ParamAttr(name=name + ".conv.weights"), + bias_attr=False) + + bn_name = name + ".bn" + bn_param_attr = ParamAttr(regularizer=L2Decay(float(self.norm_decay)), name=bn_name + '.scale') + bn_bias_attr = 
ParamAttr(regularizer=L2Decay(float(self.norm_decay)), name=bn_name + '.offset') + + out = fluid.layers.batch_norm( + input=conv, + act=None, + param_attr=bn_param_attr, + bias_attr=bn_bias_attr, + moving_mean_name=bn_name + '.mean', + moving_variance_name=bn_name + '.var') + + # leaky relu here has `alpha` as 0.1, can not be set by + # `act` param in fluid.layers.batch_norm above. + if act == 'leaky': + out = fluid.layers.leaky_relu(x=out, alpha=0.1) + + return out + + def _downsample(self, input, ch_out, filter_size=3, stride=2, padding=1, name=None): + return self._conv_norm(input, ch_out=ch_out, filter_size=filter_size, stride=stride, padding=padding, name=name) + + def basicblock(self, input, ch_out, name=None): + conv1 = self._conv_norm(input, ch_out=ch_out, filter_size=1, stride=1, padding=0, name=name + ".0") + conv2 = self._conv_norm(conv1, ch_out=ch_out * 2, filter_size=3, stride=1, padding=1, name=name + ".1") + out = fluid.layers.elementwise_add(x=input, y=conv2, act=None) + return out + + def layer_warp(self, block_func, input, ch_out, count, name=None): + out = block_func(input, ch_out=ch_out, name='{}.0'.format(name)) + for j in six.moves.xrange(1, count): + out = block_func(out, ch_out=ch_out, name='{}.{}'.format(name, j)) + return out + + def __call__(self, input): + """ + Get the backbone of DarkNet, that is output for the 5 stages. + """ + stages, block_func = self.depth_cfg[self.depth] + stages = stages[0:5] + conv = self._conv_norm( + input=input, ch_out=32, filter_size=3, stride=1, padding=1, name=self.prefix_name + "yolo_input") + downsample_ = self._downsample( + input=conv, ch_out=conv.shape[1] * 2, name=self.prefix_name + "yolo_input.downsample") + blocks = [] + for i, stage in enumerate(stages): + block = self.layer_warp( + block_func=block_func, + input=downsample_, + ch_out=32 * 2**i, + count=stage, + name=self.prefix_name + "stage.{}".format(i)) + blocks.append(block) + if i < len(stages) - 1: # do not downsaple in the last stage + downsample_ = self._downsample( + input=block, ch_out=block.shape[1] * 2, name=self.prefix_name + "stage.{}.downsample".format(i)) + if self.get_prediction: + pool = fluid.layers.pool2d(input=block, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc( + input=pool, + size=self.class_dim, + param_attr=ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv), name='fc_weights'), + bias_attr=ParamAttr(name='fc_offset')) + out = fluid.layers.softmax(out) + return out + else: + return blocks diff --git a/modules/image/object_detection/yolov3_darknet53_venus/data_feed.py b/modules/image/object_detection/yolov3_darknet53_venus/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..28abc06a2bd25028d13bbc4ea724c111ca966477 --- /dev/null +++ b/modules/image/object_detection/yolov3_darknet53_venus/data_feed.py @@ -0,0 +1,69 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import os + +import cv2 +import numpy as np + +__all__ = ['reader'] + + +def reader(paths=[], images=None): + """ + data generator + + Args: + paths (list[str]): paths to images. + images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] + + Yield: + res (list): preprocessed image and the size of original image. + """ + img_list = [] + if paths: + assert type(paths) is list, "type(paths) is not list." 
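+        # cv2.imread returns BGR float32 images here; the BGR->RGB
+        # conversion happens below, so ndarray inputs are expected
+        # in BGR channel order as well.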
+        for img_path in paths:
+            assert os.path.isfile(img_path), "The {} isn't a valid file path.".format(img_path)
+            img = cv2.imread(img_path).astype('float32')
+            img_list.append(img)
+    if images is not None:
+        for img in images:
+            img_list.append(img)
+
+    for im in img_list:
+        # im_size
+        im_shape = im.shape
+        im_size = np.array([im_shape[0], im_shape[1]], dtype=np.int32)
+
+        # decode image
+        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+
+        # resize image
+        target_size = 608
+        im_size_min = np.min(im_shape[0:2])
+        im_size_max = np.max(im_shape[0:2])
+        if float(im_size_min) == 0:
+            raise ZeroDivisionError('min size of image is 0')
+
+        im_scale_x = float(target_size) / float(im_shape[1])
+        im_scale_y = float(target_size) / float(im_shape[0])
+        im = cv2.resize(im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=2)
+
+        # normalize image
+        mean = [0.485, 0.456, 0.406]
+        std = [0.229, 0.224, 0.225]
+        im = im.astype(np.float32, copy=False)
+        mean = np.array(mean)[np.newaxis, np.newaxis, :]
+        std = np.array(std)[np.newaxis, np.newaxis, :]
+        im = im / 255.0
+        im -= mean
+        im /= std
+
+        # permute
+        im = np.swapaxes(im, 1, 2)
+        im = np.swapaxes(im, 1, 0)
+
+        yield [im, im_size]
diff --git a/modules/image/object_detection/yolov3_darknet53_venus/module.py b/modules/image/object_detection/yolov3_darknet53_venus/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ef4f251cc21993264fba471d47e1c9bfaec717f
--- /dev/null
+++ b/modules/image/object_detection/yolov3_darknet53_venus/module.py
@@ -0,0 +1,104 @@
+# coding=utf-8
+from __future__ import absolute_import
+
+import ast
+import argparse
+import os
+from functools import partial
+
+import numpy as np
+import paddle.fluid as fluid
+import paddlehub as hub
+from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+from paddlehub.module.module import moduleinfo, runnable, serving
+from paddlehub.common.paddle_helper import add_vars_prefix
+
+from yolov3_darknet53_venus.darknet import DarkNet
+from yolov3_darknet53_venus.processor import load_label_info, postprocess, base64_to_cv2
+from yolov3_darknet53_venus.data_feed import reader
+from yolov3_darknet53_venus.yolo_head import MultiClassNMS, YOLOv3Head
+
+
+@moduleinfo(
+    name="yolov3_darknet53_venus",
+    version="1.0.0",
+    type="CV/object_detection",
+    summary="Baidu's YOLOv3 model for object detection, with backbone DarkNet53, trained on a Baidu self-built dataset.",
+    author="paddlepaddle",
+    author_email="paddle-dev@baidu.com")
+class YOLOv3DarkNet53Venus(hub.Module):
+    def _initialize(self):
+        self.default_pretrained_model_path = os.path.join(self.directory, "yolov3_darknet53_model")
+
+    def context(self, trainable=True, pretrained=True, get_prediction=False):
+        """
+        Distill the head features to perform transfer learning.
+
+        Args:
+            trainable (bool): whether to set parameters trainable.
+            pretrained (bool): whether to load default pretrained model.
+            get_prediction (bool): whether to get prediction.
+
+        Returns:
+            inputs(dict): the input variables.
+            outputs(dict): the output variables.
+            context_prog (Program): the program to execute transfer learning.
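+
+        Example (illustrative sketch; assumes paddlehub is installed):
+            venus = hub.Module(name='yolov3_darknet53_venus')
+            inputs, outputs, program = venus.context(trainable=True)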
+ """ + context_prog = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(context_prog, startup_program): + with fluid.unique_name.guard(): + # image + image = fluid.layers.data(name='image', shape=[3, 608, 608], dtype='float32') + # backbone + backbone = DarkNet(norm_type='bn', norm_decay=0., depth=53) + # body_feats + body_feats = backbone(image) + # im_size + im_size = fluid.layers.data(name='im_size', shape=[2], dtype='int32') + # yolo_head + yolo_head = YOLOv3Head(num_classes=708) + # head_features + head_features, body_features = yolo_head._get_outputs(body_feats, is_train=trainable) + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + # var_prefix + var_prefix = '@HUB_{}@'.format(self.name) + # name of inputs + inputs = {'image': var_prefix + image.name, 'im_size': var_prefix + im_size.name} + # name of outputs + if get_prediction: + bbox_out = yolo_head.get_prediction(head_features, im_size) + outputs = {'bbox_out': [var_prefix + bbox_out.name]} + else: + outputs = { + 'head_features': [var_prefix + var.name for var in head_features], + 'body_features': [var_prefix + var.name for var in body_features] + } + # add_vars_prefix + add_vars_prefix(context_prog, var_prefix) + add_vars_prefix(fluid.default_startup_program(), var_prefix) + # inputs + inputs = {key: context_prog.global_block().vars[value] for key, value in inputs.items()} + # outputs + outputs = { + key: [context_prog.global_block().vars[varname] for varname in value] + for key, value in outputs.items() + } + # trainable + for param in context_prog.global_block().iter_parameters(): + param.trainable = trainable + # pretrained + if pretrained: + + def _if_exist(var): + return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist) + else: + exe.run(startup_program) + + return inputs, outputs, context_prog diff --git a/modules/image/object_detection/yolov3_darknet53_venus/processor.py b/modules/image/object_detection/yolov3_darknet53_venus/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..ed1f34b0c4a0fe66b016dcb5f047f99787de46c1 --- /dev/null +++ b/modules/image/object_detection/yolov3_darknet53_venus/processor.py @@ -0,0 +1,163 @@ +# coding=utf-8 +import base64 +import os + +import cv2 +import numpy as np +from PIL import Image, ImageDraw + +__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess'] + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(img, output_dir, image_path): + """Get save image name from source image path. 
+ """ + image_name = os.path.split(image_path)[-1] + name, ext = os.path.splitext(image_name) + + if ext == '': + if img.format == 'PNG': + ext = '.png' + elif img.format == 'JPEG': + ext = '.jpg' + elif img.format == 'BMP': + ext = '.bmp' + else: + if img.mode == "RGB" or img.mode == "L": + ext = ".jpg" + elif img.mode == "RGBA" or img.mode == "P": + ext = '.png' + + return os.path.join(output_dir, "{}".format(name)) + ext + + +def draw_bounding_box_on_image(image_path, data_list, save_dir): + image = Image.open(image_path) + draw = ImageDraw.Draw(image) + for data in data_list: + left, right, top, bottom = data['left'], data['right'], data['top'], data['bottom'] + # draw bbox + draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=2, fill='red') + # draw label + if image.mode == 'RGB': + text = data['label'] + ": %.2f%%" % (100 * data['confidence']) + textsize_width, textsize_height = draw.textsize(text=text) + draw.rectangle( + xy=(left, top - (textsize_height + 5), left + textsize_width + 10, top), fill=(255, 255, 255)) + draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) + + save_name = get_save_image_name(image, save_dir, image_path) + if os.path.exists(save_name): + os.remove(save_name) + + image.save(save_name) + return save_name + + +def clip_bbox(bbox, img_width, img_height): + xmin = max(min(bbox[0], img_width), 0.) + ymin = max(min(bbox[1], img_height), 0.) + xmax = max(min(bbox[2], img_width), 0.) + ymax = max(min(bbox[3], img_height), 0.) + return xmin, ymin, xmax, ymax + + +def load_label_info(file_path): + with open(file_path, 'r') as fr: + text = fr.readlines() + label_names = [] + for info in text: + label_names.append(info.strip()) + return label_names + + +def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True): + """ + postprocess the lod_tensor produced by fluid.Executor.run + + Args: + paths (list[str]): The paths of images. + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + data_out (lod_tensor): data output of predictor. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + output_dir (str): The path to store output images. + visualization (bool): Whether to save image or not. + score_thresh (float): the low limit of bounding box. + label_names (list[str]): label names. + handle_id (int): The number of images that have been handled. + + Returns: + res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: + data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: + left (float): The X coordinate of the upper left corner of the bounding box; + top (float): The Y coordinate of the upper left corner of the bounding box; + right (float): The X coordinate of the lower right corner of the bounding box; + bottom (float): The Y coordinate of the lower right corner of the bounding box; + label (str): The label of detection result; + confidence (float): The confidence of detection result. + save_path (str): The path to save output images. + """ + lod_tensor = data_out[0] + lod = lod_tensor.lod[0] + results = lod_tensor.as_ndarray() + + check_dir(output_dir) + + assert type(paths) is list, "type(paths) is not list." 
+ if handle_id < len(paths): + unhandled_paths = paths[handle_id:] + unhandled_paths_num = len(unhandled_paths) + else: + unhandled_paths_num = 0 + + output = list() + for index in range(len(lod) - 1): + output_i = {'data': []} + if index < unhandled_paths_num: + org_img_path = unhandled_paths[index] + org_img = Image.open(org_img_path) + else: + org_img = images[index - unhandled_paths_num] + org_img = org_img.astype(np.uint8) + org_img = Image.fromarray(org_img[:, :, ::-1]) + if visualization: + org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format((handle_id + index))) + org_img.save(org_img_path) + org_img_height = org_img.height + org_img_width = org_img.width + result_i = results[lod[index]:lod[index + 1]] + for row in result_i: + if len(row) != 6: + continue + if row[1] < score_thresh: + continue + category_id = int(row[0]) + confidence = row[1] + bbox = row[2:] + dt = {} + dt['label'] = label_names[category_id] + dt['confidence'] = confidence + dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height) + output_i['data'].append(dt) + + output.append(output_i) + if visualization: + output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir) + + return output diff --git a/modules/image/object_detection/yolov3_darknet53_venus/yolo_head.py b/modules/image/object_detection/yolov3_darknet53_venus/yolo_head.py new file mode 100644 index 0000000000000000000000000000000000000000..cfe796c2edebedbff19f302b24533849ee09c2e3 --- /dev/null +++ b/modules/image/object_detection/yolov3_darknet53_venus/yolo_head.py @@ -0,0 +1,231 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import OrderedDict + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.regularizer import L2Decay + +__all__ = ['MultiClassNMS', 'YOLOv3Head'] + + +class MultiClassNMS(object): + # __op__ = fluid.layers.multiclass_nms + def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k, normalized, score_threshold): + super(MultiClassNMS, self).__init__() + self.background_label = background_label + self.keep_top_k = keep_top_k + self.nms_threshold = nms_threshold + self.nms_top_k = nms_top_k + self.normalized = normalized + self.score_threshold = score_threshold + + +class YOLOv3Head(object): + """Head block for YOLOv3 network + + Args: + norm_decay (float): weight decay for normalization layer weights + num_classes (int): number of output classes + ignore_thresh (float): threshold to ignore confidence loss + label_smooth (bool): whether to use label smoothing + anchors (list): anchors + anchor_masks (list): anchor masks + nms (object): an instance of `MultiClassNMS` + """ + + def __init__(self, + norm_decay=0., + num_classes=80, + ignore_thresh=0.7, + label_smooth=True, + anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], + [373, 326]], + anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], + nms=MultiClassNMS( + background_label=-1, + keep_top_k=100, + nms_threshold=0.45, + nms_top_k=1000, + normalized=True, + score_threshold=0.01), + weight_prefix_name=''): + self.norm_decay = norm_decay + self.num_classes = num_classes + self.ignore_thresh = ignore_thresh + self.label_smooth = label_smooth + self.anchor_masks = anchor_masks + self._parse_anchors(anchors) + self.nms = nms + self.prefix_name = weight_prefix_name + + def _conv_bn(self, input, ch_out, 
filter_size, stride, padding, act='leaky', is_test=True, name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=ch_out, + filter_size=filter_size, + stride=stride, + padding=padding, + act=None, + param_attr=ParamAttr(name=name + ".conv.weights"), + bias_attr=False) + + bn_name = name + ".bn" + bn_param_attr = ParamAttr(regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale') + bn_bias_attr = ParamAttr(regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset') + out = fluid.layers.batch_norm( + input=conv, + act=None, + is_test=is_test, + param_attr=bn_param_attr, + bias_attr=bn_bias_attr, + moving_mean_name=bn_name + '.mean', + moving_variance_name=bn_name + '.var') + + if act == 'leaky': + out = fluid.layers.leaky_relu(x=out, alpha=0.1) + return out + + def _detection_block(self, input, channel, is_test=True, name=None): + assert channel % 2 == 0, \ + "channel {} cannot be divided by 2 in detection block {}" \ + .format(channel, name) + + conv = input + for j in range(2): + conv = self._conv_bn( + conv, channel, filter_size=1, stride=1, padding=0, is_test=is_test, name='{}.{}.0'.format(name, j)) + conv = self._conv_bn( + conv, channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test, name='{}.{}.1'.format(name, j)) + route = self._conv_bn( + conv, channel, filter_size=1, stride=1, padding=0, is_test=is_test, name='{}.2'.format(name)) + tip = self._conv_bn( + route, channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test, name='{}.tip'.format(name)) + return route, tip + + def _upsample(self, input, scale=2, name=None): + out = fluid.layers.resize_nearest(input=input, scale=float(scale), name=name) + return out + + def _parse_anchors(self, anchors): + """ + Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors + + """ + self.anchors = [] + self.mask_anchors = [] + + assert len(anchors) > 0, "ANCHORS not set." + assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." 
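+        # With the default COCO anchors above, mask_anchors[0] becomes
+        # [116, 90, 156, 198, 373, 326], i.e. the three largest anchors
+        # are assigned to the coarsest (32x-stride) output map.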
+ + for anchor in anchors: + assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) + self.anchors.extend(anchor) + + anchor_num = len(anchors) + for masks in self.anchor_masks: + self.mask_anchors.append([]) + for mask in masks: + assert mask < anchor_num, "anchor mask index overflow" + self.mask_anchors[-1].extend(anchors[mask]) + + def _get_outputs(self, input, is_train=True): + """ + Get YOLOv3 head output + + Args: + input (list): List of Variables, output of backbone stages + is_train (bool): whether in train or test mode + + Returns: + outputs (list): Variables of each output layer + """ + + outputs = [] + + # get last out_layer_num blocks in reverse order + out_layer_num = len(self.anchor_masks) + if isinstance(input, OrderedDict): + blocks = list(input.values())[-1:-out_layer_num - 1:-1] + else: + blocks = input[-1:-out_layer_num - 1:-1] + route = None + for i, block in enumerate(blocks): + if i > 0: # perform concat in first 2 detection_block + block = fluid.layers.concat(input=[route, block], axis=1) + route, tip = self._detection_block( + block, channel=512 // (2**i), is_test=(not is_train), name=self.prefix_name + "yolo_block.{}".format(i)) + + # out channel number = mask_num * (5 + class_num) + num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) + block_out = fluid.layers.conv2d( + input=tip, + num_filters=num_filters, + filter_size=1, + stride=1, + padding=0, + act=None, + param_attr=ParamAttr(name=self.prefix_name + "yolo_output.{}.conv.weights".format(i)), + bias_attr=ParamAttr( + regularizer=L2Decay(0.), name=self.prefix_name + "yolo_output.{}.conv.bias".format(i))) + outputs.append(block_out) + + if i < len(blocks) - 1: + # do not perform upsample in the last detection_block + route = self._conv_bn( + input=route, + ch_out=256 // (2**i), + filter_size=1, + stride=1, + padding=0, + is_test=(not is_train), + name=self.prefix_name + "yolo_transition.{}".format(i)) + # upsample + route = self._upsample(route) + + return outputs, blocks + + def get_prediction(self, outputs, im_size): + """ + Get prediction result of YOLOv3 network + + Args: + outputs (list): list of Variables, return from _get_outputs + im_size (Variable): Variable of size([h, w]) of each image + + Returns: + pred (Variable): The prediction result after non-max suppress. 
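+                The downsample ratio below starts at 32 and is halved per
+                output layer (32, 16, 8), matching the three YOLOv3 scales.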
+ + """ + boxes = [] + scores = [] + downsample = 32 + for i, output in enumerate(outputs): + box, score = fluid.layers.yolo_box( + x=output, + img_size=im_size, + anchors=self.mask_anchors[i], + class_num=self.num_classes, + conf_thresh=self.nms.score_threshold, + downsample_ratio=downsample, + name=self.prefix_name + "yolo_box" + str(i)) + boxes.append(box) + scores.append(fluid.layers.transpose(score, perm=[0, 2, 1])) + + downsample //= 2 + + yolo_boxes = fluid.layers.concat(boxes, axis=1) + yolo_scores = fluid.layers.concat(scores, axis=2) + pred = fluid.layers.multiclass_nms( + bboxes=yolo_boxes, + scores=yolo_scores, + score_threshold=self.nms.score_threshold, + nms_top_k=self.nms.nms_top_k, + keep_top_k=self.nms.keep_top_k, + nms_threshold=self.nms.nms_threshold, + background_label=self.nms.background_label, + normalized=self.nms.normalized, + name="multiclass_nms") + return pred diff --git a/hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/README.md b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/README.md similarity index 100% rename from hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/README.md rename to modules/image/object_detection/yolov3_mobilenet_v1_coco2017/README.md diff --git a/hub_module/modules/image/object_detection/yolov3_darknet53_venus/__init__.py b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/__init__.py similarity index 100% rename from hub_module/modules/image/object_detection/yolov3_darknet53_venus/__init__.py rename to modules/image/object_detection/yolov3_mobilenet_v1_coco2017/__init__.py diff --git a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/data_feed.py b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..28abc06a2bd25028d13bbc4ea724c111ca966477 --- /dev/null +++ b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/data_feed.py @@ -0,0 +1,69 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import os + +import cv2 +import numpy as np + +__all__ = ['reader'] + + +def reader(paths=[], images=None): + """ + data generator + + Args: + paths (list[str]): paths to images. + images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] + + Yield: + res (list): preprocessed image and the size of original image. + """ + img_list = [] + if paths: + assert type(paths) is list, "type(paths) is not list." 
+ for img_path in paths: + assert os.path.isfile(img_path), "The {} isn't a valid file path.".format(img_path) + img = cv2.imread(img_path).astype('float32') + img_list.append(img) + if images is not None: + for img in images: + img_list.append(img) + + for im in img_list: + # im_size + im_shape = im.shape + im_size = np.array([im_shape[0], im_shape[1]], dtype=np.int32) + + # decode image + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + + # resize image + target_size = 608 + im_size_min = np.min(im_shape[0:2]) + im_size_max = np.max(im_shape[0:2]) + if float(im_size_min) == 0: + raise ZeroDivisionError('min size of image is 0') + + im_scale_x = float(target_size) / float(im_shape[1]) + im_scale_y = float(target_size) / float(im_shape[0]) + im = cv2.resize(im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=2) + + # normalize image + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + im = im.astype(np.float32, copy=False) + mean = np.array(mean)[np.newaxis, np.newaxis, :] + std = np.array(std)[np.newaxis, np.newaxis, :] + im = im / 255.0 + im -= mean + im /= std + + # permute + im = np.swapaxes(im, 1, 2) + im = np.swapaxes(im, 1, 0) + + yield [im, im_size] diff --git a/hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/label_file.txt b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/label_file.txt similarity index 100% rename from hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/label_file.txt rename to modules/image/object_detection/yolov3_mobilenet_v1_coco2017/label_file.txt diff --git a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/mobilenet_v1.py b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/mobilenet_v1.py new file mode 100644 index 0000000000000000000000000000000000000000..a9c4994341ad481f32d98de5d856837cf1c9c72d --- /dev/null +++ b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/mobilenet_v1.py @@ -0,0 +1,152 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.regularizer import L2Decay + +__all__ = ['MobileNet'] + + +class MobileNet(object): + """ + MobileNet v1, see https://arxiv.org/abs/1704.04861 + + Args: + norm_type (str): normalization type, 'bn' and 'sync_bn' are supported + norm_decay (float): weight decay for normalization layer weights + conv_group_scale (int): scaling factor for convolution groups + with_extra_blocks (bool): if extra blocks should be added + extra_block_filters (list): number of filter for each extra block + """ + __shared__ = ['norm_type', 'weight_prefix_name'] + + def __init__(self, + norm_type='bn', + norm_decay=0., + conv_group_scale=1, + conv_learning_rate=1.0, + with_extra_blocks=False, + extra_block_filters=[[256, 512], [128, 256], [128, 256], [64, 128]], + weight_prefix_name=''): + self.norm_type = norm_type + self.norm_decay = norm_decay + self.conv_group_scale = conv_group_scale + self.conv_learning_rate = conv_learning_rate + self.with_extra_blocks = with_extra_blocks + self.extra_block_filters = extra_block_filters + self.prefix_name = weight_prefix_name + + def _conv_norm(self, + input, + filter_size, + num_filters, + stride, + padding, + num_groups=1, + act='relu', + use_cudnn=True, + name=None): + parameter_attr = ParamAttr( + learning_rate=self.conv_learning_rate, initializer=fluid.initializer.MSRA(), name=name + "_weights") + conv = fluid.layers.conv2d( + 
input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + act=None, + use_cudnn=use_cudnn, + param_attr=parameter_attr, + bias_attr=False) + + bn_name = name + "_bn" + norm_decay = self.norm_decay + bn_param_attr = ParamAttr(regularizer=L2Decay(norm_decay), name=bn_name + '_scale') + bn_bias_attr = ParamAttr(regularizer=L2Decay(norm_decay), name=bn_name + '_offset') + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=bn_param_attr, + bias_attr=bn_bias_attr, + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def depthwise_separable(self, input, num_filters1, num_filters2, num_groups, stride, scale, name=None): + depthwise_conv = self._conv_norm( + input=input, + filter_size=3, + num_filters=int(num_filters1 * scale), + stride=stride, + padding=1, + num_groups=int(num_groups * scale), + use_cudnn=False, + name=name + "_dw") + + pointwise_conv = self._conv_norm( + input=depthwise_conv, + filter_size=1, + num_filters=int(num_filters2 * scale), + stride=1, + padding=0, + name=name + "_sep") + return pointwise_conv + + def _extra_block(self, input, num_filters1, num_filters2, num_groups, stride, name=None): + pointwise_conv = self._conv_norm( + input=input, + filter_size=1, + num_filters=int(num_filters1), + stride=1, + num_groups=int(num_groups), + padding=0, + name=name + "_extra1") + normal_conv = self._conv_norm( + input=pointwise_conv, + filter_size=3, + num_filters=int(num_filters2), + stride=2, + num_groups=int(num_groups), + padding=1, + name=name + "_extra2") + return normal_conv + + def __call__(self, input): + scale = self.conv_group_scale + + blocks = [] + # input 1/1 + out = self._conv_norm(input, 3, int(32 * scale), 2, 1, name=self.prefix_name + "conv1") + # 1/2 + out = self.depthwise_separable(out, 32, 64, 32, 1, scale, name=self.prefix_name + "conv2_1") + out = self.depthwise_separable(out, 64, 128, 64, 2, scale, name=self.prefix_name + "conv2_2") + # 1/4 + out = self.depthwise_separable(out, 128, 128, 128, 1, scale, name=self.prefix_name + "conv3_1") + out = self.depthwise_separable(out, 128, 256, 128, 2, scale, name=self.prefix_name + "conv3_2") + # 1/8 + blocks.append(out) + out = self.depthwise_separable(out, 256, 256, 256, 1, scale, name=self.prefix_name + "conv4_1") + out = self.depthwise_separable(out, 256, 512, 256, 2, scale, name=self.prefix_name + "conv4_2") + # 1/16 + blocks.append(out) + for i in range(5): + out = self.depthwise_separable(out, 512, 512, 512, 1, scale, name=self.prefix_name + "conv5_" + str(i + 1)) + module11 = out + + out = self.depthwise_separable(out, 512, 1024, 512, 2, scale, name=self.prefix_name + "conv5_6") + # 1/32 + out = self.depthwise_separable(out, 1024, 1024, 1024, 1, scale, name=self.prefix_name + "conv6") + module13 = out + blocks.append(out) + if not self.with_extra_blocks: + return blocks + + num_filters = self.extra_block_filters + module14 = self._extra_block(module13, num_filters[0][0], num_filters[0][1], 1, 2, self.prefix_name + "conv7_1") + module15 = self._extra_block(module14, num_filters[1][0], num_filters[1][1], 1, 2, self.prefix_name + "conv7_2") + module16 = self._extra_block(module15, num_filters[2][0], num_filters[2][1], 1, 2, self.prefix_name + "conv7_3") + module17 = self._extra_block(module16, num_filters[3][0], num_filters[3][1], 1, 2, self.prefix_name + "conv7_4") + return module11, module13, module14, module15, module16, module17 diff --git 
a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/module.py b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/module.py new file mode 100644 index 0000000000000000000000000000000000000000..0e71a6dcefe80330f27bada476ab80e5e8f3392a --- /dev/null +++ b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/module.py @@ -0,0 +1,265 @@ +# coding=utf-8 +from __future__ import absolute_import + +import ast +import argparse +import os +from functools import partial + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving +from paddlehub.common.paddle_helper import add_vars_prefix + +from yolov3_mobilenet_v1_coco2017.mobilenet_v1 import MobileNet +from yolov3_mobilenet_v1_coco2017.processor import load_label_info, postprocess, base64_to_cv2 +from yolov3_mobilenet_v1_coco2017.data_feed import reader +from yolov3_mobilenet_v1_coco2017.yolo_head import MultiClassNMS, YOLOv3Head + + +@moduleinfo( + name="yolov3_mobilenet_v1_coco2017", + version="1.0.1", + type="CV/object_detection", + summary="Baidu's YOLOv3 model for object detection with backbone MobileNet_V1, trained with dataset COCO2017.", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class YOLOv3MobileNetV1Coco2017(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, "yolov3_mobilenet_v1_model") + self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) + self._set_config() + + def _set_config(self): + """ + predictor config setting. + """ + cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + cpu_config.switch_ir_optim(False) + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def context(self, trainable=True, pretrained=True, get_prediction=False): + """ + Distill the Head Features, so as to perform transfer learning. + + Args: + trainable (bool): whether to set parameters trainable. + pretrained (bool): whether to load default pretrained model. + get_prediction (bool): whether to get prediction. + + Returns: + inputs(dict): the input variables. + outputs(dict): the output variables. + context_prog (Program): the program to execute transfer learning. 
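A hypothetical sketch of consuming `context()` for transfer learning; the dictionary keys follow the docstring above, and the module name matches this directory:

```python
# Minimal transfer-learning setup sketch (assumes PaddleHub 1.x is installed
# and the module is available locally or from the hub server).
import paddlehub as hub

module = hub.Module(name="yolov3_mobilenet_v1_coco2017")
inputs, outputs, program = module.context(trainable=True, pretrained=True, get_prediction=False)

image = inputs['image']                   # [3, 608, 608] float32 feed variable
im_size = inputs['im_size']               # [h, w] int32 feed variable
head_features = outputs['head_features']  # one variable per detection scale
```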
+ """ + context_prog = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(context_prog, startup_program): + with fluid.unique_name.guard(): + # image + image = fluid.layers.data(name='image', shape=[3, 608, 608], dtype='float32') + # backbone + backbone = MobileNet(norm_type='sync_bn', norm_decay=0., conv_group_scale=1, with_extra_blocks=False) + # body_feats + body_feats = backbone(image) + # im_size + im_size = fluid.layers.data(name='im_size', shape=[2], dtype='int32') + # yolo_head + yolo_head = YOLOv3Head(num_classes=80) + # head_features + head_features, body_features = yolo_head._get_outputs(body_feats, is_train=trainable) + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(startup_program) + + # var_prefix + var_prefix = '@HUB_{}@'.format(self.name) + # name of inputs + inputs = {'image': var_prefix + image.name, 'im_size': var_prefix + im_size.name} + # name of outputs + if get_prediction: + bbox_out = yolo_head.get_prediction(head_features, im_size) + outputs = {'bbox_out': [var_prefix + bbox_out.name]} + else: + outputs = { + 'head_features': [var_prefix + var.name for var in head_features], + 'body_features': [var_prefix + var.name for var in body_features] + } + # add_vars_prefix + add_vars_prefix(context_prog, var_prefix) + add_vars_prefix(startup_program, var_prefix) + # inputs + inputs = {key: context_prog.global_block().vars[value] for key, value in inputs.items()} + # outputs + outputs = { + key: [context_prog.global_block().vars[varname] for varname in value] + for key, value in outputs.items() + } + # trainable + for param in context_prog.global_block().iter_parameters(): + param.trainable = trainable + # pretrained + if pretrained: + + def _if_exist(var): + return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist) + else: + exe.run(startup_program) + + return inputs, outputs, context_prog + + def object_detection(self, + paths=None, + images=None, + batch_size=1, + use_gpu=False, + output_dir='detection_result', + score_thresh=0.5, + visualization=True): + """API of Object Detection. + + Args: + paths (list[str]): The paths of images. + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + output_dir (str): The path to store output images. + visualization (bool): Whether to save image or not. + score_thresh (float): threshold for object detecion. + + Returns: + res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is: + data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: + left (float): The X coordinate of the upper left corner of the bounding box; + top (float): The Y coordinate of the upper left corner of the bounding box; + right (float): The X coordinate of the lower right corner of the bounding box; + bottom (float): The Y coordinate of the lower right corner of the bounding box; + label (str): The label of detection result; + confidence (float): The confidence of detection result. + save_path (str, optional): The path to save output images. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. 
If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." + ) + + paths = paths if paths else list() + data_reader = partial(reader, paths, images) + batch_reader = fluid.io.batch(data_reader, batch_size=batch_size) + res = [] + for iter_id, feed_data in enumerate(batch_reader()): + feed_data = np.array(feed_data) + image_tensor = PaddleTensor(np.array(list(feed_data[:, 0]))) + im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1]))) + if use_gpu: + data_out = self.gpu_predictor.run([image_tensor, im_size_tensor]) + else: + data_out = self.cpu_predictor.run([image_tensor, im_size_tensor]) + + output = postprocess( + paths=paths, + images=images, + data_out=data_out, + score_thresh=score_thresh, + label_names=self.label_names, + output_dir=output_dir, + handle_id=iter_id * batch_size, + visualization=visualization) + res.extend(output) + return res + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.object_detection(images=images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.object_detection( + paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization, + score_thresh=args.score_thresh) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='detection_result', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + self.arg_input_group.add_argument( + '--score_thresh', type=ast.literal_eval, default=0.5, help="threshold for object detecion.") diff --git a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/processor.py b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..64049e42b1d4dfa67aff606cae4490710b9cd6cc --- /dev/null +++ b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/processor.py @@ -0,0 +1,163 @@ +# coding=utf-8 +import base64 +import os + +import cv2 +import numpy as np +from PIL import Image, ImageDraw + +__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess'] + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(img, output_dir, image_path): + """Get save image name from source image path. + """ + image_name = os.path.split(image_path)[-1] + name, ext = os.path.splitext(image_name) + + if ext == '': + if img.format == 'PNG': + ext = '.png' + elif img.format == 'JPEG': + ext = '.jpg' + elif img.format == 'BMP': + ext = '.bmp' + else: + if img.mode == "RGB" or img.mode == "L": + ext = ".jpg" + elif img.mode == "RGBA" or img.mode == "P": + ext = '.png' + + return os.path.join(output_dir, "{}".format(name)) + ext + + +def draw_bounding_box_on_image(image_path, data_list, save_dir): + image = Image.open(image_path) + draw = ImageDraw.Draw(image) + for data in data_list: + left, right, top, bottom = data['left'], data['right'], data['top'], data['bottom'] + # draw bbox + draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=2, fill='red') + # draw label + if image.mode == 'RGB': + text = data['label'] + ": %.2f%%" % (100 * data['confidence']) + textsize_width, textsize_height = draw.textsize(text=text) + draw.rectangle( + xy=(left, top - (textsize_height + 5), left + textsize_width + 10, top), fill=(255, 255, 255)) + draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) + + save_name = get_save_image_name(image, save_dir, image_path) + if os.path.exists(save_name): + os.remove(save_name) + + image.save(save_name) + return save_name + + +def clip_bbox(bbox, img_width, img_height): + xmin = max(min(bbox[0], img_width), 0.) + ymin = max(min(bbox[1], img_height), 0.) + xmax = max(min(bbox[2], img_width), 0.) + ymax = max(min(bbox[3], img_height), 0.) + return float(xmin), float(ymin), float(xmax), float(ymax) + + +def load_label_info(file_path): + with open(file_path, 'r') as fr: + text = fr.readlines() + label_names = [] + for info in text: + label_names.append(info.strip()) + return label_names + + +def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True): + """ + postprocess the lod_tensor produced by fluid.Executor.run + + Args: + paths (list[str]): The paths of images. + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + data_out (lod_tensor): data output of predictor. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. 
+ output_dir (str): The path to store output images. + visualization (bool): Whether to save image or not. + score_thresh (float): the low limit of bounding box. + label_names (list[str]): label names. + handle_id (int): The number of images that have been handled. + + Returns: + res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: + data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: + left (float): The X coordinate of the upper left corner of the bounding box; + top (float): The Y coordinate of the upper left corner of the bounding box; + right (float): The X coordinate of the lower right corner of the bounding box; + bottom (float): The Y coordinate of the lower right corner of the bounding box; + label (str): The label of detection result; + confidence (float): The confidence of detection result. + save_path (str): The path to save output images. + """ + lod_tensor = data_out[0] + lod = lod_tensor.lod[0] + results = lod_tensor.as_ndarray() + + check_dir(output_dir) + + assert type(paths) is list, "type(paths) is not list." + if handle_id < len(paths): + unhandled_paths = paths[handle_id:] + unhandled_paths_num = len(unhandled_paths) + else: + unhandled_paths_num = 0 + + output = list() + for index in range(len(lod) - 1): + output_i = {'data': []} + if index < unhandled_paths_num: + org_img_path = unhandled_paths[index] + org_img = Image.open(org_img_path) + else: + org_img = images[index - unhandled_paths_num] + org_img = org_img.astype(np.uint8) + org_img = Image.fromarray(org_img[:, :, ::-1]) + if visualization: + org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format((handle_id + index))) + org_img.save(org_img_path) + org_img_height = org_img.height + org_img_width = org_img.width + result_i = results[lod[index]:lod[index + 1]] + for row in result_i: + if len(row) != 6: + continue + if row[1] < score_thresh: + continue + category_id = int(row[0]) + confidence = row[1] + bbox = row[2:] + dt = {} + dt['label'] = label_names[category_id] + dt['confidence'] = float(confidence) + dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height) + output_i['data'].append(dt) + + output.append(output_i) + if visualization: + output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir) + + return output diff --git a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/yolo_head.py b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/yolo_head.py new file mode 100644 index 0000000000000000000000000000000000000000..cfe796c2edebedbff19f302b24533849ee09c2e3 --- /dev/null +++ b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/yolo_head.py @@ -0,0 +1,231 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import OrderedDict + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.regularizer import L2Decay + +__all__ = ['MultiClassNMS', 'YOLOv3Head'] + + +class MultiClassNMS(object): + # __op__ = fluid.layers.multiclass_nms + def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k, normalized, score_threshold): + super(MultiClassNMS, self).__init__() + self.background_label = background_label + self.keep_top_k = keep_top_k + self.nms_threshold = nms_threshold + self.nms_top_k = nms_top_k + 
self.normalized = normalized + self.score_threshold = score_threshold + + +class YOLOv3Head(object): + """Head block for YOLOv3 network + + Args: + norm_decay (float): weight decay for normalization layer weights + num_classes (int): number of output classes + ignore_thresh (float): threshold to ignore confidence loss + label_smooth (bool): whether to use label smoothing + anchors (list): anchors + anchor_masks (list): anchor masks + nms (object): an instance of `MultiClassNMS` + """ + + def __init__(self, + norm_decay=0., + num_classes=80, + ignore_thresh=0.7, + label_smooth=True, + anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], + [373, 326]], + anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], + nms=MultiClassNMS( + background_label=-1, + keep_top_k=100, + nms_threshold=0.45, + nms_top_k=1000, + normalized=True, + score_threshold=0.01), + weight_prefix_name=''): + self.norm_decay = norm_decay + self.num_classes = num_classes + self.ignore_thresh = ignore_thresh + self.label_smooth = label_smooth + self.anchor_masks = anchor_masks + self._parse_anchors(anchors) + self.nms = nms + self.prefix_name = weight_prefix_name + + def _conv_bn(self, input, ch_out, filter_size, stride, padding, act='leaky', is_test=True, name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=ch_out, + filter_size=filter_size, + stride=stride, + padding=padding, + act=None, + param_attr=ParamAttr(name=name + ".conv.weights"), + bias_attr=False) + + bn_name = name + ".bn" + bn_param_attr = ParamAttr(regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale') + bn_bias_attr = ParamAttr(regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset') + out = fluid.layers.batch_norm( + input=conv, + act=None, + is_test=is_test, + param_attr=bn_param_attr, + bias_attr=bn_bias_attr, + moving_mean_name=bn_name + '.mean', + moving_variance_name=bn_name + '.var') + + if act == 'leaky': + out = fluid.layers.leaky_relu(x=out, alpha=0.1) + return out + + def _detection_block(self, input, channel, is_test=True, name=None): + assert channel % 2 == 0, \ + "channel {} cannot be divided by 2 in detection block {}" \ + .format(channel, name) + + conv = input + for j in range(2): + conv = self._conv_bn( + conv, channel, filter_size=1, stride=1, padding=0, is_test=is_test, name='{}.{}.0'.format(name, j)) + conv = self._conv_bn( + conv, channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test, name='{}.{}.1'.format(name, j)) + route = self._conv_bn( + conv, channel, filter_size=1, stride=1, padding=0, is_test=is_test, name='{}.2'.format(name)) + tip = self._conv_bn( + route, channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test, name='{}.tip'.format(name)) + return route, tip + + def _upsample(self, input, scale=2, name=None): + out = fluid.layers.resize_nearest(input=input, scale=float(scale), name=name) + return out + + def _parse_anchors(self, anchors): + """ + Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors + + """ + self.anchors = [] + self.mask_anchors = [] + + assert len(anchors) > 0, "ANCHORS not set." + assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." 
+ + for anchor in anchors: + assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) + self.anchors.extend(anchor) + + anchor_num = len(anchors) + for masks in self.anchor_masks: + self.mask_anchors.append([]) + for mask in masks: + assert mask < anchor_num, "anchor mask index overflow" + self.mask_anchors[-1].extend(anchors[mask]) + + def _get_outputs(self, input, is_train=True): + """ + Get YOLOv3 head output + + Args: + input (list): List of Variables, output of backbone stages + is_train (bool): whether in train or test mode + + Returns: + outputs (list): Variables of each output layer + """ + + outputs = [] + + # get last out_layer_num blocks in reverse order + out_layer_num = len(self.anchor_masks) + if isinstance(input, OrderedDict): + blocks = list(input.values())[-1:-out_layer_num - 1:-1] + else: + blocks = input[-1:-out_layer_num - 1:-1] + route = None + for i, block in enumerate(blocks): + if i > 0: # perform concat in first 2 detection_block + block = fluid.layers.concat(input=[route, block], axis=1) + route, tip = self._detection_block( + block, channel=512 // (2**i), is_test=(not is_train), name=self.prefix_name + "yolo_block.{}".format(i)) + + # out channel number = mask_num * (5 + class_num) + num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) + block_out = fluid.layers.conv2d( + input=tip, + num_filters=num_filters, + filter_size=1, + stride=1, + padding=0, + act=None, + param_attr=ParamAttr(name=self.prefix_name + "yolo_output.{}.conv.weights".format(i)), + bias_attr=ParamAttr( + regularizer=L2Decay(0.), name=self.prefix_name + "yolo_output.{}.conv.bias".format(i))) + outputs.append(block_out) + + if i < len(blocks) - 1: + # do not perform upsample in the last detection_block + route = self._conv_bn( + input=route, + ch_out=256 // (2**i), + filter_size=1, + stride=1, + padding=0, + is_test=(not is_train), + name=self.prefix_name + "yolo_transition.{}".format(i)) + # upsample + route = self._upsample(route) + + return outputs, blocks + + def get_prediction(self, outputs, im_size): + """ + Get prediction result of YOLOv3 network + + Args: + outputs (list): list of Variables, return from _get_outputs + im_size (Variable): Variable of size([h, w]) of each image + + Returns: + pred (Variable): The prediction result after non-max suppress. 
+ + """ + boxes = [] + scores = [] + downsample = 32 + for i, output in enumerate(outputs): + box, score = fluid.layers.yolo_box( + x=output, + img_size=im_size, + anchors=self.mask_anchors[i], + class_num=self.num_classes, + conf_thresh=self.nms.score_threshold, + downsample_ratio=downsample, + name=self.prefix_name + "yolo_box" + str(i)) + boxes.append(box) + scores.append(fluid.layers.transpose(score, perm=[0, 2, 1])) + + downsample //= 2 + + yolo_boxes = fluid.layers.concat(boxes, axis=1) + yolo_scores = fluid.layers.concat(scores, axis=2) + pred = fluid.layers.multiclass_nms( + bboxes=yolo_boxes, + scores=yolo_scores, + score_threshold=self.nms.score_threshold, + nms_top_k=self.nms.nms_top_k, + keep_top_k=self.nms.keep_top_k, + nms_threshold=self.nms.nms_threshold, + background_label=self.nms.background_label, + normalized=self.nms.normalized, + name="multiclass_nms") + return pred diff --git a/hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/README.md b/modules/image/object_detection/yolov3_resnet34_coco2017/README.md similarity index 100% rename from hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/README.md rename to modules/image/object_detection/yolov3_resnet34_coco2017/README.md diff --git a/hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/__init__.py b/modules/image/object_detection/yolov3_resnet34_coco2017/__init__.py similarity index 100% rename from hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/__init__.py rename to modules/image/object_detection/yolov3_resnet34_coco2017/__init__.py diff --git a/modules/image/object_detection/yolov3_resnet34_coco2017/data_feed.py b/modules/image/object_detection/yolov3_resnet34_coco2017/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..28abc06a2bd25028d13bbc4ea724c111ca966477 --- /dev/null +++ b/modules/image/object_detection/yolov3_resnet34_coco2017/data_feed.py @@ -0,0 +1,69 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import os + +import cv2 +import numpy as np + +__all__ = ['reader'] + + +def reader(paths=[], images=None): + """ + data generator + + Args: + paths (list[str]): paths to images. + images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] + + Yield: + res (list): preprocessed image and the size of original image. + """ + img_list = [] + if paths: + assert type(paths) is list, "type(paths) is not list." 
+ for img_path in paths: + assert os.path.isfile(img_path), "The {} isn't a valid file path.".format(img_path) + img = cv2.imread(img_path).astype('float32') + img_list.append(img) + if images is not None: + for img in images: + img_list.append(img) + + for im in img_list: + # im_size + im_shape = im.shape + im_size = np.array([im_shape[0], im_shape[1]], dtype=np.int32) + + # decode image + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + + # resize image + target_size = 608 + im_size_min = np.min(im_shape[0:2]) + im_size_max = np.max(im_shape[0:2]) + if float(im_size_min) == 0: + raise ZeroDivisionError('min size of image is 0') + + im_scale_x = float(target_size) / float(im_shape[1]) + im_scale_y = float(target_size) / float(im_shape[0]) + im = cv2.resize(im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=2) + + # normalize image + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + im = im.astype(np.float32, copy=False) + mean = np.array(mean)[np.newaxis, np.newaxis, :] + std = np.array(std)[np.newaxis, np.newaxis, :] + im = im / 255.0 + im -= mean + im /= std + + # permute + im = np.swapaxes(im, 1, 2) + im = np.swapaxes(im, 1, 0) + + yield [im, im_size] diff --git a/hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/label_file.txt b/modules/image/object_detection/yolov3_resnet34_coco2017/label_file.txt similarity index 100% rename from hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/label_file.txt rename to modules/image/object_detection/yolov3_resnet34_coco2017/label_file.txt diff --git a/modules/image/object_detection/yolov3_resnet34_coco2017/module.py b/modules/image/object_detection/yolov3_resnet34_coco2017/module.py new file mode 100644 index 0000000000000000000000000000000000000000..2bea01d72a439b9336fef92a4bca249208e0b9f2 --- /dev/null +++ b/modules/image/object_detection/yolov3_resnet34_coco2017/module.py @@ -0,0 +1,266 @@ +# coding=utf-8 +from __future__ import absolute_import + +import ast +import argparse +import os +from functools import partial + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving +from paddlehub.common.paddle_helper import add_vars_prefix + +from yolov3_resnet34_coco2017.resnet import ResNet +from yolov3_resnet34_coco2017.processor import load_label_info, postprocess, base64_to_cv2 +from yolov3_resnet34_coco2017.data_feed import reader +from yolov3_resnet34_coco2017.yolo_head import MultiClassNMS, YOLOv3Head + + +@moduleinfo( + name="yolov3_resnet34_coco2017", + version="1.0.1", + type="CV/object_detection", + summary="Baidu's YOLOv3 model for object detection with backbone ResNet34, trained with dataset coco2017.", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class YOLOv3ResNet34Coco2017(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, "yolov3_resnet34_model") + self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) + self._set_config() + + def _set_config(self): + """ + predictor config setting. 
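Since both modules expose a `@serving` method, they can also be queried over HTTP. A hypothetical client sketch, assuming the module was started with `hub serving start -m yolov3_resnet34_coco2017` on the default port 8866 and that `test.jpg` exists:

```python
import base64
import json

import cv2
import requests

img = cv2.imread('test.jpg')
b64 = base64.b64encode(cv2.imencode('.jpg', img)[1].tobytes()).decode('utf8')
data = {'images': [b64]}
url = 'http://127.0.0.1:8866/predict/yolov3_resnet34_coco2017'
r = requests.post(url=url, headers={'Content-type': 'application/json'}, data=json.dumps(data))
print(r.json())
```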
+ """ + cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + cpu_config.switch_ir_optim(False) + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def context(self, trainable=True, pretrained=True, get_prediction=False): + """ + Distill the Head Features, so as to perform transfer learning. + + Args: + trainable (bool): whether to set parameters trainable. + pretrained (bool): whether to load default pretrained model. + get_prediction (bool): whether to get prediction. + + Returns: + inputs(dict): the input variables. + outputs(dict): the output variables. + context_prog (Program): the program to execute transfer learning. + """ + context_prog = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(context_prog, startup_program): + with fluid.unique_name.guard(): + # image + image = fluid.layers.data(name='image', shape=[3, 608, 608], dtype='float32') + # backbone + backbone = ResNet( + norm_type='bn', freeze_at=0, freeze_norm=False, norm_decay=0., depth=34, feature_maps=[3, 4, 5]) + # body_feats + body_feats = backbone(image) + # im_size + im_size = fluid.layers.data(name='im_size', shape=[2], dtype='int32') + # yolo_head + yolo_head = YOLOv3Head(num_classes=80) + # head_features + head_features, body_features = yolo_head._get_outputs(body_feats, is_train=trainable) + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + # var_prefix + var_prefix = '@HUB_{}@'.format(self.name) + # name of inputs + inputs = {'image': var_prefix + image.name, 'im_size': var_prefix + im_size.name} + # name of outputs + if get_prediction: + bbox_out = yolo_head.get_prediction(head_features, im_size) + outputs = {'bbox_out': [var_prefix + bbox_out.name]} + else: + outputs = { + 'head_features': [var_prefix + var.name for var in head_features], + 'body_features': [var_prefix + var.name for var in body_features] + } + # add_vars_prefix + add_vars_prefix(context_prog, var_prefix) + add_vars_prefix(fluid.default_startup_program(), var_prefix) + # inputs + inputs = {key: context_prog.global_block().vars[value] for key, value in inputs.items()} + # outputs + outputs = { + key: [context_prog.global_block().vars[varname] for varname in value] + for key, value in outputs.items() + } + # trainable + for param in context_prog.global_block().iter_parameters(): + param.trainable = trainable + # pretrained + if pretrained: + + def _if_exist(var): + return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist) + else: + exe.run(startup_program) + + return inputs, outputs, context_prog + + def object_detection(self, + paths=None, + images=None, + batch_size=1, + use_gpu=False, + output_dir='detection_result', + score_thresh=0.5, + visualization=True): + """API of Object Detection. + + Args: + paths (list[str]): The paths of images. + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. 
+ output_dir (str): The path to store output images. + visualization (bool): Whether to save image or not. + score_thresh (float): threshold for object detecion. + + Returns: + res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is: + data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: + left (float): The X coordinate of the upper left corner of the bounding box; + top (float): The Y coordinate of the upper left corner of the bounding box; + right (float): The X coordinate of the lower right corner of the bounding box; + bottom (float): The Y coordinate of the lower right corner of the bounding box; + label (str): The label of detection result; + confidence (float): The confidence of detection result. + save_path (str, optional): The path to save output images. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." + ) + + paths = paths if paths else list() + data_reader = partial(reader, paths, images) + batch_reader = fluid.io.batch(data_reader, batch_size=batch_size) + res = [] + for iter_id, feed_data in enumerate(batch_reader()): + feed_data = np.array(feed_data) + image_tensor = PaddleTensor(np.array(list(feed_data[:, 0]))) + im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1]))) + if use_gpu: + data_out = self.gpu_predictor.run([image_tensor, im_size_tensor]) + else: + data_out = self.cpu_predictor.run([image_tensor, im_size_tensor]) + + output = postprocess( + paths=paths, + images=images, + data_out=data_out, + score_thresh=score_thresh, + label_names=self.label_names, + output_dir=output_dir, + handle_id=iter_id * batch_size, + visualization=visualization) + res.extend(output) + return res + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.object_detection(images=images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.object_detection( + paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization, + score_thresh=args.score_thresh) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='detection_result', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + self.arg_input_group.add_argument( + '--score_thresh', type=ast.literal_eval, default=0.5, help="threshold for object detecion.") diff --git a/hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/name_adapter.py b/modules/image/object_detection/yolov3_resnet34_coco2017/name_adapter.py similarity index 100% rename from hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/name_adapter.py rename to modules/image/object_detection/yolov3_resnet34_coco2017/name_adapter.py diff --git a/modules/image/object_detection/yolov3_resnet34_coco2017/nonlocal_helper.py b/modules/image/object_detection/yolov3_resnet34_coco2017/nonlocal_helper.py new file mode 100644 index 0000000000000000000000000000000000000000..839df4caf744280001f033d8ef6a3d560277368e --- /dev/null +++ b/modules/image/object_detection/yolov3_resnet34_coco2017/nonlocal_helper.py @@ -0,0 +1,151 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import paddle.fluid as fluid +from paddle.fluid import ParamAttr + +nonlocal_params = { + "use_zero_init_conv": False, + "conv_init_std": 0.01, + "no_bias": True, + "use_maxpool": False, + "use_softmax": True, + "use_bn": False, + "use_scale": True, # vital for the model prformance!!! 
+ "use_affine": False, + "bn_momentum": 0.9, + "bn_epsilon": 1.0000001e-5, + "bn_init_gamma": 0.9, + "weight_decay_bn": 1.e-4, +} + + +def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner, max_pool_stride=2): + cur = input + theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \ + filter_size = [1, 1], stride = [1, 1], \ + padding = [0, 0], \ + param_attr=ParamAttr(name = prefix + '_theta' + "_w", \ + initializer = fluid.initializer.Normal(loc = 0.0, + scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) \ + if not nonlocal_params["no_bias"] else False, \ + name = prefix + '_theta') + theta_shape = theta.shape + theta_shape_op = fluid.layers.shape(theta) + theta_shape_op.stop_gradient = True + + if nonlocal_params["use_maxpool"]: + max_pool = fluid.layers.pool2d(input = cur, \ + pool_size = [max_pool_stride, max_pool_stride], \ + pool_type = 'max', \ + pool_stride = [max_pool_stride, max_pool_stride], \ + pool_padding = [0, 0], \ + name = prefix + '_pool') + else: + max_pool = cur + + phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ + filter_size = [1, 1], stride = [1, 1], \ + padding = [0, 0], \ + param_attr = ParamAttr(name = prefix + '_phi' + "_w", \ + initializer = fluid.initializer.Normal(loc = 0.0, + scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) \ + if (nonlocal_params["no_bias"] == 0) else False, \ + name = prefix + '_phi') + phi_shape = phi.shape + + g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ + filter_size = [1, 1], stride = [1, 1], \ + padding = [0, 0], \ + param_attr = ParamAttr(name = prefix + '_g' + "_w", \ + initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_g' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) else False, \ + name = prefix + '_g') + g_shape = g.shape + # we have to use explicit batch size (to support arbitrary spacetime size) + # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784) + theta = fluid.layers.reshape(theta, shape=(0, 0, -1)) + theta = fluid.layers.transpose(theta, [0, 2, 1]) + phi = fluid.layers.reshape(phi, [0, 0, -1]) + theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity') + g = fluid.layers.reshape(g, [0, 0, -1]) + + if nonlocal_params["use_softmax"]: + if nonlocal_params["use_scale"]: + theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5) + else: + theta_phi_sc = theta_phi + p = fluid.layers.softmax(theta_phi_sc, name=prefix + '_affinity' + '_prob') + else: + # not clear about what is doing in xlw's code + p = None # not implemented + raise "Not implemented when not use softmax" + + # note g's axis[2] corresponds to p's axis[2] + # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1) + p = fluid.layers.transpose(p, [0, 2, 1]) + t = fluid.layers.matmul(g, p, name=prefix + '_y') + + # reshape back + # e.g. 
(8, 1024, 784) => (8, 1024, 4, 14, 14) + t_shape = t.shape + t_re = fluid.layers.reshape(t, shape=list(theta_shape), actual_shape=theta_shape_op) + blob_out = t_re + blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \ + filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \ + param_attr = ParamAttr(name = prefix + '_out' + "_w", \ + initializer = fluid.initializer.Constant(value = 0.) \ + if nonlocal_params["use_zero_init_conv"] \ + else fluid.initializer.Normal(loc = 0.0, + scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_out' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) \ + if (nonlocal_params["no_bias"] == 0) else False, \ + name = prefix + '_out') + blob_out_shape = blob_out.shape + + if nonlocal_params["use_bn"]: + bn_name = prefix + "_bn" + blob_out = fluid.layers.batch_norm(blob_out, \ + # is_test = test_mode, \ + momentum = nonlocal_params["bn_momentum"], \ + epsilon = nonlocal_params["bn_epsilon"], \ + name = bn_name, \ + param_attr = ParamAttr(name = bn_name + "_s", \ + initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \ + regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ + bias_attr = ParamAttr(name = bn_name + "_b", \ + regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ + moving_mean_name = bn_name + "_rm", \ + moving_variance_name = bn_name + "_riv") # add bn + + if nonlocal_params["use_affine"]: + affine_scale = fluid.layers.create_parameter(\ + shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ + attr=ParamAttr(name=prefix + '_affine' + '_s'), \ + default_initializer = fluid.initializer.Constant(value = 1.)) + affine_bias = fluid.layers.create_parameter(\ + shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ + attr=ParamAttr(name=prefix + '_affine' + '_b'), \ + default_initializer = fluid.initializer.Constant(value = 0.)) + blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \ + bias = affine_bias, name = prefix + '_affine') # add affine + + return blob_out + + +def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner): + ''' + add_space_nonlocal: + Non-local Neural Networks: see https://arxiv.org/abs/1711.07971 + ''' + conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner) + output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum') + return output diff --git a/modules/image/object_detection/yolov3_resnet34_coco2017/processor.py b/modules/image/object_detection/yolov3_resnet34_coco2017/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..64049e42b1d4dfa67aff606cae4490710b9cd6cc --- /dev/null +++ b/modules/image/object_detection/yolov3_resnet34_coco2017/processor.py @@ -0,0 +1,163 @@ +# coding=utf-8 +import base64 +import os + +import cv2 +import numpy as np +from PIL import Image, ImageDraw + +__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess'] + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(img, output_dir, image_path): + """Get save image name from source image path. 
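The attention computed by `space_nonlocal` above is the embedded-Gaussian form: softmax of the scaled query-key affinity, applied to the values. A minimal NumPy sketch of the same shape algebra, with `theta`, `phi`, `g` standing in for the 1x1 conv outputs:

```python
import numpy as np


def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)


N, C, HW = 2, 8, 49                        # batch, inner channels, flattened H*W
rng = np.random.default_rng(0)
theta = rng.standard_normal((N, HW, C))    # queries, already transposed to (N, HW, C)
phi = rng.standard_normal((N, C, HW))      # keys
g = rng.standard_normal((N, C, HW))        # values

p = softmax((theta @ phi) * C ** -0.5)     # affinity with the 1/sqrt(dim_inner) scaling
out = g @ p.transpose(0, 2, 1)             # aggregate values, as matmul(g, p) above
print(out.shape)                           # (2, 8, 49)
```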
+ """ + image_name = os.path.split(image_path)[-1] + name, ext = os.path.splitext(image_name) + + if ext == '': + if img.format == 'PNG': + ext = '.png' + elif img.format == 'JPEG': + ext = '.jpg' + elif img.format == 'BMP': + ext = '.bmp' + else: + if img.mode == "RGB" or img.mode == "L": + ext = ".jpg" + elif img.mode == "RGBA" or img.mode == "P": + ext = '.png' + + return os.path.join(output_dir, "{}".format(name)) + ext + + +def draw_bounding_box_on_image(image_path, data_list, save_dir): + image = Image.open(image_path) + draw = ImageDraw.Draw(image) + for data in data_list: + left, right, top, bottom = data['left'], data['right'], data['top'], data['bottom'] + # draw bbox + draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=2, fill='red') + # draw label + if image.mode == 'RGB': + text = data['label'] + ": %.2f%%" % (100 * data['confidence']) + textsize_width, textsize_height = draw.textsize(text=text) + draw.rectangle( + xy=(left, top - (textsize_height + 5), left + textsize_width + 10, top), fill=(255, 255, 255)) + draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) + + save_name = get_save_image_name(image, save_dir, image_path) + if os.path.exists(save_name): + os.remove(save_name) + + image.save(save_name) + return save_name + + +def clip_bbox(bbox, img_width, img_height): + xmin = max(min(bbox[0], img_width), 0.) + ymin = max(min(bbox[1], img_height), 0.) + xmax = max(min(bbox[2], img_width), 0.) + ymax = max(min(bbox[3], img_height), 0.) + return float(xmin), float(ymin), float(xmax), float(ymax) + + +def load_label_info(file_path): + with open(file_path, 'r') as fr: + text = fr.readlines() + label_names = [] + for info in text: + label_names.append(info.strip()) + return label_names + + +def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True): + """ + postprocess the lod_tensor produced by fluid.Executor.run + + Args: + paths (list[str]): The paths of images. + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + data_out (lod_tensor): data output of predictor. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + output_dir (str): The path to store output images. + visualization (bool): Whether to save image or not. + score_thresh (float): the low limit of bounding box. + label_names (list[str]): label names. + handle_id (int): The number of images that have been handled. + + Returns: + res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: + data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: + left (float): The X coordinate of the upper left corner of the bounding box; + top (float): The Y coordinate of the upper left corner of the bounding box; + right (float): The X coordinate of the lower right corner of the bounding box; + bottom (float): The Y coordinate of the lower right corner of the bounding box; + label (str): The label of detection result; + confidence (float): The confidence of detection result. + save_path (str): The path to save output images. + """ + lod_tensor = data_out[0] + lod = lod_tensor.lod[0] + results = lod_tensor.as_ndarray() + + check_dir(output_dir) + + assert type(paths) is list, "type(paths) is not list." 
+ if handle_id < len(paths): + unhandled_paths = paths[handle_id:] + unhandled_paths_num = len(unhandled_paths) + else: + unhandled_paths_num = 0 + + output = list() + for index in range(len(lod) - 1): + output_i = {'data': []} + if index < unhandled_paths_num: + org_img_path = unhandled_paths[index] + org_img = Image.open(org_img_path) + else: + org_img = images[index - unhandled_paths_num] + org_img = org_img.astype(np.uint8) + org_img = Image.fromarray(org_img[:, :, ::-1]) + if visualization: + org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format((handle_id + index))) + org_img.save(org_img_path) + org_img_height = org_img.height + org_img_width = org_img.width + result_i = results[lod[index]:lod[index + 1]] + for row in result_i: + if len(row) != 6: + continue + if row[1] < score_thresh: + continue + category_id = int(row[0]) + confidence = row[1] + bbox = row[2:] + dt = {} + dt['label'] = label_names[category_id] + dt['confidence'] = float(confidence) + dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height) + output_i['data'].append(dt) + + output.append(output_i) + if visualization: + output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir) + + return output diff --git a/modules/image/object_detection/yolov3_resnet34_coco2017/resnet.py b/modules/image/object_detection/yolov3_resnet34_coco2017/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..77a3f7f4c7b16c3f9c65c46fc93eb394befa5110 --- /dev/null +++ b/modules/image/object_detection/yolov3_resnet34_coco2017/resnet.py @@ -0,0 +1,364 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +from collections import OrderedDict +from numbers import Integral + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.framework import Variable +from paddle.fluid.regularizer import L2Decay +from paddle.fluid.initializer import Constant + +from .nonlocal_helper import add_space_nonlocal +from .name_adapter import NameAdapter + +__all__ = ['ResNet', 'ResNetC5'] + + +class ResNet(object): + """ + Residual Network, see https://arxiv.org/abs/1512.03385 + Args: + depth (int): ResNet depth, should be 34, 50. 
+ freeze_at (int): freeze the backbone at which stage + norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel' + freeze_norm (bool): freeze normalization layers + norm_decay (float): weight decay for normalization layer weights + variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently + feature_maps (list): index of stages whose feature maps are returned + dcn_v2_stages (list): index of stages who select deformable conv v2 + nonlocal_stages (list): index of stages who select nonlocal networks + """ + __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name'] + + def __init__(self, + depth=50, + freeze_at=0, + norm_type='sync_bn', + freeze_norm=False, + norm_decay=0., + variant='b', + feature_maps=[3, 4, 5], + dcn_v2_stages=[], + weight_prefix_name='', + nonlocal_stages=[], + get_prediction=False, + class_dim=1000): + super(ResNet, self).__init__() + + if isinstance(feature_maps, Integral): + feature_maps = [feature_maps] + + assert depth in [34, 50], \ + "depth {} not in [34, 50]" + assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant" + assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" + assert len(feature_maps) > 0, "need one or more feature maps" + assert norm_type in ['bn', 'sync_bn', 'affine_channel'] + assert not (len(nonlocal_stages)>0 and depth<50), \ + "non-local is not supported for resnet18 or resnet34" + + self.depth = depth + self.freeze_at = freeze_at + self.norm_type = norm_type + self.norm_decay = norm_decay + self.freeze_norm = freeze_norm + self.variant = variant + self._model_type = 'ResNet' + self.feature_maps = feature_maps + self.dcn_v2_stages = dcn_v2_stages + self.depth_cfg = { + 34: ([3, 4, 6, 3], self.basicblock), + 50: ([3, 4, 6, 3], self.bottleneck), + } + self.stage_filters = [64, 128, 256, 512] + self._c1_out_chan_num = 64 + self.na = NameAdapter(self) + self.prefix_name = weight_prefix_name + + self.nonlocal_stages = nonlocal_stages + self.nonlocal_mod_cfg = { + 50: 2, + 101: 5, + 152: 8, + 200: 12, + } + self.get_prediction = get_prediction + self.class_dim = class_dim + + def _conv_offset(self, input, filter_size, stride, padding, act=None, name=None): + out_channel = filter_size * filter_size * 3 + out = fluid.layers.conv2d( + input, + num_filters=out_channel, + filter_size=filter_size, + stride=stride, + padding=padding, + param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"), + bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"), + act=act, + name=name) + return out + + def _conv_norm(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None, dcn_v2=False): + _name = self.prefix_name + name if self.prefix_name != '' else name + if not dcn_v2: + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=_name + "_weights"), + bias_attr=False, + name=_name + '.conv2d.output.1') + else: + # select deformable conv" + offset_mask = self._conv_offset( + input=input, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + act=None, + name=_name + "_conv_offset") + offset_channel = filter_size**2 * 2 + mask_channel = filter_size**2 + offset, mask = fluid.layers.split(input=offset_mask, num_or_sections=[offset_channel, mask_channel], dim=1) + mask = fluid.layers.sigmoid(mask) + conv = fluid.layers.deformable_conv( + input=input, + offset=offset, + mask=mask, + num_filters=num_filters, + 
filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + deformable_groups=1, + im2col_step=1, + param_attr=ParamAttr(name=_name + "_weights"), + bias_attr=False, + name=_name + ".conv2d.output.1") + + bn_name = self.na.fix_conv_norm_name(name) + bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name + + norm_lr = 0. if self.freeze_norm else 1. + norm_decay = self.norm_decay + pattr = ParamAttr(name=bn_name + '_scale', learning_rate=norm_lr, regularizer=L2Decay(norm_decay)) + battr = ParamAttr(name=bn_name + '_offset', learning_rate=norm_lr, regularizer=L2Decay(norm_decay)) + + if self.norm_type in ['bn', 'sync_bn']: + global_stats = True if self.freeze_norm else False + out = fluid.layers.batch_norm( + input=conv, + act=act, + name=bn_name + '.output.1', + param_attr=pattr, + bias_attr=battr, + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance', + use_global_stats=global_stats) + scale = fluid.framework._get_var(pattr.name) + bias = fluid.framework._get_var(battr.name) + elif self.norm_type == 'affine_channel': + scale = fluid.layers.create_parameter( + shape=[conv.shape[1]], dtype=conv.dtype, attr=pattr, default_initializer=fluid.initializer.Constant(1.)) + bias = fluid.layers.create_parameter( + shape=[conv.shape[1]], dtype=conv.dtype, attr=battr, default_initializer=fluid.initializer.Constant(0.)) + out = fluid.layers.affine_channel(x=conv, scale=scale, bias=bias, act=act) + if self.freeze_norm: + scale.stop_gradient = True + bias.stop_gradient = True + return out + + def _shortcut(self, input, ch_out, stride, is_first, name): + max_pooling_in_short_cut = self.variant == 'd' + ch_in = input.shape[1] + # the naming rule is same as pretrained weight + name = self.na.fix_shortcut_name(name) + std_senet = getattr(self, 'std_senet', False) + if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first): + if std_senet: + if is_first: + return self._conv_norm(input, ch_out, 1, stride, name=name) + else: + return self._conv_norm(input, ch_out, 3, stride, name=name) + if max_pooling_in_short_cut and not is_first: + input = fluid.layers.pool2d( + input=input, pool_size=2, pool_stride=2, pool_padding=0, ceil_mode=True, pool_type='avg') + return self._conv_norm(input, ch_out, 1, 1, name=name) + return self._conv_norm(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck(self, input, num_filters, stride, is_first, name, dcn_v2=False): + if self.variant == 'a': + stride1, stride2 = stride, 1 + else: + stride1, stride2 = 1, stride + + # ResNeXt + groups = getattr(self, 'groups', 1) + group_width = getattr(self, 'group_width', -1) + if groups == 1: + expand = 4 + elif (groups * group_width) == 256: + expand = 1 + else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d + num_filters = num_filters // 2 + expand = 2 + + conv_name1, conv_name2, conv_name3, \ + shortcut_name = self.na.fix_bottleneck_name(name) + std_senet = getattr(self, 'std_senet', False) + if std_senet: + conv_def = [[int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1], + [num_filters, 3, stride2, 'relu', groups, conv_name2], + [num_filters * expand, 1, 1, None, 1, conv_name3]] + else: + conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], + [num_filters, 3, stride2, 'relu', groups, conv_name2], + [num_filters * expand, 1, 1, None, 1, conv_name3]] + + residual = input + for i, (c, k, s, act, g, _name) in enumerate(conv_def): + residual = self._conv_norm( + input=residual, + num_filters=c, + 
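+                # each conv_def row unpacks as (num_filters, filter_size, stride,
+                # act, groups, name); dcn_v2 applies only to the middle 3x3 conv (i == 1)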
filter_size=k, + stride=s, + act=act, + groups=g, + name=_name, + dcn_v2=(i == 1 and dcn_v2)) + short = self._shortcut(input, num_filters * expand, stride, is_first=is_first, name=shortcut_name) + # Squeeze-and-Excitation + if callable(getattr(self, '_squeeze_excitation', None)): + residual = self._squeeze_excitation(input=residual, num_channels=num_filters, name='fc' + name) + return fluid.layers.elementwise_add(x=short, y=residual, act='relu', name=name + ".add.output.5") + + def basicblock(self, input, num_filters, stride, is_first, name, dcn_v2=False): + assert dcn_v2 is False, "Not implemented yet." + conv0 = self._conv_norm( + input=input, num_filters=num_filters, filter_size=3, act='relu', stride=stride, name=name + "_branch2a") + conv1 = self._conv_norm(input=conv0, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") + short = self._shortcut(input, num_filters, stride, is_first, name=name + "_branch1") + return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') + + def layer_warp(self, input, stage_num): + """ + Args: + input (Variable): input variable. + stage_num (int): the stage number, should be 2, 3, 4, 5 + + Returns: + The last variable in endpoint-th stage. + """ + assert stage_num in [2, 3, 4, 5] + + stages, block_func = self.depth_cfg[self.depth] + count = stages[stage_num - 2] + + ch_out = self.stage_filters[stage_num - 2] + is_first = False if stage_num != 2 else True + dcn_v2 = True if stage_num in self.dcn_v2_stages else False + + nonlocal_mod = 1000 + if stage_num in self.nonlocal_stages: + nonlocal_mod = self.nonlocal_mod_cfg[self.depth] if stage_num == 4 else 2 + + # Make the layer name and parameter name consistent + # with ImageNet pre-trained model + conv = input + for i in range(count): + conv_name = self.na.fix_layer_warp_name(stage_num, count, i) + if self.depth < 50: + is_first = True if i == 0 and stage_num == 2 else False + conv = block_func( + input=conv, + num_filters=ch_out, + stride=2 if i == 0 and stage_num != 2 else 1, + is_first=is_first, + name=conv_name, + dcn_v2=dcn_v2) + + # add non local model + dim_in = conv.shape[1] + nonlocal_name = "nonlocal_conv{}".format(stage_num) + if i % nonlocal_mod == nonlocal_mod - 1: + conv = add_space_nonlocal(conv, dim_in, dim_in, nonlocal_name + '_{}'.format(i), int(dim_in / 2)) + return conv + + def c1_stage(self, input): + out_chan = self._c1_out_chan_num + + conv1_name = self.na.fix_c1_stage_name() + + if self.variant in ['c', 'd']: + conv_def = [ + [out_chan // 2, 3, 2, "conv1_1"], + [out_chan // 2, 3, 1, "conv1_2"], + [out_chan, 3, 1, "conv1_3"], + ] + else: + conv_def = [[out_chan, 7, 2, conv1_name]] + + for (c, k, s, _name) in conv_def: + input = self._conv_norm(input=input, num_filters=c, filter_size=k, stride=s, act='relu', name=_name) + + output = fluid.layers.pool2d(input=input, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + return output + + def __call__(self, input): + assert isinstance(input, Variable) + assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ + "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) + + res_endpoints = [] + + res = input + feature_maps = self.feature_maps + severed_head = getattr(self, 'severed_head', False) + if not severed_head: + res = self.c1_stage(res) + feature_maps = range(2, max(self.feature_maps) + 1) + + for i in feature_maps: + res = self.layer_warp(res, i) + if i in self.feature_maps: + res_endpoints.append(res) + if self.freeze_at >= i: + res.stop_gradient = True + if self.get_prediction: + pool 
= fluid.layers.pool2d(input=res, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + + out = fluid.layers.fc( + input=pool, + size=self.class_dim, + param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv))) + out = fluid.layers.softmax(out) + return out + return OrderedDict( + [('res{}_sum'.format(self.feature_maps[idx]), feat) for idx, feat in enumerate(res_endpoints)]) + + +class ResNetC5(ResNet): + def __init__(self, + depth=50, + freeze_at=2, + norm_type='affine_channel', + freeze_norm=True, + norm_decay=0., + variant='b', + feature_maps=[5], + weight_prefix_name=''): + super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm, norm_decay, variant, feature_maps) + self.severed_head = True diff --git a/modules/image/object_detection/yolov3_resnet34_coco2017/yolo_head.py b/modules/image/object_detection/yolov3_resnet34_coco2017/yolo_head.py new file mode 100644 index 0000000000000000000000000000000000000000..cfe796c2edebedbff19f302b24533849ee09c2e3 --- /dev/null +++ b/modules/image/object_detection/yolov3_resnet34_coco2017/yolo_head.py @@ -0,0 +1,231 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import OrderedDict + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.regularizer import L2Decay + +__all__ = ['MultiClassNMS', 'YOLOv3Head'] + + +class MultiClassNMS(object): + # __op__ = fluid.layers.multiclass_nms + def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k, normalized, score_threshold): + super(MultiClassNMS, self).__init__() + self.background_label = background_label + self.keep_top_k = keep_top_k + self.nms_threshold = nms_threshold + self.nms_top_k = nms_top_k + self.normalized = normalized + self.score_threshold = score_threshold + + +class YOLOv3Head(object): + """Head block for YOLOv3 network + + Args: + norm_decay (float): weight decay for normalization layer weights + num_classes (int): number of output classes + ignore_thresh (float): threshold to ignore confidence loss + label_smooth (bool): whether to use label smoothing + anchors (list): anchors + anchor_masks (list): anchor masks + nms (object): an instance of `MultiClassNMS` + """ + + def __init__(self, + norm_decay=0., + num_classes=80, + ignore_thresh=0.7, + label_smooth=True, + anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], + [373, 326]], + anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], + nms=MultiClassNMS( + background_label=-1, + keep_top_k=100, + nms_threshold=0.45, + nms_top_k=1000, + normalized=True, + score_threshold=0.01), + weight_prefix_name=''): + self.norm_decay = norm_decay + self.num_classes = num_classes + self.ignore_thresh = ignore_thresh + self.label_smooth = label_smooth + self.anchor_masks = anchor_masks + self._parse_anchors(anchors) + self.nms = nms + self.prefix_name = weight_prefix_name + + def _conv_bn(self, input, ch_out, filter_size, stride, padding, act='leaky', is_test=True, name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=ch_out, + filter_size=filter_size, + stride=stride, + padding=padding, + act=None, + param_attr=ParamAttr(name=name + ".conv.weights"), + bias_attr=False) + + bn_name = name + ".bn" + bn_param_attr = ParamAttr(regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale') + bn_bias_attr = ParamAttr(regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset') + out = 
fluid.layers.batch_norm( + input=conv, + act=None, + is_test=is_test, + param_attr=bn_param_attr, + bias_attr=bn_bias_attr, + moving_mean_name=bn_name + '.mean', + moving_variance_name=bn_name + '.var') + + if act == 'leaky': + out = fluid.layers.leaky_relu(x=out, alpha=0.1) + return out + + def _detection_block(self, input, channel, is_test=True, name=None): + assert channel % 2 == 0, \ + "channel {} cannot be divided by 2 in detection block {}" \ + .format(channel, name) + + conv = input + for j in range(2): + conv = self._conv_bn( + conv, channel, filter_size=1, stride=1, padding=0, is_test=is_test, name='{}.{}.0'.format(name, j)) + conv = self._conv_bn( + conv, channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test, name='{}.{}.1'.format(name, j)) + route = self._conv_bn( + conv, channel, filter_size=1, stride=1, padding=0, is_test=is_test, name='{}.2'.format(name)) + tip = self._conv_bn( + route, channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test, name='{}.tip'.format(name)) + return route, tip + + def _upsample(self, input, scale=2, name=None): + out = fluid.layers.resize_nearest(input=input, scale=float(scale), name=name) + return out + + def _parse_anchors(self, anchors): + """ + Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors + + """ + self.anchors = [] + self.mask_anchors = [] + + assert len(anchors) > 0, "ANCHORS not set." + assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." + + for anchor in anchors: + assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) + self.anchors.extend(anchor) + + anchor_num = len(anchors) + for masks in self.anchor_masks: + self.mask_anchors.append([]) + for mask in masks: + assert mask < anchor_num, "anchor mask index overflow" + self.mask_anchors[-1].extend(anchors[mask]) + + def _get_outputs(self, input, is_train=True): + """ + Get YOLOv3 head output + + Args: + input (list): List of Variables, output of backbone stages + is_train (bool): whether in train or test mode + + Returns: + outputs (list): Variables of each output layer + """ + + outputs = [] + + # get last out_layer_num blocks in reverse order + out_layer_num = len(self.anchor_masks) + if isinstance(input, OrderedDict): + blocks = list(input.values())[-1:-out_layer_num - 1:-1] + else: + blocks = input[-1:-out_layer_num - 1:-1] + route = None + for i, block in enumerate(blocks): + if i > 0: # perform concat in first 2 detection_block + block = fluid.layers.concat(input=[route, block], axis=1) + route, tip = self._detection_block( + block, channel=512 // (2**i), is_test=(not is_train), name=self.prefix_name + "yolo_block.{}".format(i)) + + # out channel number = mask_num * (5 + class_num) + num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) + block_out = fluid.layers.conv2d( + input=tip, + num_filters=num_filters, + filter_size=1, + stride=1, + padding=0, + act=None, + param_attr=ParamAttr(name=self.prefix_name + "yolo_output.{}.conv.weights".format(i)), + bias_attr=ParamAttr( + regularizer=L2Decay(0.), name=self.prefix_name + "yolo_output.{}.conv.bias".format(i))) + outputs.append(block_out) + + if i < len(blocks) - 1: + # do not perform upsample in the last detection_block + route = self._conv_bn( + input=route, + ch_out=256 // (2**i), + filter_size=1, + stride=1, + padding=0, + is_test=(not is_train), + name=self.prefix_name + "yolo_transition.{}".format(i)) + # upsample + route = self._upsample(route) + + return outputs, blocks + + def get_prediction(self, outputs, im_size): + """ + Get prediction result of YOLOv3 
network + + Args: + outputs (list): list of Variables, return from _get_outputs + im_size (Variable): Variable of size([h, w]) of each image + + Returns: + pred (Variable): The prediction result after non-max suppress. + + """ + boxes = [] + scores = [] + downsample = 32 + for i, output in enumerate(outputs): + box, score = fluid.layers.yolo_box( + x=output, + img_size=im_size, + anchors=self.mask_anchors[i], + class_num=self.num_classes, + conf_thresh=self.nms.score_threshold, + downsample_ratio=downsample, + name=self.prefix_name + "yolo_box" + str(i)) + boxes.append(box) + scores.append(fluid.layers.transpose(score, perm=[0, 2, 1])) + + downsample //= 2 + + yolo_boxes = fluid.layers.concat(boxes, axis=1) + yolo_scores = fluid.layers.concat(scores, axis=2) + pred = fluid.layers.multiclass_nms( + bboxes=yolo_boxes, + scores=yolo_scores, + score_threshold=self.nms.score_threshold, + nms_top_k=self.nms.nms_top_k, + keep_top_k=self.nms.keep_top_k, + nms_threshold=self.nms.nms_threshold, + background_label=self.nms.background_label, + normalized=self.nms.normalized, + name="multiclass_nms") + return pred diff --git a/hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README.md b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README.md similarity index 100% rename from hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README.md rename to modules/image/object_detection/yolov3_resnet50_vd_coco2017/README.md diff --git a/hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/__init__.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/__init__.py similarity index 100% rename from hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/__init__.py rename to modules/image/object_detection/yolov3_resnet50_vd_coco2017/__init__.py diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/data_feed.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..28abc06a2bd25028d13bbc4ea724c111ca966477 --- /dev/null +++ b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/data_feed.py @@ -0,0 +1,69 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import os + +import cv2 +import numpy as np + +__all__ = ['reader'] + + +def reader(paths=[], images=None): + """ + data generator + + Args: + paths (list[str]): paths to images. + images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] + + Yield: + res (list): preprocessed image and the size of original image. + """ + img_list = [] + if paths: + assert type(paths) is list, "type(paths) is not list." 
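+        # a minimal usage sketch (hypothetical file name, assuming it exists):
+        #   for im, im_size in reader(paths=['demo.jpg']):
+        #       # im: 3x608x608 float32 CHW; im_size: original [H, W] as int32
+        #       pass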
+ for img_path in paths: + assert os.path.isfile(img_path), "The {} isn't a valid file path.".format(img_path) + img = cv2.imread(img_path).astype('float32') + img_list.append(img) + if images is not None: + for img in images: + img_list.append(img) + + for im in img_list: + # im_size + im_shape = im.shape + im_size = np.array([im_shape[0], im_shape[1]], dtype=np.int32) + + # decode image + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + + # resize image + target_size = 608 + im_size_min = np.min(im_shape[0:2]) + im_size_max = np.max(im_shape[0:2]) + if float(im_size_min) == 0: + raise ZeroDivisionError('min size of image is 0') + + im_scale_x = float(target_size) / float(im_shape[1]) + im_scale_y = float(target_size) / float(im_shape[0]) + im = cv2.resize(im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=2) + + # normalize image + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + im = im.astype(np.float32, copy=False) + mean = np.array(mean)[np.newaxis, np.newaxis, :] + std = np.array(std)[np.newaxis, np.newaxis, :] + im = im / 255.0 + im -= mean + im /= std + + # permute + im = np.swapaxes(im, 1, 2) + im = np.swapaxes(im, 1, 0) + + yield [im, im_size] diff --git a/hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/label_file.txt b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/label_file.txt similarity index 100% rename from hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/label_file.txt rename to modules/image/object_detection/yolov3_resnet50_vd_coco2017/label_file.txt diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/module.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/module.py new file mode 100644 index 0000000000000000000000000000000000000000..d8b2c64555979c21ba9c9a805e95cd7bafd82824 --- /dev/null +++ b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/module.py @@ -0,0 +1,273 @@ +# coding=utf-8 +from __future__ import absolute_import + +import ast +import argparse +import os +from functools import partial + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving +from paddlehub.common.paddle_helper import add_vars_prefix + +from yolov3_resnet50_vd_coco2017.resnet import ResNet +from yolov3_resnet50_vd_coco2017.processor import load_label_info, postprocess, base64_to_cv2 +from yolov3_resnet50_vd_coco2017.data_feed import reader +from yolov3_resnet50_vd_coco2017.yolo_head import MultiClassNMS, YOLOv3Head + + +@moduleinfo( + name="yolov3_resnet50_vd_coco2017", + version="1.0.1", + type="CV/object_detection", + summary="Baidu's YOLOv3 model for object detection with backbone ResNet50, trained with dataset coco2017.", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class YOLOv3ResNet50Coco2017(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join(self.directory, "yolov3_resnet50_model") + self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) + self._set_config() + + def _set_config(self): + """ + predictor config setting. 
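+        A CPU predictor is always created; a GPU predictor is created in
+        addition when CUDA_VISIBLE_DEVICES is set to a valid device id.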
+ """ + cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + cpu_config.switch_ir_optim(False) + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def context(self, trainable=True, pretrained=True, get_prediction=False): + """ + Distill the Head Features, so as to perform transfer learning. + + Args: + trainable (bool): whether to set parameters trainable. + pretrained (bool): whether to load default pretrained model. + get_prediction (bool): whether to get prediction. + + Returns: + inputs(dict): the input variables. + outputs(dict): the output variables. + context_prog (Program): the program to execute transfer learning. + """ + context_prog = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(context_prog, startup_program): + with fluid.unique_name.guard(): + # image + image = fluid.layers.data(name='image', shape=[3, 608, 608], dtype='float32') + # backbone + backbone = ResNet( + norm_type='sync_bn', + freeze_at=0, + freeze_norm=False, + norm_decay=0., + dcn_v2_stages=[5], + depth=50, + variant='d', + feature_maps=[3, 4, 5]) + # body_feats + body_feats = backbone(image) + # im_size + im_size = fluid.layers.data(name='im_size', shape=[2], dtype='int32') + # yolo_head + yolo_head = YOLOv3Head(num_classes=80) + # head_features + head_features, body_features = yolo_head._get_outputs(body_feats, is_train=trainable) + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + # var_prefix + var_prefix = '@HUB_{}@'.format(self.name) + # name of inputs + inputs = {'image': var_prefix + image.name, 'im_size': var_prefix + im_size.name} + # name of outputs + if get_prediction: + bbox_out = yolo_head.get_prediction(head_features, im_size) + outputs = {'bbox_out': [var_prefix + bbox_out.name]} + else: + outputs = { + 'head_features': [var_prefix + var.name for var in head_features], + 'body_features': [var_prefix + var.name for var in body_features] + } + # add_vars_prefix + add_vars_prefix(context_prog, var_prefix) + add_vars_prefix(fluid.default_startup_program(), var_prefix) + # inputs + inputs = {key: context_prog.global_block().vars[value] for key, value in inputs.items()} + # outputs + outputs = { + key: [context_prog.global_block().vars[varname] for varname in value] + for key, value in outputs.items() + } + # trainable + for param in context_prog.global_block().iter_parameters(): + param.trainable = trainable + # pretrained + if pretrained: + + def _if_exist(var): + return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist) + else: + exe.run(startup_program) + + return inputs, outputs, context_prog + + def object_detection(self, + paths=None, + images=None, + batch_size=1, + use_gpu=False, + output_dir='detection_result', + score_thresh=0.5, + visualization=True): + """API of Object Detection. + + Args: + paths (list[str]): The paths of images. + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + batch_size (int): batch size. 
+            use_gpu (bool): Whether to use gpu.
+            output_dir (str): The path to store output images.
+            visualization (bool): Whether to save image or not.
+            score_thresh (float): threshold for object detection.
+
+        Returns:
+            res (list[dict]): The result of coco2017 detection. keys include 'data', 'save_path', the corresponding value is:
+                data (list[dict]): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
+                    left (float): The X coordinate of the upper left corner of the bounding box;
+                    top (float): The Y coordinate of the upper left corner of the bounding box;
+                    right (float): The X coordinate of the lower right corner of the bounding box;
+                    bottom (float): The Y coordinate of the lower right corner of the bounding box;
+                    label (str): The label of detection result;
+                    confidence (float): The confidence of detection result.
+                save_path (str, optional): The path to save output images.
+        """
+        if use_gpu:
+            try:
+                _places = os.environ["CUDA_VISIBLE_DEVICES"]
+                int(_places[0])
+            except:
+                raise RuntimeError(
+                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
+                )
+
+        paths = paths if paths else list()
+        data_reader = partial(reader, paths, images)
+        batch_reader = fluid.io.batch(data_reader, batch_size=batch_size)
+        res = []
+        for iter_id, feed_data in enumerate(batch_reader()):
+            feed_data = np.array(feed_data)
+            image_tensor = PaddleTensor(np.array(list(feed_data[:, 0])))
+            im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1])))
+            if use_gpu:
+                data_out = self.gpu_predictor.run([image_tensor, im_size_tensor])
+            else:
+                data_out = self.cpu_predictor.run([image_tensor, im_size_tensor])
+
+            output = postprocess(
+                paths=paths,
+                images=images,
+                data_out=data_out,
+                score_thresh=score_thresh,
+                label_names=self.label_names,
+                output_dir=output_dir,
+                handle_id=iter_id * batch_size,
+                visualization=visualization)
+            res.extend(output)
+        return res
+
+    def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True):
+        if combined:
+            model_filename = "__model__" if not model_filename else model_filename
+            params_filename = "__params__" if not params_filename else params_filename
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+
+        program, feeded_var_names, target_vars = fluid.io.load_inference_model(
+            dirname=self.default_pretrained_model_path, executor=exe)
+
+        fluid.io.save_inference_model(
+            dirname=dirname,
+            main_program=program,
+            executor=exe,
+            feeded_var_names=feeded_var_names,
+            target_vars=target_vars,
+            model_filename=model_filename,
+            params_filename=params_filename)
+
+    @serving
+    def serving_method(self, images, **kwargs):
+        """
+        Run as a service.
+        """
+        images_decode = [base64_to_cv2(image) for image in images]
+        results = self.object_detection(images=images_decode, **kwargs)
+        return results
+
+    @runnable
+    def run_cmd(self, argvs):
+        """
+        Run as a command.
+        """
+        self.parser = argparse.ArgumentParser(
+            description="Run the {} module.".format(self.name),
+            prog='hub run {}'.format(self.name),
+            usage='%(prog)s',
+            add_help=True)
+        self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required")
+        self.arg_config_group = self.parser.add_argument_group(
+            title="Config options", description="Run configuration for controlling module behavior, not required.")
+        self.add_module_config_arg()
+        self.add_module_input_arg()
+        args = self.parser.parse_args(argvs)
+        results = self.object_detection(
+            paths=[args.input_path],
+            batch_size=args.batch_size,
+            use_gpu=args.use_gpu,
+            output_dir=args.output_dir,
+            visualization=args.visualization,
+            score_thresh=args.score_thresh)
+        return results
+
+    def add_module_config_arg(self):
+        """
+        Add the command config options.
+        """
+        self.arg_config_group.add_argument(
+            '--use_gpu', type=ast.literal_eval, default=False, help="whether to use GPU or not")
+        self.arg_config_group.add_argument(
+            '--output_dir', type=str, default='detection_result', help="The directory to save output images.")
+        self.arg_config_group.add_argument(
+            '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.")
+
+    def add_module_input_arg(self):
+        """
+        Add the command input options.
+        """
+        self.arg_input_group.add_argument('--input_path', type=str, help="path to image.")
+        self.arg_input_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.")
+        self.arg_input_group.add_argument(
+            '--score_thresh', type=ast.literal_eval, default=0.5, help="threshold for object detection.")
diff --git a/hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/name_adapter.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/name_adapter.py
similarity index 100%
rename from hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/name_adapter.py
rename to modules/image/object_detection/yolov3_resnet50_vd_coco2017/name_adapter.py
diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/nonlocal_helper.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/nonlocal_helper.py
new file mode 100644
index 0000000000000000000000000000000000000000..839df4caf744280001f033d8ef6a3d560277368e
--- /dev/null
+++ b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/nonlocal_helper.py
@@ -0,0 +1,151 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import paddle.fluid as fluid
+from paddle.fluid import ParamAttr
+
+nonlocal_params = {
+    "use_zero_init_conv": False,
+    "conv_init_std": 0.01,
+    "no_bias": True,
+    "use_maxpool": False,
+    "use_softmax": True,
+    "use_bn": False,
+    "use_scale": True,  # vital for the model performance!!!
+ "use_affine": False, + "bn_momentum": 0.9, + "bn_epsilon": 1.0000001e-5, + "bn_init_gamma": 0.9, + "weight_decay_bn": 1.e-4, +} + + +def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner, max_pool_stride=2): + cur = input + theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \ + filter_size = [1, 1], stride = [1, 1], \ + padding = [0, 0], \ + param_attr=ParamAttr(name = prefix + '_theta' + "_w", \ + initializer = fluid.initializer.Normal(loc = 0.0, + scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) \ + if not nonlocal_params["no_bias"] else False, \ + name = prefix + '_theta') + theta_shape = theta.shape + theta_shape_op = fluid.layers.shape(theta) + theta_shape_op.stop_gradient = True + + if nonlocal_params["use_maxpool"]: + max_pool = fluid.layers.pool2d(input = cur, \ + pool_size = [max_pool_stride, max_pool_stride], \ + pool_type = 'max', \ + pool_stride = [max_pool_stride, max_pool_stride], \ + pool_padding = [0, 0], \ + name = prefix + '_pool') + else: + max_pool = cur + + phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ + filter_size = [1, 1], stride = [1, 1], \ + padding = [0, 0], \ + param_attr = ParamAttr(name = prefix + '_phi' + "_w", \ + initializer = fluid.initializer.Normal(loc = 0.0, + scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) \ + if (nonlocal_params["no_bias"] == 0) else False, \ + name = prefix + '_phi') + phi_shape = phi.shape + + g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ + filter_size = [1, 1], stride = [1, 1], \ + padding = [0, 0], \ + param_attr = ParamAttr(name = prefix + '_g' + "_w", \ + initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_g' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) else False, \ + name = prefix + '_g') + g_shape = g.shape + # we have to use explicit batch size (to support arbitrary spacetime size) + # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784) + theta = fluid.layers.reshape(theta, shape=(0, 0, -1)) + theta = fluid.layers.transpose(theta, [0, 2, 1]) + phi = fluid.layers.reshape(phi, [0, 0, -1]) + theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity') + g = fluid.layers.reshape(g, [0, 0, -1]) + + if nonlocal_params["use_softmax"]: + if nonlocal_params["use_scale"]: + theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5) + else: + theta_phi_sc = theta_phi + p = fluid.layers.softmax(theta_phi_sc, name=prefix + '_affinity' + '_prob') + else: + # not clear about what is doing in xlw's code + p = None # not implemented + raise "Not implemented when not use softmax" + + # note g's axis[2] corresponds to p's axis[2] + # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1) + p = fluid.layers.transpose(p, [0, 2, 1]) + t = fluid.layers.matmul(g, p, name=prefix + '_y') + + # reshape back + # e.g. 
(8, 1024, 784) => (8, 1024, 4, 14, 14) + t_shape = t.shape + t_re = fluid.layers.reshape(t, shape=list(theta_shape), actual_shape=theta_shape_op) + blob_out = t_re + blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \ + filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \ + param_attr = ParamAttr(name = prefix + '_out' + "_w", \ + initializer = fluid.initializer.Constant(value = 0.) \ + if nonlocal_params["use_zero_init_conv"] \ + else fluid.initializer.Normal(loc = 0.0, + scale = nonlocal_params["conv_init_std"])), \ + bias_attr = ParamAttr(name = prefix + '_out' + "_b", \ + initializer = fluid.initializer.Constant(value = 0.)) \ + if (nonlocal_params["no_bias"] == 0) else False, \ + name = prefix + '_out') + blob_out_shape = blob_out.shape + + if nonlocal_params["use_bn"]: + bn_name = prefix + "_bn" + blob_out = fluid.layers.batch_norm(blob_out, \ + # is_test = test_mode, \ + momentum = nonlocal_params["bn_momentum"], \ + epsilon = nonlocal_params["bn_epsilon"], \ + name = bn_name, \ + param_attr = ParamAttr(name = bn_name + "_s", \ + initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \ + regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ + bias_attr = ParamAttr(name = bn_name + "_b", \ + regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ + moving_mean_name = bn_name + "_rm", \ + moving_variance_name = bn_name + "_riv") # add bn + + if nonlocal_params["use_affine"]: + affine_scale = fluid.layers.create_parameter(\ + shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ + attr=ParamAttr(name=prefix + '_affine' + '_s'), \ + default_initializer = fluid.initializer.Constant(value = 1.)) + affine_bias = fluid.layers.create_parameter(\ + shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ + attr=ParamAttr(name=prefix + '_affine' + '_b'), \ + default_initializer = fluid.initializer.Constant(value = 0.)) + blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \ + bias = affine_bias, name = prefix + '_affine') # add affine + + return blob_out + + +def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner): + ''' + add_space_nonlocal: + Non-local Neural Networks: see https://arxiv.org/abs/1711.07971 + ''' + conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner) + output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum') + return output diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/processor.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..64049e42b1d4dfa67aff606cae4490710b9cd6cc --- /dev/null +++ b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/processor.py @@ -0,0 +1,163 @@ +# coding=utf-8 +import base64 +import os + +import cv2 +import numpy as np +from PIL import Image, ImageDraw + +__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess'] + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(img, output_dir, image_path): + """Get save image name from source image path. 
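+
+    The extension is inferred from the image's format (or, as a fallback, its
+    mode) when the source file name has no extension.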
+ """ + image_name = os.path.split(image_path)[-1] + name, ext = os.path.splitext(image_name) + + if ext == '': + if img.format == 'PNG': + ext = '.png' + elif img.format == 'JPEG': + ext = '.jpg' + elif img.format == 'BMP': + ext = '.bmp' + else: + if img.mode == "RGB" or img.mode == "L": + ext = ".jpg" + elif img.mode == "RGBA" or img.mode == "P": + ext = '.png' + + return os.path.join(output_dir, "{}".format(name)) + ext + + +def draw_bounding_box_on_image(image_path, data_list, save_dir): + image = Image.open(image_path) + draw = ImageDraw.Draw(image) + for data in data_list: + left, right, top, bottom = data['left'], data['right'], data['top'], data['bottom'] + # draw bbox + draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=2, fill='red') + # draw label + if image.mode == 'RGB': + text = data['label'] + ": %.2f%%" % (100 * data['confidence']) + textsize_width, textsize_height = draw.textsize(text=text) + draw.rectangle( + xy=(left, top - (textsize_height + 5), left + textsize_width + 10, top), fill=(255, 255, 255)) + draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) + + save_name = get_save_image_name(image, save_dir, image_path) + if os.path.exists(save_name): + os.remove(save_name) + + image.save(save_name) + return save_name + + +def clip_bbox(bbox, img_width, img_height): + xmin = max(min(bbox[0], img_width), 0.) + ymin = max(min(bbox[1], img_height), 0.) + xmax = max(min(bbox[2], img_width), 0.) + ymax = max(min(bbox[3], img_height), 0.) + return float(xmin), float(ymin), float(xmax), float(ymax) + + +def load_label_info(file_path): + with open(file_path, 'r') as fr: + text = fr.readlines() + label_names = [] + for info in text: + label_names.append(info.strip()) + return label_names + + +def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True): + """ + postprocess the lod_tensor produced by fluid.Executor.run + + Args: + paths (list[str]): The paths of images. + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + data_out (lod_tensor): data output of predictor. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + output_dir (str): The path to store output images. + visualization (bool): Whether to save image or not. + score_thresh (float): the low limit of bounding box. + label_names (list[str]): label names. + handle_id (int): The number of images that have been handled. + + Returns: + res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: + data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: + left (float): The X coordinate of the upper left corner of the bounding box; + top (float): The Y coordinate of the upper left corner of the bounding box; + right (float): The X coordinate of the lower right corner of the bounding box; + bottom (float): The Y coordinate of the lower right corner of the bounding box; + label (str): The label of detection result; + confidence (float): The confidence of detection result. + save_path (str): The path to save output images. + """ + lod_tensor = data_out[0] + lod = lod_tensor.lod[0] + results = lod_tensor.as_ndarray() + + check_dir(output_dir) + + assert type(paths) is list, "type(paths) is not list." 
+ if handle_id < len(paths): + unhandled_paths = paths[handle_id:] + unhandled_paths_num = len(unhandled_paths) + else: + unhandled_paths_num = 0 + + output = list() + for index in range(len(lod) - 1): + output_i = {'data': []} + if index < unhandled_paths_num: + org_img_path = unhandled_paths[index] + org_img = Image.open(org_img_path) + else: + org_img = images[index - unhandled_paths_num] + org_img = org_img.astype(np.uint8) + org_img = Image.fromarray(org_img[:, :, ::-1]) + if visualization: + org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format((handle_id + index))) + org_img.save(org_img_path) + org_img_height = org_img.height + org_img_width = org_img.width + result_i = results[lod[index]:lod[index + 1]] + for row in result_i: + if len(row) != 6: + continue + if row[1] < score_thresh: + continue + category_id = int(row[0]) + confidence = row[1] + bbox = row[2:] + dt = {} + dt['label'] = label_names[category_id] + dt['confidence'] = float(confidence) + dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height) + output_i['data'].append(dt) + + output.append(output_i) + if visualization: + output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir) + + return output diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/resnet.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..77a3f7f4c7b16c3f9c65c46fc93eb394befa5110 --- /dev/null +++ b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/resnet.py @@ -0,0 +1,364 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +from collections import OrderedDict +from numbers import Integral + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.framework import Variable +from paddle.fluid.regularizer import L2Decay +from paddle.fluid.initializer import Constant + +from .nonlocal_helper import add_space_nonlocal +from .name_adapter import NameAdapter + +__all__ = ['ResNet', 'ResNetC5'] + + +class ResNet(object): + """ + Residual Network, see https://arxiv.org/abs/1512.03385 + Args: + depth (int): ResNet depth, should be 34, 50. 
+        freeze_at (int): freeze the backbone at which stage
+        norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel'
+        freeze_norm (bool): freeze normalization layers
+        norm_decay (float): weight decay for normalization layer weights
+        variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently
+        feature_maps (list): index of stages whose feature maps are returned
+        dcn_v2_stages (list): index of stages which select deformable conv v2
+        nonlocal_stages (list): index of stages which select nonlocal networks
+    """
+    __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name']
+
+    def __init__(self,
+                 depth=50,
+                 freeze_at=0,
+                 norm_type='sync_bn',
+                 freeze_norm=False,
+                 norm_decay=0.,
+                 variant='b',
+                 feature_maps=[3, 4, 5],
+                 dcn_v2_stages=[],
+                 weight_prefix_name='',
+                 nonlocal_stages=[],
+                 get_prediction=False,
+                 class_dim=1000):
+        super(ResNet, self).__init__()
+
+        if isinstance(feature_maps, Integral):
+            feature_maps = [feature_maps]
+
+        assert depth in [34, 50], \
+            "depth {} not in [34, 50]".format(depth)
+        assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant"
+        assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4"
+        assert len(feature_maps) > 0, "need one or more feature maps"
+        assert norm_type in ['bn', 'sync_bn', 'affine_channel']
+        assert not (len(nonlocal_stages) > 0 and depth < 50), \
+            "non-local is not supported for resnet18 or resnet34"
+
+        self.depth = depth
+        self.freeze_at = freeze_at
+        self.norm_type = norm_type
+        self.norm_decay = norm_decay
+        self.freeze_norm = freeze_norm
+        self.variant = variant
+        self._model_type = 'ResNet'
+        self.feature_maps = feature_maps
+        self.dcn_v2_stages = dcn_v2_stages
+        self.depth_cfg = {
+            34: ([3, 4, 6, 3], self.basicblock),
+            50: ([3, 4, 6, 3], self.bottleneck),
+        }
+        self.stage_filters = [64, 128, 256, 512]
+        self._c1_out_chan_num = 64
+        self.na = NameAdapter(self)
+        self.prefix_name = weight_prefix_name
+
+        self.nonlocal_stages = nonlocal_stages
+        self.nonlocal_mod_cfg = {
+            50: 2,
+            101: 5,
+            152: 8,
+            200: 12,
+        }
+        self.get_prediction = get_prediction
+        self.class_dim = class_dim
+
+    def _conv_offset(self, input, filter_size, stride, padding, act=None, name=None):
+        out_channel = filter_size * filter_size * 3
+        out = fluid.layers.conv2d(
+            input,
+            num_filters=out_channel,
+            filter_size=filter_size,
+            stride=stride,
+            padding=padding,
+            param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"),
+            bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"),
+            act=act,
+            name=name)
+        return out
+
+    def _conv_norm(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None, dcn_v2=False):
+        _name = self.prefix_name + name if self.prefix_name != '' else name
+        if not dcn_v2:
+            conv = fluid.layers.conv2d(
+                input=input,
+                num_filters=num_filters,
+                filter_size=filter_size,
+                stride=stride,
+                padding=(filter_size - 1) // 2,
+                groups=groups,
+                act=None,
+                param_attr=ParamAttr(name=_name + "_weights"),
+                bias_attr=False,
+                name=_name + '.conv2d.output.1')
+        else:
+            # select deformable conv
+            offset_mask = self._conv_offset(
+                input=input,
+                filter_size=filter_size,
+                stride=stride,
+                padding=(filter_size - 1) // 2,
+                act=None,
+                name=_name + "_conv_offset")
+            offset_channel = filter_size**2 * 2
+            mask_channel = filter_size**2
+            offset, mask = fluid.layers.split(input=offset_mask, num_or_sections=[offset_channel, mask_channel], dim=1)
+            mask = fluid.layers.sigmoid(mask)
+            conv = fluid.layers.deformable_conv(
+                input=input,
+                offset=offset,
+                mask=mask,
+                num_filters=num_filters,
filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + deformable_groups=1, + im2col_step=1, + param_attr=ParamAttr(name=_name + "_weights"), + bias_attr=False, + name=_name + ".conv2d.output.1") + + bn_name = self.na.fix_conv_norm_name(name) + bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name + + norm_lr = 0. if self.freeze_norm else 1. + norm_decay = self.norm_decay + pattr = ParamAttr(name=bn_name + '_scale', learning_rate=norm_lr, regularizer=L2Decay(norm_decay)) + battr = ParamAttr(name=bn_name + '_offset', learning_rate=norm_lr, regularizer=L2Decay(norm_decay)) + + if self.norm_type in ['bn', 'sync_bn']: + global_stats = True if self.freeze_norm else False + out = fluid.layers.batch_norm( + input=conv, + act=act, + name=bn_name + '.output.1', + param_attr=pattr, + bias_attr=battr, + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance', + use_global_stats=global_stats) + scale = fluid.framework._get_var(pattr.name) + bias = fluid.framework._get_var(battr.name) + elif self.norm_type == 'affine_channel': + scale = fluid.layers.create_parameter( + shape=[conv.shape[1]], dtype=conv.dtype, attr=pattr, default_initializer=fluid.initializer.Constant(1.)) + bias = fluid.layers.create_parameter( + shape=[conv.shape[1]], dtype=conv.dtype, attr=battr, default_initializer=fluid.initializer.Constant(0.)) + out = fluid.layers.affine_channel(x=conv, scale=scale, bias=bias, act=act) + if self.freeze_norm: + scale.stop_gradient = True + bias.stop_gradient = True + return out + + def _shortcut(self, input, ch_out, stride, is_first, name): + max_pooling_in_short_cut = self.variant == 'd' + ch_in = input.shape[1] + # the naming rule is same as pretrained weight + name = self.na.fix_shortcut_name(name) + std_senet = getattr(self, 'std_senet', False) + if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first): + if std_senet: + if is_first: + return self._conv_norm(input, ch_out, 1, stride, name=name) + else: + return self._conv_norm(input, ch_out, 3, stride, name=name) + if max_pooling_in_short_cut and not is_first: + input = fluid.layers.pool2d( + input=input, pool_size=2, pool_stride=2, pool_padding=0, ceil_mode=True, pool_type='avg') + return self._conv_norm(input, ch_out, 1, 1, name=name) + return self._conv_norm(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck(self, input, num_filters, stride, is_first, name, dcn_v2=False): + if self.variant == 'a': + stride1, stride2 = stride, 1 + else: + stride1, stride2 = 1, stride + + # ResNeXt + groups = getattr(self, 'groups', 1) + group_width = getattr(self, 'group_width', -1) + if groups == 1: + expand = 4 + elif (groups * group_width) == 256: + expand = 1 + else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d + num_filters = num_filters // 2 + expand = 2 + + conv_name1, conv_name2, conv_name3, \ + shortcut_name = self.na.fix_bottleneck_name(name) + std_senet = getattr(self, 'std_senet', False) + if std_senet: + conv_def = [[int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1], + [num_filters, 3, stride2, 'relu', groups, conv_name2], + [num_filters * expand, 1, 1, None, 1, conv_name3]] + else: + conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], + [num_filters, 3, stride2, 'relu', groups, conv_name2], + [num_filters * expand, 1, 1, None, 1, conv_name3]] + + residual = input + for i, (c, k, s, act, g, _name) in enumerate(conv_def): + residual = self._conv_norm( + input=residual, + num_filters=c, + 
filter_size=k, + stride=s, + act=act, + groups=g, + name=_name, + dcn_v2=(i == 1 and dcn_v2)) + short = self._shortcut(input, num_filters * expand, stride, is_first=is_first, name=shortcut_name) + # Squeeze-and-Excitation + if callable(getattr(self, '_squeeze_excitation', None)): + residual = self._squeeze_excitation(input=residual, num_channels=num_filters, name='fc' + name) + return fluid.layers.elementwise_add(x=short, y=residual, act='relu', name=name + ".add.output.5") + + def basicblock(self, input, num_filters, stride, is_first, name, dcn_v2=False): + assert dcn_v2 is False, "Not implemented yet." + conv0 = self._conv_norm( + input=input, num_filters=num_filters, filter_size=3, act='relu', stride=stride, name=name + "_branch2a") + conv1 = self._conv_norm(input=conv0, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") + short = self._shortcut(input, num_filters, stride, is_first, name=name + "_branch1") + return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') + + def layer_warp(self, input, stage_num): + """ + Args: + input (Variable): input variable. + stage_num (int): the stage number, should be 2, 3, 4, 5 + + Returns: + The last variable in endpoint-th stage. + """ + assert stage_num in [2, 3, 4, 5] + + stages, block_func = self.depth_cfg[self.depth] + count = stages[stage_num - 2] + + ch_out = self.stage_filters[stage_num - 2] + is_first = False if stage_num != 2 else True + dcn_v2 = True if stage_num in self.dcn_v2_stages else False + + nonlocal_mod = 1000 + if stage_num in self.nonlocal_stages: + nonlocal_mod = self.nonlocal_mod_cfg[self.depth] if stage_num == 4 else 2 + + # Make the layer name and parameter name consistent + # with ImageNet pre-trained model + conv = input + for i in range(count): + conv_name = self.na.fix_layer_warp_name(stage_num, count, i) + if self.depth < 50: + is_first = True if i == 0 and stage_num == 2 else False + conv = block_func( + input=conv, + num_filters=ch_out, + stride=2 if i == 0 and stage_num != 2 else 1, + is_first=is_first, + name=conv_name, + dcn_v2=dcn_v2) + + # add non local model + dim_in = conv.shape[1] + nonlocal_name = "nonlocal_conv{}".format(stage_num) + if i % nonlocal_mod == nonlocal_mod - 1: + conv = add_space_nonlocal(conv, dim_in, dim_in, nonlocal_name + '_{}'.format(i), int(dim_in / 2)) + return conv + + def c1_stage(self, input): + out_chan = self._c1_out_chan_num + + conv1_name = self.na.fix_c1_stage_name() + + if self.variant in ['c', 'd']: + conv_def = [ + [out_chan // 2, 3, 2, "conv1_1"], + [out_chan // 2, 3, 1, "conv1_2"], + [out_chan, 3, 1, "conv1_3"], + ] + else: + conv_def = [[out_chan, 7, 2, conv1_name]] + + for (c, k, s, _name) in conv_def: + input = self._conv_norm(input=input, num_filters=c, filter_size=k, stride=s, act='relu', name=_name) + + output = fluid.layers.pool2d(input=input, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + return output + + def __call__(self, input): + assert isinstance(input, Variable) + assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ + "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) + + res_endpoints = [] + + res = input + feature_maps = self.feature_maps + severed_head = getattr(self, 'severed_head', False) + if not severed_head: + res = self.c1_stage(res) + feature_maps = range(2, max(self.feature_maps) + 1) + + for i in feature_maps: + res = self.layer_warp(res, i) + if i in self.feature_maps: + res_endpoints.append(res) + if self.freeze_at >= i: + res.stop_gradient = True + if self.get_prediction: + pool 
= fluid.layers.pool2d(input=res, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + + out = fluid.layers.fc( + input=pool, + size=self.class_dim, + param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv))) + out = fluid.layers.softmax(out) + return out + return OrderedDict( + [('res{}_sum'.format(self.feature_maps[idx]), feat) for idx, feat in enumerate(res_endpoints)]) + + +class ResNetC5(ResNet): + def __init__(self, + depth=50, + freeze_at=2, + norm_type='affine_channel', + freeze_norm=True, + norm_decay=0., + variant='b', + feature_maps=[5], + weight_prefix_name=''): + super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm, norm_decay, variant, feature_maps) + self.severed_head = True diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/yolo_head.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/yolo_head.py new file mode 100644 index 0000000000000000000000000000000000000000..cfe796c2edebedbff19f302b24533849ee09c2e3 --- /dev/null +++ b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/yolo_head.py @@ -0,0 +1,231 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import OrderedDict + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.regularizer import L2Decay + +__all__ = ['MultiClassNMS', 'YOLOv3Head'] + + +class MultiClassNMS(object): + # __op__ = fluid.layers.multiclass_nms + def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k, normalized, score_threshold): + super(MultiClassNMS, self).__init__() + self.background_label = background_label + self.keep_top_k = keep_top_k + self.nms_threshold = nms_threshold + self.nms_top_k = nms_top_k + self.normalized = normalized + self.score_threshold = score_threshold + + +class YOLOv3Head(object): + """Head block for YOLOv3 network + + Args: + norm_decay (float): weight decay for normalization layer weights + num_classes (int): number of output classes + ignore_thresh (float): threshold to ignore confidence loss + label_smooth (bool): whether to use label smoothing + anchors (list): anchors + anchor_masks (list): anchor masks + nms (object): an instance of `MultiClassNMS` + """ + + def __init__(self, + norm_decay=0., + num_classes=80, + ignore_thresh=0.7, + label_smooth=True, + anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], + [373, 326]], + anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], + nms=MultiClassNMS( + background_label=-1, + keep_top_k=100, + nms_threshold=0.45, + nms_top_k=1000, + normalized=True, + score_threshold=0.01), + weight_prefix_name=''): + self.norm_decay = norm_decay + self.num_classes = num_classes + self.ignore_thresh = ignore_thresh + self.label_smooth = label_smooth + self.anchor_masks = anchor_masks + self._parse_anchors(anchors) + self.nms = nms + self.prefix_name = weight_prefix_name + + def _conv_bn(self, input, ch_out, filter_size, stride, padding, act='leaky', is_test=True, name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=ch_out, + filter_size=filter_size, + stride=stride, + padding=padding, + act=None, + param_attr=ParamAttr(name=name + ".conv.weights"), + bias_attr=False) + + bn_name = name + ".bn" + bn_param_attr = ParamAttr(regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale') + bn_bias_attr = ParamAttr(regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset') + 
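+        # L2Decay(self.norm_decay) pins the weight decay of the BN scale/offset
+        # to the head's norm_decay setting (0. by default, i.e. no decay)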
out = fluid.layers.batch_norm( + input=conv, + act=None, + is_test=is_test, + param_attr=bn_param_attr, + bias_attr=bn_bias_attr, + moving_mean_name=bn_name + '.mean', + moving_variance_name=bn_name + '.var') + + if act == 'leaky': + out = fluid.layers.leaky_relu(x=out, alpha=0.1) + return out + + def _detection_block(self, input, channel, is_test=True, name=None): + assert channel % 2 == 0, \ + "channel {} cannot be divided by 2 in detection block {}" \ + .format(channel, name) + + conv = input + for j in range(2): + conv = self._conv_bn( + conv, channel, filter_size=1, stride=1, padding=0, is_test=is_test, name='{}.{}.0'.format(name, j)) + conv = self._conv_bn( + conv, channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test, name='{}.{}.1'.format(name, j)) + route = self._conv_bn( + conv, channel, filter_size=1, stride=1, padding=0, is_test=is_test, name='{}.2'.format(name)) + tip = self._conv_bn( + route, channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test, name='{}.tip'.format(name)) + return route, tip + + def _upsample(self, input, scale=2, name=None): + out = fluid.layers.resize_nearest(input=input, scale=float(scale), name=name) + return out + + def _parse_anchors(self, anchors): + """ + Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors + + """ + self.anchors = [] + self.mask_anchors = [] + + assert len(anchors) > 0, "ANCHORS not set." + assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." + + for anchor in anchors: + assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) + self.anchors.extend(anchor) + + anchor_num = len(anchors) + for masks in self.anchor_masks: + self.mask_anchors.append([]) + for mask in masks: + assert mask < anchor_num, "anchor mask index overflow" + self.mask_anchors[-1].extend(anchors[mask]) + + def _get_outputs(self, input, is_train=True): + """ + Get YOLOv3 head output + + Args: + input (list): List of Variables, output of backbone stages + is_train (bool): whether in train or test mode + + Returns: + outputs (list): Variables of each output layer + """ + + outputs = [] + + # get last out_layer_num blocks in reverse order + out_layer_num = len(self.anchor_masks) + if isinstance(input, OrderedDict): + blocks = list(input.values())[-1:-out_layer_num - 1:-1] + else: + blocks = input[-1:-out_layer_num - 1:-1] + route = None + for i, block in enumerate(blocks): + if i > 0: # perform concat in first 2 detection_block + block = fluid.layers.concat(input=[route, block], axis=1) + route, tip = self._detection_block( + block, channel=512 // (2**i), is_test=(not is_train), name=self.prefix_name + "yolo_block.{}".format(i)) + + # out channel number = mask_num * (5 + class_num) + num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) + block_out = fluid.layers.conv2d( + input=tip, + num_filters=num_filters, + filter_size=1, + stride=1, + padding=0, + act=None, + param_attr=ParamAttr(name=self.prefix_name + "yolo_output.{}.conv.weights".format(i)), + bias_attr=ParamAttr( + regularizer=L2Decay(0.), name=self.prefix_name + "yolo_output.{}.conv.bias".format(i))) + outputs.append(block_out) + + if i < len(blocks) - 1: + # do not perform upsample in the last detection_block + route = self._conv_bn( + input=route, + ch_out=256 // (2**i), + filter_size=1, + stride=1, + padding=0, + is_test=(not is_train), + name=self.prefix_name + "yolo_transition.{}".format(i)) + # upsample + route = self._upsample(route) + + return outputs, blocks + + def get_prediction(self, outputs, im_size): + """ + Get prediction result of 
YOLOv3 network + + Args: + outputs (list): list of Variables, return from _get_outputs + im_size (Variable): Variable of size([h, w]) of each image + + Returns: + pred (Variable): The prediction result after non-max suppress. + + """ + boxes = [] + scores = [] + downsample = 32 + for i, output in enumerate(outputs): + box, score = fluid.layers.yolo_box( + x=output, + img_size=im_size, + anchors=self.mask_anchors[i], + class_num=self.num_classes, + conf_thresh=self.nms.score_threshold, + downsample_ratio=downsample, + name=self.prefix_name + "yolo_box" + str(i)) + boxes.append(box) + scores.append(fluid.layers.transpose(score, perm=[0, 2, 1])) + + downsample //= 2 + + yolo_boxes = fluid.layers.concat(boxes, axis=1) + yolo_scores = fluid.layers.concat(scores, axis=2) + pred = fluid.layers.multiclass_nms( + bboxes=yolo_boxes, + scores=yolo_scores, + score_threshold=self.nms.score_threshold, + nms_top_k=self.nms.nms_top_k, + keep_top_k=self.nms.keep_top_k, + nms_threshold=self.nms.nms_threshold, + background_label=self.nms.background_label, + normalized=self.nms.normalized, + name="multiclass_nms") + return pred diff --git a/hub_module/modules/image/semantic_segmentation/README.md b/modules/image/semantic_segmentation/README.md similarity index 100% rename from hub_module/modules/image/semantic_segmentation/README.md rename to modules/image/semantic_segmentation/README.md diff --git a/hub_module/modules/image/semantic_segmentation/ace2p/README.md b/modules/image/semantic_segmentation/ace2p/README.md similarity index 100% rename from hub_module/modules/image/semantic_segmentation/ace2p/README.md rename to modules/image/semantic_segmentation/ace2p/README.md diff --git a/hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/__init__.py b/modules/image/semantic_segmentation/ace2p/__init__.py similarity index 100% rename from hub_module/modules/image/object_detection/yolov3_resnet50_vd_coco2017/__init__.py rename to modules/image/semantic_segmentation/ace2p/__init__.py diff --git a/modules/image/semantic_segmentation/ace2p/data_feed.py b/modules/image/semantic_segmentation/ace2p/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..39094654805525fbba5bea55a30a57d605151646 --- /dev/null +++ b/modules/image/semantic_segmentation/ace2p/data_feed.py @@ -0,0 +1,96 @@ +# coding=utf-8 +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np + +from ace2p.processor import get_direction, get_3rd_point, get_affine_transform + +__all__ = ['reader'] + + +def _box2cs(box, aspect_ratio): + x, y, w, h = box[:4] + return _xywh2cs(x, y, w, h, aspect_ratio) + + +def _xywh2cs(x, y, w, h, aspect_ratio, pixel_std=200): + center = np.zeros((2), dtype=np.float32) + center[0] = x + w * 0.5 + center[1] = y + h * 0.5 + if w > aspect_ratio * h: + h = w * 1.0 / aspect_ratio + elif w < aspect_ratio * h: + w = h * aspect_ratio + scale = np.array([w * 1.0 / pixel_std, h * 1.0 / pixel_std], dtype=np.float32) + return center, scale + + +def preprocess(org_im, scale, rotation): + image = org_im.copy() + image_height, image_width, _ = image.shape + + aspect_ratio = scale[1] * 1.0 / scale[0] + image_center, image_scale = _box2cs([0, 0, image_width - 1, image_height - 1], aspect_ratio) + + trans = get_affine_transform(image_center, image_scale, rotation, scale) + image = cv2.warpAffine( + image, + trans, (int(scale[1]), int(scale[0])), + flags=cv2.INTER_LINEAR, + borderMode=cv2.BORDER_CONSTANT, + borderValue=(0, 0, 0)) + + img_mean = np.array([0.406, 
0.456, 0.485]).reshape((1, 1, 3))
+    img_std = np.array([0.225, 0.224, 0.229]).reshape((1, 1, 3))
+    image = image.astype(np.float32)
+    image = (image / 255.0 - img_mean) / img_std
+    image = image.transpose(2, 0, 1).astype(np.float32)
+
+    image_info = {
+        'image_center': image_center,
+        'image_height': image_height,
+        'image_width': image_width,
+        'image_scale': image_scale,
+        'rotation': rotation,
+        'scale': scale
+    }
+
+    return image, image_info
+
+
+def reader(images, paths, scale, rotation):
+    """
+    Preprocess to yield image.
+
+    Args:
+        images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
+        paths (list[str]): paths to images.
+        scale (tuple): size of preprocessed image.
+        rotation (int): rotation angle, used for obtaining affine matrix in preprocess.
+
+    Yield:
+        element (collections.OrderedDict): info of original image and preprocessed image.
+    """
+    component = list()
+    if paths:
+        for im_path in paths:
+            each = OrderedDict()
+            assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path)
+            im = cv2.imread(im_path)
+            each['org_im'] = im
+            each['org_im_path'] = im_path
+            component.append(each)
+    if images is not None:
+        assert type(images) is list, "images should be a list."
+        for im in images:
+            each = OrderedDict()
+            each['org_im'] = im
+            each['org_im_path'] = 'ndarray_time={}.jpg'.format(int(round(time.time(), 6) * 1e6))
+            component.append(each)
+
+    for element in component:
+        element['image'], element['image_info'] = preprocess(element['org_im'], scale, rotation)
+        yield element
diff --git a/hub_module/modules/image/semantic_segmentation/ace2p/label_list.txt b/modules/image/semantic_segmentation/ace2p/label_list.txt
similarity index 100%
rename from hub_module/modules/image/semantic_segmentation/ace2p/label_list.txt
rename to modules/image/semantic_segmentation/ace2p/label_list.txt
diff --git a/modules/image/semantic_segmentation/ace2p/module.py b/modules/image/semantic_segmentation/ace2p/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..458f33d10def98dd46616add639e5a9998205b10
--- /dev/null
+++ b/modules/image/semantic_segmentation/ace2p/module.py
@@ -0,0 +1,201 @@
+# coding=utf-8
+from __future__ import absolute_import
+from __future__ import division
+
+import ast
+import argparse
+import os
+
+import numpy as np
+import paddle.fluid as fluid
+import paddlehub as hub
+from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+from paddlehub.module.module import moduleinfo, runnable, serving
+
+from ace2p.processor import get_palette, postprocess, base64_to_cv2, cv2_to_base64
+from ace2p.data_feed import reader
+
+
+@moduleinfo(
+    name="ace2p",
+    type="CV/semantic-segmentation",
+    author="baidu-idl",
+    author_email="",
+    summary="ACE2P is an image segmentation model for human parsing.",
+    version="1.1.0")
+class ACE2P(hub.Module):
+    def _initialize(self):
+        self.default_pretrained_model_path = os.path.join(self.directory, "ace2p_human_parsing")
+        # label list
+        label_list_file = os.path.join(self.directory, 'label_list.txt')
+        with open(label_list_file, "r") as file:
+            content = file.read()
+        self.label_list = content.split("\n")
+        # palette used in postprocess
+        self.palette = get_palette(len(self.label_list))
+        self._set_config()
+
+    def _set_config(self):
+        """
+        predictor config setting
+        """
+        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
+        cpu_config.disable_glog_info()
+        cpu_config.disable_gpu()
+        self.cpu_predictor = create_paddle_predictor(cpu_config)
+
+        try:
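+            # Probe for a usable GPU: if CUDA_VISIBLE_DEVICES holds a device id,
+            # int() succeeds and a GPU predictor is created below; otherwise the
+            # module silently falls back to the CPU predictor.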
+            _places = os.environ["CUDA_VISIBLE_DEVICES"]
+            int(_places[0])
+            use_gpu = True
+        except Exception:
+            use_gpu = False
+        if use_gpu:
+            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
+            gpu_config.disable_glog_info()
+            gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0)
+            self.gpu_predictor = create_paddle_predictor(gpu_config)
+
+    def segmentation(self,
+                     images=None,
+                     paths=None,
+                     data=None,
+                     batch_size=1,
+                     use_gpu=False,
+                     output_dir='ace2p_output',
+                     visualization=False):
+        """
+        API for human parsing.
+
+        Args:
+            images (list[numpy.ndarray]): images data, shape of each is [H, W, C], color space is BGR.
+            paths (list[str]): The paths of images.
+            data (dict): deprecated; key is 'image', the corresponding value is the path to image.
+            batch_size (int): batch size.
+            use_gpu (bool): Whether to use gpu.
+            output_dir (str): The path to store output images.
+            visualization (bool): Whether to save output images or not.
+
+        Returns:
+            res (list[dict]): The result of human parsing and original path of images.
+        """
+        if use_gpu:
+            try:
+                _places = os.environ["CUDA_VISIBLE_DEVICES"]
+                int(_places[0])
+            except Exception:
+                raise RuntimeError(
+                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as the cuda device id."
+                )
+
+        # compatibility with older versions
+        if data and 'image' in data:
+            if paths is None:
+                paths = []
+            paths += data['image']
+
+        # get all data
+        all_data = []
+        scale = (473, 473)  # size of preprocessed image.
+        rotation = 0  # rotation angle, used for obtaining affine matrix in preprocess.
+        for yield_data in reader(images, paths, scale, rotation):
+            all_data.append(yield_data)
+
+        total_num = len(all_data)
+        loop_num = int(np.ceil(total_num / batch_size))
+
+        res = []
+        for iter_id in range(loop_num):
+            batch_data = list()
+            handle_id = iter_id * batch_size
+            for image_id in range(batch_size):
+                try:
+                    batch_data.append(all_data[handle_id + image_id])
+                except IndexError:
+                    # the last batch may hold fewer than batch_size images
+                    pass
+            # feed batch image
+            batch_image = np.array([data['image'] for data in batch_data])
+            batch_image = PaddleTensor(batch_image.astype('float32'))
+            data_out = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run([batch_image])
+            # postprocess one by one
+            for i in range(len(batch_data)):
+                out = postprocess(
+                    data_out=data_out[0].as_ndarray()[i],
+                    org_im=batch_data[i]['org_im'],
+                    org_im_path=batch_data[i]['org_im_path'],
+                    image_info=batch_data[i]['image_info'],
+                    output_dir=output_dir,
+                    visualization=visualization,
+                    palette=self.palette)
+                res.append(out)
+        return res
+
+    def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True):
+        if combined:
+            model_filename = "__model__" if not model_filename else model_filename
+            params_filename = "__params__" if not params_filename else params_filename
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+
+        program, feeded_var_names, target_vars = fluid.io.load_inference_model(
+            dirname=self.default_pretrained_model_path, executor=exe)
+
+        fluid.io.save_inference_model(
+            dirname=dirname,
+            main_program=program,
+            executor=exe,
+            feeded_var_names=feeded_var_names,
+            target_vars=target_vars,
+            model_filename=model_filename,
+            params_filename=params_filename)
+
+    @serving
+    def serving_method(self, images, **kwargs):
+        """
+        Run as a service.
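+
+        Example (illustrative only; assumes the module has been deployed with
+        `hub serving start -m ace2p`, which listens on port 8866 by default):
+
+            import base64
+            import json
+
+            import cv2
+            import requests
+
+            img = cv2.imencode('.jpg', cv2.imread('demo.jpg'))[1]
+            payload = {'images': [base64.b64encode(img.tobytes()).decode('utf8')]}
+            r = requests.post(
+                'http://127.0.0.1:8866/predict/ace2p',
+                headers={'Content-Type': 'application/json'},
+                data=json.dumps(payload))
+            print(r.json()['results'])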
+ """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.segmentation(images_decode, **kwargs) + results = [{'data': cv2_to_base64(result['data'])} for result in results] + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.segmentation( + paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='ace2p_output', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") diff --git a/modules/image/semantic_segmentation/ace2p/processor.py b/modules/image/semantic_segmentation/ace2p/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..34dfac1fd6f54a47a0a59c8d313980139559fe07 --- /dev/null +++ b/modules/image/semantic_segmentation/ace2p/processor.py @@ -0,0 +1,184 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import time + +import base64 +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['cv2_to_base64', 'base64_to_cv2', 'get_palette', 'postprocess'] + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def check_dir(dir_path): + """ + Create directory to save processed image. + + Args: + dir_path (str): directory path to save images. + """ + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im, org_im_path, output_dir): + """ + Get save image name from source image path. 
+ """ + # name prefix of orginal image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + ext = '.png' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join(output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + + return save_im_path + + +def get_direction(src_point, rot_rad): + sn, cs = np.sin(rot_rad), np.cos(rot_rad) + src_result = [0, 0] + src_result[0] = src_point[0] * cs - src_point[1] * sn + src_result[1] = src_point[0] * sn + src_point[1] * cs + return src_result + + +def get_3rd_point(a, b): + direct = a - b + return b + np.array([-direct[1], direct[0]], dtype=np.float32) + + +def get_affine_transform(center, scale, rot, output_size, shift=np.array([0, 0], dtype=np.float32), inv=0): + if not isinstance(scale, np.ndarray) and not isinstance(scale, list) and not isinstance(scale, tuple): + print(scale) + scale = np.array([scale, scale]) + + scale_tmp = scale * 200.0 + src_w = scale_tmp[0] + dst_w = output_size[1] + dst_h = output_size[0] + rot_rad = np.pi * rot / 180 + src_direction = get_direction([0, src_w * -0.5], rot_rad) + dst_direction = np.array([0, (dst_w - 1) * -0.5], np.float32) + src = np.zeros((3, 2), dtype=np.float32) + dst = np.zeros((3, 2), dtype=np.float32) + src[0, :] = center + scale_tmp * shift + src[1, :] = center + src_direction + scale_tmp * shift + dst[0, :] = [(dst_w - 1) * 0.5, (dst_h - 1) * 0.5] + dst[1, :] = np.array([(dst_w - 1) * 0.5, (dst_h - 1) * 0.5]) + dst_direction + src[2:, :] = get_3rd_point(src[0, :], src[1, :]) + dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) + + if inv: + trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) + else: + trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) + return trans + + +def transform_logits(logits, center, scale, width, height, input_size): + trans = get_affine_transform(center, scale, 0, input_size, inv=1) + channel = logits.shape[2] + target_logits = [] + for i in range(channel): + target_logit = cv2.warpAffine( + logits[:, :, i], + trans, (int(width), int(height)), + flags=cv2.INTER_LINEAR, + borderMode=cv2.BORDER_CONSTANT, + borderValue=(0)) + target_logits.append(target_logit) + target_logits = np.stack(target_logits, axis=2) + return target_logits + + +def get_palette(num_cls): + """ + Returns the color map for visualizing the segmentation mask. + + Args: + num_cls: Number of classes + + Returns: + The color map + """ + n = num_cls + palette = [0] * (n * 3) + for j in range(0, n): + lab = j + palette[j * 3 + 0] = 0 + palette[j * 3 + 1] = 0 + palette[j * 3 + 2] = 0 + i = 0 + while lab: + palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) + palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) + palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) + i += 1 + lab >>= 3 + return palette + + +def postprocess(data_out, org_im, org_im_path, image_info, output_dir, visualization, palette): + """ + Postprocess output of network. one image at a time. + + Args: + data_out (numpy.ndarray): output of neural network. + org_im (numpy.ndarray): orginal image. + org_im_path (str): path of original image. + image_info (dict): info about the preprocessed image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + palette (list): The palette to draw. + + Returns: + res (list[dict]): keys contain 'path', 'data', the corresponding value is: + path (str): The path of original image. 
+ data (numpy.ndarray): The postprocessed image data, only the alpha channel. + """ + result = dict() + result['path'] = org_im_path + + image_center = image_info['image_center'] + image_scale = image_info['image_scale'] + image_width = image_info['image_width'] + image_height = image_info['image_height'] + scale = image_info['scale'] + + data_out = np.squeeze(data_out) + data_out = np.transpose(data_out, [1, 2, 0]) + logits_result = transform_logits(data_out, image_center, image_scale, image_width, image_height, scale) + parsing = np.argmax(logits_result, axis=2) + parsing_im = np.asarray(parsing, dtype=np.uint8) + result['data'] = parsing_im + + if visualization: + check_dir(output_dir) + save_im_path = get_save_image_name(org_im, org_im_path, output_dir) + parsing_im = Image.fromarray(parsing_im) + parsing_im.putpalette(palette) + parsing_im.save(save_im_path) + + return result diff --git a/hub_module/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README.md b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README.md similarity index 100% rename from hub_module/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README.md rename to modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README.md diff --git a/hub_module/modules/image/semantic_segmentation/ace2p/__init__.py b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/__init__.py similarity index 100% rename from hub_module/modules/image/semantic_segmentation/ace2p/__init__.py rename to modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/__init__.py diff --git a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/data_feed.py b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..6306b28f42ea475d00eb0270522595a977b2a111 --- /dev/null +++ b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/data_feed.py @@ -0,0 +1,50 @@ +# coding=utf-8 +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader'] + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path).astype('float32') + each['org_im'] = im + each['org_im_path'] = im_path + each['org_im_shape'] = im.shape + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." 
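+        # ndarray inputs carry no file path, so a pseudo path holding a
+        # microsecond timestamp is recorded below to keep output names unique.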
+        for im in images:
+            each = OrderedDict()
+            each['org_im'] = im
+            each['org_im_path'] = 'ndarray_time={}'.format(int(round(time.time(), 6) * 1e6))
+            each['org_im_shape'] = im.shape
+            component.append(each)
+
+    for element in component:
+        img = element['org_im'].copy()
+        img = cv2.resize(img, (513, 513)).astype(np.float32)
+        img -= np.array([104.008, 116.669, 122.675])
+        img /= np.array([1.0, 1.0, 1.0])  # per-channel std is 1.0; kept for symmetry with the mean subtraction
+        img = img.transpose((2, 0, 1))
+        element['image'] = img
+        yield element
diff --git a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/module.py b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..9aac87575b171d10375450103b9d4a2852cf6057
--- /dev/null
+++ b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/module.py
@@ -0,0 +1,203 @@
+# coding=utf-8
+from __future__ import absolute_import
+from __future__ import division
+
+import ast
+import os
+import argparse
+
+import numpy as np
+import paddle.fluid as fluid
+import paddlehub as hub
+from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+from paddlehub.module.module import moduleinfo, runnable, serving
+
+from deeplabv3p_xception65_humanseg.processor import postprocess, base64_to_cv2, cv2_to_base64
+from deeplabv3p_xception65_humanseg.data_feed import reader
+
+
+@moduleinfo(
+    name="deeplabv3p_xception65_humanseg",
+    type="CV/semantic_segmentation",
+    author="baidu-vis",
+    author_email="",
+    summary="DeepLabv3+ is a semantic segmentation model.",
+    version="1.1.1")
+class DeeplabV3pXception65HumanSeg(hub.Module):
+    def _initialize(self):
+        self.default_pretrained_model_path = os.path.join(self.directory, "deeplabv3p_xception65_humanseg_model")
+        self._set_config()
+
+    def _set_config(self):
+        """
+        predictor config setting
+        """
+        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
+        cpu_config.disable_glog_info()
+        cpu_config.disable_gpu()
+        self.cpu_predictor = create_paddle_predictor(cpu_config)
+
+        try:
+            _places = os.environ["CUDA_VISIBLE_DEVICES"]
+            int(_places[0])
+            use_gpu = True
+        except Exception:
+            use_gpu = False
+        if use_gpu:
+            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
+            gpu_config.disable_glog_info()
+            gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0)
+            self.gpu_predictor = create_paddle_predictor(gpu_config)
+
+    def segmentation(self,
+                     images=None,
+                     paths=None,
+                     data=None,
+                     batch_size=1,
+                     use_gpu=False,
+                     visualization=False,
+                     output_dir='humanseg_output'):
+        """
+        API for human segmentation.
+
+        Args:
+            images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR.
+            paths (list[str]): The paths of images.
+            data (dict): key is 'image', the corresponding value is the path to image.
+            batch_size (int): batch size.
+            use_gpu (bool): Whether to use gpu.
+            visualization (bool): Whether to save image or not.
+            output_dir (str): The path to store output images.
+
+        Returns:
+            res (list[dict]): each element in the list is a dict, the keys and values are:
+                save_path (str, optional): the path to save images. (Exists only if visualization is True)
+                data (numpy.ndarray): data of post processed image.
+        """
+        if use_gpu:
+            try:
+                _places = os.environ["CUDA_VISIBLE_DEVICES"]
+                int(_places[0])
+            except Exception:
+                raise RuntimeError(
+                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as the cuda device id."
+                )
+
+        # compatibility with older versions
+        if data and 'image' in data:
+            if paths is None:
+                paths = list()
+            paths += data['image']
+
+        all_data = list()
+        for yield_data in reader(images, paths):
+            all_data.append(yield_data)
+
+        total_num = len(all_data)
+        loop_num = int(np.ceil(total_num / batch_size))
+
+        res = list()
+        for iter_id in range(loop_num):
+            batch_data = list()
+            handle_id = iter_id * batch_size
+            for image_id in range(batch_size):
+                try:
+                    batch_data.append(all_data[handle_id + image_id])
+                except IndexError:
+                    # the last batch may hold fewer than batch_size images
+                    pass
+            # feed batch image
+            batch_image = np.array([data['image'] for data in batch_data])
+            batch_image = PaddleTensor(batch_image.copy())
+            output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run([batch_image])
+            output = np.expand_dims(output[0].as_ndarray(), axis=1)
+            # postprocess one by one
+            for i in range(len(batch_data)):
+                out = postprocess(
+                    data_out=output[i],
+                    org_im=batch_data[i]['org_im'],
+                    org_im_shape=batch_data[i]['org_im_shape'],
+                    org_im_path=batch_data[i]['org_im_path'],
+                    output_dir=output_dir,
+                    visualization=visualization)
+                res.append(out)
+        return res
+
+    def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True):
+        if combined:
+            model_filename = "__model__" if not model_filename else model_filename
+            params_filename = "__params__" if not params_filename else params_filename
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+
+        program, feeded_var_names, target_vars = fluid.io.load_inference_model(
+            dirname=self.default_pretrained_model_path, executor=exe)
+
+        fluid.io.save_inference_model(
+            dirname=dirname,
+            main_program=program,
+            executor=exe,
+            feeded_var_names=feeded_var_names,
+            target_vars=target_vars,
+            model_filename=model_filename,
+            params_filename=params_filename)
+
+    @serving
+    def serving_method(self, images, **kwargs):
+        """
+        Run as a service.
+        """
+        images_decode = [base64_to_cv2(image) for image in images]
+        results = self.segmentation(images=images_decode, **kwargs)
+        results = [{'data': cv2_to_base64(result['data'])} for result in results]
+        return results
+
+    @runnable
+    def run_cmd(self, argvs):
+        """
+        Run as a command.
+        """
+        self.parser = argparse.ArgumentParser(
+            description="Run the {} module.".format(self.name),
+            prog='hub run {}'.format(self.name),
+            usage='%(prog)s',
+            add_help=True)
+
+        self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required")
+        self.arg_config_group = self.parser.add_argument_group(
+            title="Config options", description="Run configuration for controlling module behavior, not required.")
+        self.add_module_config_arg()
+        self.add_module_input_arg()
+        args = self.parser.parse_args(argvs)
+        results = self.segmentation(
+            paths=[args.input_path],
+            batch_size=args.batch_size,
+            use_gpu=args.use_gpu,
+            output_dir=args.output_dir,
+            visualization=args.visualization)
+        return results
+
+    def add_module_config_arg(self):
+        """
+        Add the command config options.
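+
+        An illustrative command line (flags correspond to the arguments added below):
+
+            hub run deeplabv3p_xception65_humanseg --input_path demo.jpg \
+                --use_gpu False --visualization True --output_dir humanseg_output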
+ """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='humanseg_output', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + + +if __name__ == "__main__": + m = DeeplabV3pXception65HumanSeg() + import cv2 + img = cv2.imread('./meditation.jpg') + res = m.segmentation(images=[img]) + print(res[0]['data']) diff --git a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/processor.py b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..ca456d9bf3ade466a56dc7b3998d8cb80fbb4cee --- /dev/null +++ b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/processor.py @@ -0,0 +1,86 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import time +from collections import OrderedDict + +import base64 +import cv2 +import numpy as np + +__all__ = ['cv2_to_base64', 'base64_to_cv2', 'postprocess'] + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def postprocess(data_out, org_im, org_im_shape, org_im_path, output_dir, visualization, thresh=120): + """ + Postprocess output of network. one image at a time. + + Args: + data_out (numpy.ndarray): output of network. + org_im (numpy.ndarray): original image. + org_im_shape (list): shape pf original image. + org_im_path (list): path of riginal image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + thresh (float): threshold. + + Returns: + result (dict): The data of processed image. + """ + result = dict() + for logit in data_out: + logit = logit[1] * 255 + logit = cv2.resize(logit, (org_im_shape[1], org_im_shape[0])) + logit -= thresh + logit[logit < 0] = 0 + logit = 255 * logit / (255 - thresh) + rgba = np.concatenate((org_im, np.expand_dims(logit, axis=2)), axis=2) + + if visualization: + check_dir(output_dir) + save_im_path = get_save_image_name(org_im, org_im_path, output_dir) + cv2.imwrite(save_im_path, rgba) + result['save_path'] = save_im_path + result['data'] = rgba[:, :, 3] + else: + result['data'] = rgba[:, :, 3] + return result + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im, org_im_path, output_dir): + """ + Get save image name from source image path. 
+ """ + # name prefix of orginal image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + ext = '.png' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join(output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + + return save_im_path diff --git a/modules/image/style_transfer/msgnet/module.py b/modules/image/style_transfer/msgnet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..5c126124c47c403ab14d9eae02faff594227042f --- /dev/null +++ b/modules/image/style_transfer/msgnet/module.py @@ -0,0 +1,349 @@ +import os + +import paddle +import paddle.nn as nn +import numpy as np +import paddle.nn.functional as F + +from paddlehub.env import MODULE_HOME +from paddlehub.module.module import moduleinfo +from paddlehub.process.transforms import Compose, Resize, CenterCrop, SetType +from paddlehub.module.cv_module import StyleTransferModule + + +class GramMatrix(nn.Layer): + """Calculate gram matrix""" + + def forward(self, y): + (b, ch, h, w) = y.shape + features = y.reshape((b, ch, w * h)) + features_t = features.transpose((0, 2, 1)) + gram = features.bmm(features_t) / (ch * h * w) + return gram + + +class ConvLayer(nn.Layer): + """Basic conv layer with reflection padding layer""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride: int): + super(ConvLayer, self).__init__() + pad = int(np.floor(kernel_size / 2)) + self.reflection_pad = nn.Pad2D([pad, pad, pad, pad], mode='reflect') + self.conv2d = nn.Conv2D(in_channels, out_channels, kernel_size, stride) + + def forward(self, x: paddle.Tensor): + out = self.reflection_pad(x) + out = self.conv2d(out) + return out + + +class UpsampleConvLayer(nn.Layer): + """ + Upsamples the input and then does a convolution. This method gives better results compared to ConvTranspose2d. + ref: http://distill.pub/2016/deconv-checkerboard/ + + Args: + in_channels(int): Number of input channels. + out_channels(int): Number of output channels. + kernel_size(int): Number of kernel size. + stride(int): Number of stride. + upsample(int): Scale factor for upsample layer, default is None. + + Return: + img(paddle.Tensor): UpsampleConvLayer output. + """ + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride: int, upsample=None): + super(UpsampleConvLayer, self).__init__() + self.upsample = upsample + if upsample: + self.upsample_layer = nn.Upsample(scale_factor=upsample) + self.pad = int(np.floor(kernel_size / 2)) + if self.pad != 0: + self.reflection_pad = nn.Pad2D([self.pad, self.pad, self.pad, self.pad], mode='reflect') + self.conv2d = nn.Conv2D(in_channels, out_channels, kernel_size, stride) + + def forward(self, x): + if self.upsample: + x = self.upsample_layer(x) + if self.pad != 0: + x = self.reflection_pad(x) + out = self.conv2d(x) + return out + + +class Bottleneck(nn.Layer): + """ Pre-activation residual block + Identity Mapping in Deep Residual Networks + ref https://arxiv.org/abs/1603.05027 + + Args: + inplanes(int): Number of input channels. + planes(int): Number of output channels. + stride(int): Number of stride. + downsample(int): Scale factor for downsample layer, default is None. + norm_layer(nn.Layer): Batch norm layer, default is nn.BatchNorm2D. + + Return: + img(paddle.Tensor): Bottleneck output. 
+ """ + + def __init__(self, + inplanes: int, + planes: int, + stride: int = 1, + downsample: int = None, + norm_layer: nn.Layer = nn.BatchNorm2D): + super(Bottleneck, self).__init__() + self.expansion = 4 + self.downsample = downsample + if self.downsample is not None: + self.residual_layer = nn.Conv2D(inplanes, planes * self.expansion, kernel_size=1, stride=stride) + conv_block = (norm_layer(inplanes), nn.ReLU(), nn.Conv2D(inplanes, planes, kernel_size=1, stride=1), + norm_layer(planes), nn.ReLU(), ConvLayer(planes, planes, kernel_size=3, stride=stride), + norm_layer(planes), nn.ReLU(), nn.Conv2D( + planes, planes * self.expansion, kernel_size=1, stride=1)) + self.conv_block = nn.Sequential(*conv_block) + + def forward(self, x: paddle.Tensor): + if self.downsample is not None: + residual = self.residual_layer(x) + else: + residual = x + m = self.conv_block(x) + return residual + self.conv_block(x) + + +class UpBottleneck(nn.Layer): + """ Up-sample residual block (from MSG-Net paper) + Enables passing identity all the way through the generator + ref https://arxiv.org/abs/1703.06953 + + Args: + inplanes(int): Number of input channels. + planes(int): Number of output channels. + stride(int): Number of stride, default is 2. + norm_layer(nn.Layer): Batch norm layer, default is nn.BatchNorm2D. + + Return: + img(paddle.Tensor): UpBottleneck output. + """ + + def __init__(self, inplanes: int, planes: int, stride: int = 2, norm_layer: nn.Layer = nn.BatchNorm2D): + super(UpBottleneck, self).__init__() + self.expansion = 4 + self.residual_layer = UpsampleConvLayer( + inplanes, planes * self.expansion, kernel_size=1, stride=1, upsample=stride) + conv_block = [] + conv_block += [norm_layer(inplanes), nn.ReLU(), nn.Conv2D(inplanes, planes, kernel_size=1, stride=1)] + conv_block += [ + norm_layer(planes), + nn.ReLU(), + UpsampleConvLayer(planes, planes, kernel_size=3, stride=1, upsample=stride) + ] + conv_block += [ + norm_layer(planes), + nn.ReLU(), + nn.Conv2D(planes, planes * self.expansion, kernel_size=1, stride=1) + ] + self.conv_block = nn.Sequential(*conv_block) + + def forward(self, x: paddle.Tensor): + return self.residual_layer(x) + self.conv_block(x) + + +class Inspiration(nn.Layer): + """ Inspiration Layer (from MSG-Net paper) + tuning the featuremap with target Gram Matrix + ref https://arxiv.org/abs/1703.06953 + + Args: + C(int): Number of input channels. + B(int): B is equal to 1 or input mini_batch, default is 1. + + Return: + img(paddle.Tensor): UpBottleneck output. 
+ """ + + def __init__(self, C: int, B: int = 1): + super(Inspiration, self).__init__() + + self.weight = self.weight = paddle.create_parameter(shape=[1, C, C], dtype='float32') + # non-parameter buffer + self.G = paddle.to_tensor(np.random.rand(B, C, C)) + self.C = C + + def setTarget(self, target: paddle.Tensor): + self.G = target + + def forward(self, X: paddle.Tensor): + # input X is a 3D feature map + self.P = paddle.bmm(self.weight.expand_as(self.G), self.G) + + x = paddle.bmm( + self.P.transpose((0, 2, 1)).expand((X.shape[0], self.C, self.C)), X.reshape((X.shape[0], X.shape[1], + -1))).reshape(X.shape) + return x + + def __repr__(self): + return self.__class__.__name__ + '(' \ + + 'N x ' + str(self.C) + ')' + + +class Vgg16(nn.Layer): + """ First four layers from Vgg16.""" + + def __init__(self): + super(Vgg16, self).__init__() + self.conv1_1 = nn.Conv2D(3, 64, kernel_size=3, stride=1, padding=1) + self.conv1_2 = nn.Conv2D(64, 64, kernel_size=3, stride=1, padding=1) + + self.conv2_1 = nn.Conv2D(64, 128, kernel_size=3, stride=1, padding=1) + self.conv2_2 = nn.Conv2D(128, 128, kernel_size=3, stride=1, padding=1) + + self.conv3_1 = nn.Conv2D(128, 256, kernel_size=3, stride=1, padding=1) + self.conv3_2 = nn.Conv2D(256, 256, kernel_size=3, stride=1, padding=1) + self.conv3_3 = nn.Conv2D(256, 256, kernel_size=3, stride=1, padding=1) + + self.conv4_1 = nn.Conv2D(256, 512, kernel_size=3, stride=1, padding=1) + self.conv4_2 = nn.Conv2D(512, 512, kernel_size=3, stride=1, padding=1) + self.conv4_3 = nn.Conv2D(512, 512, kernel_size=3, stride=1, padding=1) + + self.conv5_1 = nn.Conv2D(512, 512, kernel_size=3, stride=1, padding=1) + self.conv5_2 = nn.Conv2D(512, 512, kernel_size=3, stride=1, padding=1) + self.conv5_3 = nn.Conv2D(512, 512, kernel_size=3, stride=1, padding=1) + + checkpoint = os.path.join(MODULE_HOME, 'msgnet', 'vgg16.pdparams') + if not os.path.exists(checkpoint): + os.system('wget https://bj.bcebos.com/paddlehub/model/image/image_editing/vgg_paddle.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained vgg16 checkpoint success") + + def forward(self, X): + h = F.relu(self.conv1_1(X)) + h = F.relu(self.conv1_2(h)) + relu1_2 = h + h = F.max_pool2d(h, kernel_size=2, stride=2) + + h = F.relu(self.conv2_1(h)) + h = F.relu(self.conv2_2(h)) + relu2_2 = h + h = F.max_pool2d(h, kernel_size=2, stride=2) + + h = F.relu(self.conv3_1(h)) + h = F.relu(self.conv3_2(h)) + h = F.relu(self.conv3_3(h)) + relu3_3 = h + h = F.max_pool2d(h, kernel_size=2, stride=2) + + h = F.relu(self.conv4_1(h)) + h = F.relu(self.conv4_2(h)) + h = F.relu(self.conv4_3(h)) + relu4_3 = h + + return [relu1_2, relu2_2, relu3_3, relu4_3] + + +@moduleinfo( + name="msgnet", + type="CV/image_editing", + author="baidu-vis", + author_email="", + summary="Msgnet is a image colorization style transfer model, this module is trained with COCO2014 dataset.", + version="1.0.0", + meta=StyleTransferModule) +class MSGNet(nn.Layer): + """ MSGNet (from MSG-Net paper) + Enables passing identity all the way through the generator + ref https://arxiv.org/abs/1703.06953 + + Args: + input_nc(int): Number of input channels, default is 3. + output_nc(int): Number of output channels, default is 3. + ngf(int): Number of input channel for middle layer, default is 128. + n_blocks(int): Block number, default is 6. + norm_layer(nn.Layer): Batch norm layer, default is nn.InstanceNorm2D. + load_checkpoint(str): Pretrained checkpoint path, default is None. 
+ + Return: + img(paddle.Tensor): MSGNet output. + """ + + def __init__(self, input_nc=3, output_nc=3, ngf=128, n_blocks=6, norm_layer=nn.InstanceNorm2D, + load_checkpoint=None): + super(MSGNet, self).__init__() + self.gram = GramMatrix() + block = Bottleneck + upblock = UpBottleneck + expansion = 4 + + model1 = [ + ConvLayer(input_nc, 64, kernel_size=7, stride=1), + norm_layer(64), + nn.ReLU(), + block(64, 32, 2, 1, norm_layer), + block(32 * expansion, ngf, 2, 1, norm_layer) + ] + + self.model1 = nn.Sequential(*tuple(model1)) + + model = [] + model += model1 + + self.ins = Inspiration(ngf * expansion) + model.append(self.ins) + for i in range(n_blocks): + model += [block(ngf * expansion, ngf, 1, None, norm_layer)] + + model += [ + upblock(ngf * expansion, 32, 2, norm_layer), + upblock(32 * expansion, 16, 2, norm_layer), + norm_layer(16 * expansion), + nn.ReLU(), + ConvLayer(16 * expansion, output_nc, kernel_size=7, stride=1) + ] + model = tuple(model) + self.model = nn.Sequential(*model) + + if load_checkpoint is not None: + model_dict = paddle.load(load_checkpoint) + self.set_dict(model_dict) + print("load custom checkpoint success") + + else: + checkpoint = os.path.join(self.directory, 'style_paddle.pdparams') + if not os.path.exists(checkpoint): + os.system('wget https://bj.bcebos.com/paddlehub/model/image/image_editing/style_paddle.pdparams -O ' + + checkpoint) + model_dict = paddle.load(checkpoint) + model_dict_clone = model_dict.copy() + for key, value in model_dict_clone.items(): + if key.endswith(("scale")): + name = key.rsplit('.', 1)[0] + '.bias' + model_dict[name] = paddle.zeros(shape=model_dict[name].shape, dtype='float32') + model_dict[key] = paddle.ones(shape=model_dict[key].shape, dtype='float32') + self.set_dict(model_dict) + print("load pretrained checkpoint success") + + self._vgg = None + + def transform(self, path: str): + transform = Compose([Resize( + (256, 256), interp='LINEAR'), CenterCrop(crop_size=256)], SetType(datatype='float32')) + return transform(path) + + def setTarget(self, Xs: paddle.Tensor): + """Calculate feature gram matrix""" + F = self.model1(Xs) + G = self.gram(F) + self.ins.setTarget(G) + + def getFeature(self, input: paddle.Tensor): + if not self._vgg: + self._vgg = Vgg16() + return self._vgg(input) + + def forward(self, input: paddle.Tensor): + return self.model(input) diff --git a/hub_module/modules/image/style_transfer/stylepro_artistic/README.md b/modules/image/style_transfer/stylepro_artistic/README.md similarity index 100% rename from hub_module/modules/image/style_transfer/stylepro_artistic/README.md rename to modules/image/style_transfer/stylepro_artistic/README.md diff --git a/hub_module/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/__init__.py b/modules/image/style_transfer/stylepro_artistic/__init__.py similarity index 100% rename from hub_module/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/__init__.py rename to modules/image/style_transfer/stylepro_artistic/__init__.py diff --git a/modules/image/style_transfer/stylepro_artistic/data_feed.py b/modules/image/style_transfer/stylepro_artistic/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..00cbbe4e928367d3b4cdedaf28ff4858fe811cc7 --- /dev/null +++ b/modules/image/style_transfer/stylepro_artistic/data_feed.py @@ -0,0 +1,79 @@ +# coding=utf-8 +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader'] + + +def reader(images=None, 
paths=None):
+    """
+    Preprocess to get image data.
+
+    Args:
+        images (list): list of dict objects, each dict contains key:
+            content(str): value is a numpy.ndarray with shape [H, W, C], content data.
+            styles(str): value is a list of numpy.ndarray with shape [H, W, C], styles data.
+            weights(str, optional): value is the interpolation weights corresponding to styles.
+        paths (list): list of dict objects, each dict contains key:
+            content(str): value is the path to content.
+            styles(str): value is the paths to styles.
+            weights(str, optional): value is the interpolation weights corresponding to styles.
+    Yield:
+        im (numpy.ndarray): preprocessed data, with shape (1, 3, 512, 512).
+    """
+    pipeline_list = list()
+    # images
+    for key, data in [('im_arr', images), ('im_path', paths)]:
+        if data is not None:
+            for component in data:
+                each_res = OrderedDict()
+                # content_arr
+                each_res['content_arr'], w, h = _handle_single(**{key: component['content']})
+                # styles_arr_list
+                styles_list = component['styles']
+                styles_num = len(styles_list)
+                each_res['styles_arr_list'] = []
+                for i, style_arr in enumerate(styles_list):
+                    each_res['styles_arr_list'].append(_handle_single(**{key: style_arr})[0])
+                # style_interpolation_weights
+                if 'weights' in component:
+                    assert len(component['weights']
+                               ) == styles_num, "The number of weights must be equal to the number of styles."
+                    each_res['style_interpolation_weights'] = component['weights']
+                else:
+                    each_res['style_interpolation_weights'] = np.ones(styles_num)
+                # normalize the weights so they sum to 1
+                each_res['style_interpolation_weights'] = [
+                    each_res['style_interpolation_weights'][j] / sum(each_res['style_interpolation_weights'])
+                    for j in range(styles_num)
+                ]
+                pipeline_list.append([each_res, w, h])
+
+    # yield
+    for element in pipeline_list:
+        yield element
+
+
+def _handle_single(im_path=None, im_arr=None):
+    """
+    Preprocess to get image data.
+    Args:
+        im_path (str): path to image.
+        im_arr (numpy.ndarray): image data, with shape (H, W, 3).
+    Returns:
+        im (numpy.ndarray): preprocessed data, with shape (1, 3, 512, 512).
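+
+    Note (illustrative summary of the steps below): exactly one of im_path /
+    im_arr is expected. The image is converted BGR -> RGB, resized to 512 x 512,
+    transposed to CHW, given a batch axis and scaled to [0, 1]; the original
+    (w, h) is returned so the stylized result can be resized back.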
+ """ + if im_path is not None: + im = cv2.imread(im_path)[:, :, ::-1].astype(np.float32) + if im_arr is not None: + im = im_arr[:, :, ::-1].astype(np.float32) + w, h = im.shape[1], im.shape[0] + im = cv2.resize(im, (512, 512), interpolation=cv2.INTER_LINEAR) + im = im.transpose((2, 0, 1)) + im = np.expand_dims(im, axis=0) + im /= 255.0 + return im, w, h diff --git a/modules/image/style_transfer/stylepro_artistic/decoder_network.py b/modules/image/style_transfer/stylepro_artistic/decoder_network.py new file mode 100644 index 0000000000000000000000000000000000000000..99a67c0aa44869ff989f7eaa176e42e179120896 --- /dev/null +++ b/modules/image/style_transfer/stylepro_artistic/decoder_network.py @@ -0,0 +1,144 @@ +# coding=utf-8 +from paddle.fluid.initializer import Constant +from paddle.fluid.param_attr import ParamAttr +import paddle.fluid as fluid + + +def decoder_net(): + x2paddle_22 = fluid.layers.create_parameter( + dtype='float32', shape=[4], name='x2paddle_22', attr='x2paddle_22', default_initializer=Constant(0.0)) + x2paddle_36 = fluid.layers.create_parameter( + dtype='float32', shape=[4], name='x2paddle_36', attr='x2paddle_36', default_initializer=Constant(0.0)) + x2paddle_44 = fluid.layers.create_parameter( + dtype='float32', shape=[4], name='x2paddle_44', attr='x2paddle_44', default_initializer=Constant(0.0)) + x2paddle_input_1 = fluid.layers.data( + dtype='float32', shape=[1, 512, 64, 64], name='x2paddle_input_1', append_batch_size=False) + x2paddle_19 = fluid.layers.pad2d( + x2paddle_input_1, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_19') + x2paddle_20 = fluid.layers.conv2d( + x2paddle_19, + num_filters=256, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_1', + name='x2paddle_20', + bias_attr='x2paddle_2') + x2paddle_21 = fluid.layers.relu(x2paddle_20, name='x2paddle_21') + x2paddle_23 = fluid.layers.resize_nearest(x2paddle_21, name='x2paddle_23', out_shape=[128, 128]) + x2paddle_24 = fluid.layers.pad2d( + x2paddle_23, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_24') + x2paddle_25 = fluid.layers.conv2d( + x2paddle_24, + num_filters=256, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_3', + name='x2paddle_25', + bias_attr='x2paddle_4') + x2paddle_26 = fluid.layers.relu(x2paddle_25, name='x2paddle_26') + x2paddle_27 = fluid.layers.pad2d( + x2paddle_26, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_27') + x2paddle_28 = fluid.layers.conv2d( + x2paddle_27, + num_filters=256, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_5', + name='x2paddle_28', + bias_attr='x2paddle_6') + x2paddle_29 = fluid.layers.relu(x2paddle_28, name='x2paddle_29') + x2paddle_30 = fluid.layers.pad2d( + x2paddle_29, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_30') + x2paddle_31 = fluid.layers.conv2d( + x2paddle_30, + num_filters=256, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_7', + name='x2paddle_31', + bias_attr='x2paddle_8') + x2paddle_32 = fluid.layers.relu(x2paddle_31, name='x2paddle_32') + x2paddle_33 = fluid.layers.pad2d( + x2paddle_32, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_33') + x2paddle_34 = fluid.layers.conv2d( + x2paddle_33, + num_filters=128, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + 
groups=1, + param_attr='x2paddle_9', + name='x2paddle_34', + bias_attr='x2paddle_10') + x2paddle_35 = fluid.layers.relu(x2paddle_34, name='x2paddle_35') + x2paddle_37 = fluid.layers.resize_nearest(x2paddle_35, name='x2paddle_37', out_shape=[256, 256]) + x2paddle_38 = fluid.layers.pad2d( + x2paddle_37, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_38') + x2paddle_39 = fluid.layers.conv2d( + x2paddle_38, + num_filters=128, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_11', + name='x2paddle_39', + bias_attr='x2paddle_12') + x2paddle_40 = fluid.layers.relu(x2paddle_39, name='x2paddle_40') + x2paddle_41 = fluid.layers.pad2d( + x2paddle_40, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_41') + x2paddle_42 = fluid.layers.conv2d( + x2paddle_41, + num_filters=64, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_13', + name='x2paddle_42', + bias_attr='x2paddle_14') + x2paddle_43 = fluid.layers.relu(x2paddle_42, name='x2paddle_43') + x2paddle_45 = fluid.layers.resize_nearest(x2paddle_43, name='x2paddle_45', out_shape=[512, 512]) + x2paddle_46 = fluid.layers.pad2d( + x2paddle_45, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_46') + x2paddle_47 = fluid.layers.conv2d( + x2paddle_46, + num_filters=64, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_15', + name='x2paddle_47', + bias_attr='x2paddle_16') + x2paddle_48 = fluid.layers.relu(x2paddle_47, name='x2paddle_48') + x2paddle_49 = fluid.layers.pad2d( + x2paddle_48, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_49') + x2paddle_50 = fluid.layers.conv2d( + x2paddle_49, + num_filters=3, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_17', + name='x2paddle_50', + bias_attr='x2paddle_18') + return x2paddle_input_1, x2paddle_50 diff --git a/modules/image/style_transfer/stylepro_artistic/encoder_network.py b/modules/image/style_transfer/stylepro_artistic/encoder_network.py new file mode 100644 index 0000000000000000000000000000000000000000..0bff785c65e933669fbd790565156da3edaed33d --- /dev/null +++ b/modules/image/style_transfer/stylepro_artistic/encoder_network.py @@ -0,0 +1,173 @@ +# coding=utf-8 +from paddle.fluid.initializer import Constant +from paddle.fluid.param_attr import ParamAttr +import paddle.fluid as fluid + + +def encoder_net(): + x2paddle_0 = fluid.layers.data(dtype='float32', shape=[1, 3, 512, 512], name='x2paddle_0', append_batch_size=False) + x2paddle_21 = fluid.layers.conv2d( + x2paddle_0, + num_filters=3, + filter_size=[1, 1], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_1', + name='x2paddle_21', + bias_attr='x2paddle_2') + x2paddle_22 = fluid.layers.pad2d( + x2paddle_21, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_22') + x2paddle_23 = fluid.layers.conv2d( + x2paddle_22, + num_filters=64, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_3', + name='x2paddle_23', + bias_attr='x2paddle_4') + x2paddle_24 = fluid.layers.relu(x2paddle_23, name='x2paddle_24') + x2paddle_25 = fluid.layers.pad2d( + x2paddle_24, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_25') + x2paddle_26 = fluid.layers.conv2d( + x2paddle_25, + num_filters=64, + filter_size=[3, 3], + 
stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_5', + name='x2paddle_26', + bias_attr='x2paddle_6') + x2paddle_27 = fluid.layers.relu(x2paddle_26, name='x2paddle_27') + x2paddle_28 = fluid.layers.pool2d( + x2paddle_27, + pool_size=[2, 2], + pool_type='max', + pool_stride=[2, 2], + pool_padding=[0, 0], + ceil_mode=False, + name='x2paddle_28', + exclusive=False) + x2paddle_29 = fluid.layers.pad2d( + x2paddle_28, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_29') + x2paddle_30 = fluid.layers.conv2d( + x2paddle_29, + num_filters=128, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_7', + name='x2paddle_30', + bias_attr='x2paddle_8') + x2paddle_31 = fluid.layers.relu(x2paddle_30, name='x2paddle_31') + x2paddle_32 = fluid.layers.pad2d( + x2paddle_31, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_32') + x2paddle_33 = fluid.layers.conv2d( + x2paddle_32, + num_filters=128, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_9', + name='x2paddle_33', + bias_attr='x2paddle_10') + x2paddle_34 = fluid.layers.relu(x2paddle_33, name='x2paddle_34') + x2paddle_35 = fluid.layers.pool2d( + x2paddle_34, + pool_size=[2, 2], + pool_type='max', + pool_stride=[2, 2], + pool_padding=[0, 0], + ceil_mode=False, + name='x2paddle_35', + exclusive=False) + x2paddle_36 = fluid.layers.pad2d( + x2paddle_35, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_36') + x2paddle_37 = fluid.layers.conv2d( + x2paddle_36, + num_filters=256, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_11', + name='x2paddle_37', + bias_attr='x2paddle_12') + x2paddle_38 = fluid.layers.relu(x2paddle_37, name='x2paddle_38') + x2paddle_39 = fluid.layers.pad2d( + x2paddle_38, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_39') + x2paddle_40 = fluid.layers.conv2d( + x2paddle_39, + num_filters=256, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_13', + name='x2paddle_40', + bias_attr='x2paddle_14') + x2paddle_41 = fluid.layers.relu(x2paddle_40, name='x2paddle_41') + x2paddle_42 = fluid.layers.pad2d( + x2paddle_41, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_42') + x2paddle_43 = fluid.layers.conv2d( + x2paddle_42, + num_filters=256, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_15', + name='x2paddle_43', + bias_attr='x2paddle_16') + x2paddle_44 = fluid.layers.relu(x2paddle_43, name='x2paddle_44') + x2paddle_45 = fluid.layers.pad2d( + x2paddle_44, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_45') + x2paddle_46 = fluid.layers.conv2d( + x2paddle_45, + num_filters=256, + filter_size=[3, 3], + stride=[1, 1], + padding=[0, 0], + dilation=[1, 1], + groups=1, + param_attr='x2paddle_17', + name='x2paddle_46', + bias_attr='x2paddle_18') + x2paddle_47 = fluid.layers.relu(x2paddle_46, name='x2paddle_47') + x2paddle_48 = fluid.layers.pool2d( + x2paddle_47, + pool_size=[2, 2], + pool_type='max', + pool_stride=[2, 2], + pool_padding=[0, 0], + ceil_mode=False, + name='x2paddle_48', + exclusive=False) + x2paddle_49 = fluid.layers.pad2d( + x2paddle_48, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_49') + x2paddle_50 = fluid.layers.conv2d( + x2paddle_49, + num_filters=512, 
+        filter_size=[3, 3],
+        stride=[1, 1],
+        padding=[0, 0],
+        dilation=[1, 1],
+        groups=1,
+        param_attr='x2paddle_19',
+        name='x2paddle_50',
+        bias_attr='x2paddle_20')
+    x2paddle_51 = fluid.layers.relu(x2paddle_50, name='x2paddle_51')
+    return x2paddle_0, x2paddle_51
diff --git a/modules/image/style_transfer/stylepro_artistic/module.py b/modules/image/style_transfer/stylepro_artistic/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..b6739014d2f92eeabfa99d7993aa44e6dc0e8cf6
--- /dev/null
+++ b/modules/image/style_transfer/stylepro_artistic/module.py
@@ -0,0 +1,229 @@
+# coding=utf-8
+from __future__ import absolute_import
+from __future__ import division
+
+import ast
+import copy
+import time
+import os
+import argparse
+
+import numpy as np
+import paddle.fluid as fluid
+import paddlehub as hub
+from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+from paddlehub.module.module import moduleinfo, runnable, serving
+
+from stylepro_artistic.encoder_network import encoder_net
+from stylepro_artistic.decoder_network import decoder_net
+from stylepro_artistic.processor import postprocess, fr, cv2_to_base64, base64_to_cv2
+from stylepro_artistic.data_feed import reader
+
+
+@moduleinfo(
+    name="stylepro_artistic",
+    version="1.0.1",
+    type="cv/style_transfer",
+    summary="StylePro Artistic is an algorithm for arbitrary image style transfer, which is parameter-free, fast yet effective.",
+    author="baidu-bdl",
+    author_email="")
+class StyleProjection(hub.Module):
+    def _initialize(self):
+        self.pretrained_encoder_net = os.path.join(self.directory, "style_projection_enc")
+        self.pretrained_decoder_net = os.path.join(self.directory, "style_projection_dec")
+        self._set_config()
+
+    def _set_config(self):
+        """
+        predictor config setting
+        """
+        # encoder
+        cpu_config_enc = AnalysisConfig(self.pretrained_encoder_net)
+        cpu_config_enc.disable_glog_info()
+        cpu_config_enc.disable_gpu()
+        self.cpu_predictor_enc = create_paddle_predictor(cpu_config_enc)
+        # decoder
+        cpu_config_dec = AnalysisConfig(self.pretrained_decoder_net)
+        cpu_config_dec.disable_glog_info()
+        cpu_config_dec.disable_gpu()
+        self.cpu_predictor_dec = create_paddle_predictor(cpu_config_dec)
+
+        try:
+            _places = os.environ["CUDA_VISIBLE_DEVICES"]
+            int(_places[0])
+            use_gpu = True
+        except Exception:
+            use_gpu = False
+        if use_gpu:
+            # encoder
+            gpu_config_enc = AnalysisConfig(self.pretrained_encoder_net)
+            gpu_config_enc.disable_glog_info()
+            gpu_config_enc.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0)
+            self.gpu_predictor_enc = create_paddle_predictor(gpu_config_enc)
+            # decoder
+            gpu_config_dec = AnalysisConfig(self.pretrained_decoder_net)
+            gpu_config_dec.disable_glog_info()
+            gpu_config_dec.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0)
+            self.gpu_predictor_dec = create_paddle_predictor(gpu_config_dec)
+
+    def style_transfer(self,
+                       images=None,
+                       paths=None,
+                       alpha=1,
+                       use_gpu=False,
+                       output_dir='transfer_result',
+                       visualization=False):
+        """
+        API for image style transfer.
+
+        Args:
+            images (list): list of dict objects, each dict contains key:
+                content(str): value is a numpy.ndarray with shape [H, W, C], content data.
+                styles(str): value is a list of numpy.ndarray with shape [H, W, C], styles data.
+                weights(str, optional): value is the interpolation weights corresponding to styles.
+            paths (list): list of dict objects, each dict contains key:
+                content(str): value is the path to content.
+                styles(str): value is the paths to styles.
+ weights(str, optional): value is the interpolation weights correspond to styles. + alpha (float): The weight that controls the degree of stylization. Should be between 0 and 1. + use_gpu (bool): whether to use gpu. + output_dir (str): the path to store output images. + visualization (bool): whether to save image or not. + + Returns: + im_output (list[dict()]): list of output images and save path of images. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." + ) + + im_output = [] + for component, w, h in reader(images, paths): + content = PaddleTensor(component['content_arr'].copy()) + content_feats = self.gpu_predictor_enc.run([content]) if use_gpu else self.cpu_predictor_enc.run([content]) + accumulate = np.zeros((3, 512, 512)) + for idx, style_arr in enumerate(component['styles_arr_list']): + style = PaddleTensor(style_arr.copy()) + # encode + style_feats = self.gpu_predictor_enc.run([style]) if use_gpu else self.cpu_predictor_enc.run([style]) + fr_feats = fr(content_feats[0].as_ndarray(), style_feats[0].as_ndarray(), alpha) + fr_feats = PaddleTensor(fr_feats.copy()) + # decode + predict_outputs = self.gpu_predictor_dec.run([fr_feats]) if use_gpu else self.cpu_predictor_dec.run( + [fr_feats]) + # interpolation + accumulate += predict_outputs[0].as_ndarray()[0] * component['style_interpolation_weights'][idx] + # postprocess + save_im_name = 'ndarray_{}.jpg'.format(time.time()) + result = postprocess(accumulate, output_dir, save_im_name, visualization, size=(w, h)) + im_output.append(result) + return im_output + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + encode_dirname = os.path.join(dirname, 'encoder') + decode_dirname = os.path.join(dirname, 'decoder') + self._save_encode_model(encode_dirname, model_filename, params_filename, combined) + self._save_decode_model(decode_dirname, model_filename, params_filename, combined) + + def _save_encode_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + encode_program, encode_feeded_var_names, encode_target_vars = fluid.io.load_inference_model( + dirname=self.pretrained_encoder_net, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=encode_program, + executor=exe, + feeded_var_names=encode_feeded_var_names, + target_vars=encode_target_vars, + model_filename=model_filename, + params_filename=params_filename) + + def _save_decode_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + decode_program, decode_feeded_var_names, decode_target_vars = fluid.io.load_inference_model( + dirname=self.pretrained_decoder_net, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=decode_program, + executor=exe, + feeded_var_names=decode_feeded_var_names, + target_vars=decode_target_vars, + model_filename=model_filename, + 
params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = copy.deepcopy(images) + for image in images_decode: + image['content'] = base64_to_cv2(image['content']) + image['styles'] = [base64_to_cv2(style) for style in image['styles']] + results = self.style_transfer(images_decode, **kwargs) + results = [{'data': cv2_to_base64(result['data'])} for result in results] + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + if args.weights is None: + paths = [{'content': args.content, 'styles': args.styles.split(',')}] + else: + paths = [{'content': args.content, 'styles': args.styles.split(','), 'weights': list(args.weights)}] + results = self.style_transfer( + paths=paths, alpha=args.alpha, use_gpu=args.use_gpu, output_dir=args.output_dir, visualization=True) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='transfer_result', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=True, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. 
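+ e.g. (a hypothetical invocation; the file names are illustrative only): + hub run stylepro_artistic --content ./content.jpg --styles ./style1.jpg,./style2.jpg --weights '[0.5, 0.5]'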
+ """ + self.arg_input_group.add_argument('--content', type=str, help="path to content.") + self.arg_input_group.add_argument('--styles', type=str, help="path to styles.") + self.arg_input_group.add_argument( + '--weights', type=ast.literal_eval, default=None, help="interpolation weights of styles.") + self.arg_config_group.add_argument( + '--alpha', type=ast.literal_eval, default=1, help="The parameter to control the tranform degree.") diff --git a/modules/image/style_transfer/stylepro_artistic/processor.py b/modules/image/style_transfer/stylepro_artistic/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..62af41547cbb505576f6df4815d8a06711e83e0d --- /dev/null +++ b/modules/image/style_transfer/stylepro_artistic/processor.py @@ -0,0 +1,106 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +import base64 +import cv2 +import numpy as np + +__all__ = ['postprocess', 'fr'] + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def postprocess(im, output_dir, save_im_name, visualization, size): + im = np.multiply(im, 255.0) + 0.5 + im = np.clip(im, 0, 255) + im = im.astype(np.uint8) + im = im.transpose((1, 2, 0)) + im = im[:, :, ::-1] + im = cv2.resize(im, (size[0], size[1]), interpolation=cv2.INTER_LINEAR) + result = {'data': im} + if visualization: + if not os.path.exists(output_dir): + os.makedirs(output_dir) + elif os.path.isfile(output_dir): + os.remove(output_dir) + os.makedirs(output_dir) + # save image + save_path = os.path.join(output_dir, save_im_name) + cv2.imwrite(save_path, im) + result['save_path'] = save_path + return result + + +def fr(content_feat, style_feat, alpha): + content_feat = np.reshape(content_feat, (512, -1)) + style_feat = np.reshape(style_feat, (512, -1)) + + content_feat_index = np.argsort(content_feat, axis=1) + style_feat = np.sort(style_feat, axis=1) + + fr_feat = scatter_numpy(dim=1, index=content_feat_index, src=style_feat) + fr_feat = fr_feat * alpha + content_feat * (1 - alpha) + fr_feat = np.reshape(fr_feat, (1, 512, 64, 64)) + return fr_feat + + +def scatter_numpy(dim, index, src): + """ + Writes all values from the Tensor src into dst at the indices specified in the index Tensor. + + :param dim: The axis along which to index + :param index: The indices of elements to scatter + :param src: The source element(s) to scatter + :return: dst + """ + dst = src.copy() + idx_xsection_shape = index.shape[:dim] + index.shape[dim + 1:] + dst_xsection_shape = dst.shape[:dim] + dst.shape[dim + 1:] + if idx_xsection_shape != dst_xsection_shape: + raise ValueError("Except for dimension " + str(dim) + + ", all dimensions of index and output should be the same size") + if (index >= dst.shape[dim]).any() or (index < 0).any(): + raise IndexError("The values of index must be between 0 and {}.".format(dst.shape[dim] - 1)) + + def make_slice(arr, dim, i): + slc = [slice(None)] * arr.ndim + slc[dim] = i + return tuple(slc) + + # We use index and dim parameters to create idx + # idx is in a form that can be used as a NumPy advanced index for scattering of src param. 
+ idx = [[ + *np.indices(idx_xsection_shape).reshape(index.ndim - 1, -1), index[make_slice(index, dim, i)].reshape(1, -1)[0] + ] for i in range(index.shape[dim])] + idx = list(np.concatenate(idx, axis=1)) + idx.insert(dim, idx.pop()) + + if not np.isscalar(src): + if index.shape[dim] > src.shape[dim]: + raise IndexError("Dimension " + str(dim) + "of index can not be bigger than that of src ") + src_xsection_shape = src.shape[:dim] + src.shape[dim + 1:] + if idx_xsection_shape != src_xsection_shape: + raise ValueError("Except for dimension " + str(dim) + + ", all dimensions of index and src should be the same size") + # src_idx is a NumPy advanced index for indexing of elements in the src + src_idx = list(idx) + src_idx.pop(dim) + src_idx.insert(dim, np.repeat(np.arange(index.shape[dim]), np.prod(idx_xsection_shape))) + dst[tuple(idx)] = src[tuple(src_idx)] + else: + dst[idx] = src + return dst diff --git a/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/README.md b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/README.md similarity index 100% rename from hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/README.md rename to modules/image/text_recognition/chinese_ocr_db_crnn_mobile/README.md diff --git a/hub_module/modules/image/style_transfer/stylepro_artistic/__init__.py b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/__init__.py similarity index 100% rename from hub_module/modules/image/style_transfer/stylepro_artistic/__init__.py rename to modules/image/text_recognition/chinese_ocr_db_crnn_mobile/__init__.py diff --git a/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/assets/ppocr_keys_v1.txt b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/assets/ppocr_keys_v1.txt similarity index 100% rename from hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/assets/ppocr_keys_v1.txt rename to modules/image/text_recognition/chinese_ocr_db_crnn_mobile/assets/ppocr_keys_v1.txt diff --git a/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/assets/simfang.ttf b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/assets/simfang.ttf similarity index 100% rename from hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/assets/simfang.ttf rename to modules/image/text_recognition/chinese_ocr_db_crnn_mobile/assets/simfang.ttf diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/character.py b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/character.py new file mode 100644 index 0000000000000000000000000000000000000000..bf6d21f6e7d40db971d17d5d76a68daac768d2dd --- /dev/null +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/character.py @@ -0,0 +1,163 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
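+# A minimal usage sketch of CharacterOps below (illustrative, assuming the built-in 'en' character set): +#   ops = CharacterOps({'character_type': 'en', 'loss_type': 'ctc'}) +#   ops.encode('ab1')  # -> array([10, 11, 1]) +#   ops.decode([10, 11, 1])  # -> 'ab1'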
+ +import numpy as np +import string + + +class CharacterOps(object): + """ Convert between text-label and text-index """ + + def __init__(self, config): + self.character_type = config['character_type'] + self.loss_type = config['loss_type'] + if self.character_type == "en": + self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" + dict_character = list(self.character_str) + elif self.character_type == "ch": + character_dict_path = config['character_dict_path'] + self.character_str = "" + with open(character_dict_path, "rb") as fin: + lines = fin.readlines() + for line in lines: + line = line.decode('utf-8').strip("\n") + self.character_str += line + dict_character = list(self.character_str) + elif self.character_type == "en_sensitive": + # same as the ASTER setting (94 printable characters). + self.character_str = string.printable[:-6] + dict_character = list(self.character_str) + else: + self.character_str = None + assert self.character_str is not None, \ + "Unsupported character type: {}".format(self.character_type) + self.beg_str = "sos" + self.end_str = "eos" + if self.loss_type == "attention": + dict_character = [self.beg_str, self.end_str] + dict_character + self.dict = {} + for i, char in enumerate(dict_character): + self.dict[char] = i + self.character = dict_character + + def encode(self, text): + """convert text-label into text-index. + input: + text: text labels of each image. [batch_size] + + output: + text: concatenated text index for CTCLoss. + [sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)] + length: length of each text. [batch_size] + """ + if self.character_type == "en": + text = text.lower() + + text_list = [] + for char in text: + if char not in self.dict: + continue + text_list.append(self.dict[char]) + text = np.array(text_list) + return text + + def decode(self, text_index, is_remove_duplicate=False): + """ convert text-index into text-label.
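+ e.g. for the 'en' character set with is_remove_duplicate=True, the CTC + output [10, 10, 11] is decoded to 'ab' (the repeated index is dropped).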
""" + char_list = [] + char_num = self.get_char_num() + + if self.loss_type == "attention": + beg_idx = self.get_beg_end_flag_idx("beg") + end_idx = self.get_beg_end_flag_idx("end") + ignored_tokens = [beg_idx, end_idx] + else: + ignored_tokens = [char_num] + + for idx in range(len(text_index)): + if text_index[idx] in ignored_tokens: + continue + if is_remove_duplicate: + if idx > 0 and text_index[idx - 1] == text_index[idx]: + continue + char_list.append(self.character[text_index[idx]]) + text = ''.join(char_list) + return text + + def get_char_num(self): + return len(self.character) + + def get_beg_end_flag_idx(self, beg_or_end): + if self.loss_type == "attention": + if beg_or_end == "beg": + idx = np.array(self.dict[self.beg_str]) + elif beg_or_end == "end": + idx = np.array(self.dict[self.end_str]) + else: + assert False, "Unsupport type %s in get_beg_end_flag_idx"\ + % beg_or_end + return idx + else: + err = "error in get_beg_end_flag_idx when using the loss %s"\ + % (self.loss_type) + assert False, err + + +def cal_predicts_accuracy(char_ops, preds, preds_lod, labels, labels_lod, is_remove_duplicate=False): + acc_num = 0 + img_num = 0 + for ino in range(len(labels_lod) - 1): + beg_no = preds_lod[ino] + end_no = preds_lod[ino + 1] + preds_text = preds[beg_no:end_no].reshape(-1) + preds_text = char_ops.decode(preds_text, is_remove_duplicate) + + beg_no = labels_lod[ino] + end_no = labels_lod[ino + 1] + labels_text = labels[beg_no:end_no].reshape(-1) + labels_text = char_ops.decode(labels_text, is_remove_duplicate) + img_num += 1 + + if preds_text == labels_text: + acc_num += 1 + acc = acc_num * 1.0 / img_num + return acc, acc_num, img_num + + +def convert_rec_attention_infer_res(preds): + img_num = preds.shape[0] + target_lod = [0] + convert_ids = [] + for ino in range(img_num): + end_pos = np.where(preds[ino, :] == 1)[0] + if len(end_pos) <= 1: + text_list = preds[ino, 1:] + else: + text_list = preds[ino, 1:end_pos[1]] + target_lod.append(target_lod[ino] + len(text_list)) + convert_ids = convert_ids + list(text_list) + convert_ids = np.array(convert_ids) + convert_ids = convert_ids.reshape((-1, 1)) + return convert_ids, target_lod + + +def convert_rec_label_to_lod(ori_labels): + img_num = len(ori_labels) + target_lod = [0] + convert_ids = [] + for ino in range(img_num): + target_lod.append(target_lod[ino] + len(ori_labels[ino])) + convert_ids = convert_ids + list(ori_labels[ino]) + convert_ids = np.array(convert_ids) + convert_ids = convert_ids.reshape((-1, 1)) + return convert_ids, target_lod diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py new file mode 100644 index 0000000000000000000000000000000000000000..13c92f495de682a70e8ed2a1c51383a68b0cf391 --- /dev/null +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py @@ -0,0 +1,371 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import ast +import copy +import math +import os +import time + +from paddle.fluid.core import AnalysisConfig, create_paddle_predictor, PaddleTensor +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo, runnable, serving +from PIL import Image +import cv2 +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub + +from chinese_ocr_db_crnn_mobile.character import CharacterOps +from chinese_ocr_db_crnn_mobile.utils import 
base64_to_cv2, draw_ocr, get_image_ext, sorted_boxes + + +@moduleinfo( + name="chinese_ocr_db_crnn_mobile", + version="1.0.3", + summary= + "The module can recognize the chinese texts in an image. Firstly, it will detect the text box positions based on the differentiable_binarization_chn module. Then it recognizes the chinese texts. ", + author="paddle-dev", + author_email="paddle-dev@baidu.com", + type="cv/text_recognition") +class ChineseOCRDBCRNN(hub.Module): + def _initialize(self, text_detector_module=None): + """ + initialize with the necessary elements + """ + self.character_dict_path = os.path.join(self.directory, 'assets', 'ppocr_keys_v1.txt') + char_ops_params = {'character_type': 'ch', 'character_dict_path': self.character_dict_path, 'loss_type': 'ctc'} + self.char_ops = CharacterOps(char_ops_params) + self.rec_image_shape = [3, 32, 320] + self._text_detector_module = text_detector_module + self.font_file = os.path.join(self.directory, 'assets', 'simfang.ttf') + self.pretrained_model_path = os.path.join(self.directory, 'inference_model') + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + model_file_path = os.path.join(self.pretrained_model_path, 'model') + params_file_path = os.path.join(self.pretrained_model_path, 'params') + + config = AnalysisConfig(model_file_path, params_file_path) + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + + if use_gpu: + config.enable_use_gpu(8000, 0) + else: + config.disable_gpu() + + config.disable_glog_info() + + # use zero copy + config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") + config.switch_use_feed_fetch_ops(False) + self.predictor = create_paddle_predictor(config) + input_names = self.predictor.get_input_names() + self.input_tensor = self.predictor.get_input_tensor(input_names[0]) + output_names = self.predictor.get_output_names() + self.output_tensors = [] + for output_name in output_names: + output_tensor = self.predictor.get_output_tensor(output_name) + self.output_tensors.append(output_tensor) + + @property + def text_detector_module(self): + """ + text detect module + """ + if not self._text_detector_module: + self._text_detector_module = hub.Module(name='chinese_text_detection_db_mobile') + return self._text_detector_module + + def read_images(self, paths=[]): + images = [] + for img_path in paths: + assert os.path.isfile(img_path), "The {} isn't a valid file.".format(img_path) + img = cv2.imread(img_path) + if img is None: + logger.info("error in loading image:{}".format(img_path)) + continue + images.append(img) + return images + + def get_rotate_crop_image(self, img, points): + img_height, img_width = img.shape[0:2] + left = int(np.min(points[:, 0])) + right = int(np.max(points[:, 0])) + top = int(np.min(points[:, 1])) + bottom = int(np.max(points[:, 1])) + img_crop = img[top:bottom, left:right, :].copy() + points[:, 0] = points[:, 0] - left + points[:, 1] = points[:, 1] - top + img_crop_width = int(np.linalg.norm(points[0] - points[1])) + img_crop_height = int(np.linalg.norm(points[0] - points[3])) + pts_std = np.float32([[0, 0], [img_crop_width, 0],\ + [img_crop_width, img_crop_height], [0, img_crop_height]]) + M = cv2.getPerspectiveTransform(points, pts_std) + dst_img = cv2.warpPerspective(img_crop, M, (img_crop_width, img_crop_height), borderMode=cv2.BORDER_REPLICATE) + dst_img_height, dst_img_width = dst_img.shape[0:2] + if dst_img_height * 1.0 / dst_img_width >= 1.5: + dst_img = np.rot90(dst_img) + return 
dst_img + + def resize_norm_img(self, img, max_wh_ratio): + imgC, imgH, imgW = self.rec_image_shape + imgW = int(32 * max_wh_ratio) + h = img.shape[0] + w = img.shape[1] + ratio = w / float(h) + if math.ceil(imgH * ratio) > imgW: + resized_w = imgW + else: + resized_w = int(math.ceil(imgH * ratio)) + resized_image = cv2.resize(img, (resized_w, imgH)) + resized_image = resized_image.astype('float32') + resized_image = resized_image.transpose((2, 0, 1)) / 255 + resized_image -= 0.5 + resized_image /= 0.5 + padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) + padding_im[:, :, 0:resized_w] = resized_image + return padding_im + + def recognize_text(self, + images=[], + paths=[], + use_gpu=False, + output_dir='ocr_result', + visualization=False, + box_thresh=0.5, + text_thresh=0.5): + """ + Get the Chinese texts in the predicted images. + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C]; used when paths is not set. + paths (list[str]): The paths of images; used when images is not set. + use_gpu (bool): Whether to use GPU. + output_dir (str): The directory to store output images. + visualization (bool): Whether to save output images or not. + box_thresh(float): the confidence threshold of detected text boxes + text_thresh(float): the confidence threshold of recognized Chinese texts + Returns: + res (list): The recognized Chinese texts and the save paths of output images. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id." + ) + + self.use_gpu = use_gpu + + if images != [] and isinstance(images, list) and paths == []: + predicted_data = images + elif images == [] and isinstance(paths, list) and paths != []: + predicted_data = self.read_images(paths) + else: + raise TypeError("The input data is inconsistent with expectations.") + + assert predicted_data != [], "There is not any image to be predicted. Please check the input data." + + detection_results = self.text_detector_module.detect_text( + images=predicted_data, use_gpu=self.use_gpu, box_thresh=box_thresh) + boxes = [np.array(item['data']).astype(np.float32) for item in detection_results] + all_results = [] + for index, img_boxes in enumerate(boxes): + original_image = predicted_data[index].copy() + result = {'save_path': ''} + if img_boxes is None: + result['data'] = [] + else: + img_crop_list = [] + boxes = sorted_boxes(img_boxes) + for num_box in range(len(boxes)): + tmp_box = copy.deepcopy(boxes[num_box]) + img_crop = self.get_rotate_crop_image(original_image, tmp_box) + img_crop_list.append(img_crop) + + rec_results = self._recognize_text(img_crop_list) + # if the recognized text confidence score is lower than text_thresh, then drop it + rec_res_final = [] + for index, res in enumerate(rec_results): + text, score = res + if score >= text_thresh: + rec_res_final.append({ + 'text': text, + 'confidence': float(score), + 'text_box_position': boxes[index].astype(np.int).tolist() + }) + result['data'] = rec_res_final + + if visualization and result['data']: + result['save_path'] = self.save_result_image(original_image, boxes, rec_results, output_dir, + text_thresh) + all_results.append(result) + + return all_results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service.
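+ images: a list of base64-encoded strings; each is decoded with + base64_to_cv2 before being passed on to recognize_text.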
+ """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.recognize_text(images_decode, **kwargs) + return results + + def save_result_image(self, original_image, detection_boxes, rec_results, output_dir='ocr_result', text_thresh=0.5): + image = Image.fromarray(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)) + txts = [item[0] for item in rec_results] + scores = [item[1] for item in rec_results] + draw_img = draw_ocr( + image, detection_boxes, txts, scores, font_file=self.font_file, draw_txt=True, drop_score=text_thresh) + + if not os.path.exists(output_dir): + os.makedirs(output_dir) + ext = get_image_ext(original_image) + saved_name = 'ndarray_{}{}'.format(time.time(), ext) + save_file_path = os.path.join(output_dir, saved_name) + cv2.imwrite(save_file_path, draw_img[:, :, ::-1]) + return save_file_path + + def _recognize_text(self, image_list): + img_num = len(image_list) + batch_num = 30 + rec_res = [] + predict_time = 0 + for beg_img_no in range(0, img_num, batch_num): + end_img_no = min(img_num, beg_img_no + batch_num) + norm_img_batch = [] + max_wh_ratio = 0 + for ino in range(beg_img_no, end_img_no): + h, w = image_list[ino].shape[0:2] + wh_ratio = w / h + max_wh_ratio = max(max_wh_ratio, wh_ratio) + for ino in range(beg_img_no, end_img_no): + norm_img = self.resize_norm_img(image_list[ino], max_wh_ratio) + norm_img = norm_img[np.newaxis, :] + norm_img_batch.append(norm_img) + norm_img_batch = np.concatenate(norm_img_batch) + norm_img_batch = norm_img_batch.copy() + self.input_tensor.copy_from_cpu(norm_img_batch) + self.predictor.zero_copy_run() + rec_idx_batch = self.output_tensors[0].copy_to_cpu() + rec_idx_lod = self.output_tensors[0].lod()[0] + predict_batch = self.output_tensors[1].copy_to_cpu() + predict_lod = self.output_tensors[1].lod()[0] + + for rno in range(len(rec_idx_lod) - 1): + beg = rec_idx_lod[rno] + end = rec_idx_lod[rno + 1] + rec_idx_tmp = rec_idx_batch[beg:end, 0] + preds_text = self.char_ops.decode(rec_idx_tmp) + beg = predict_lod[rno] + end = predict_lod[rno + 1] + probs = predict_batch[beg:end, :] + ind = np.argmax(probs, axis=1) + blank = probs.shape[1] + valid_ind = np.where(ind != (blank - 1))[0] + if len(valid_ind) == 0: + continue + score = np.mean(probs[valid_ind, ind[valid_ind]]) + rec_res.append([preds_text, score]) + + return rec_res + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + detector_dir = os.path.join(dirname, 'text_detector') + recognizer_dir = os.path.join(dirname, 'text_recognizer') + self._save_detector_model(detector_dir, model_filename, params_filename, combined) + self._save_recognizer_model(recognizer_dir, model_filename, params_filename, combined) + logger.info("The inference model has been saved in the path {}".format(os.path.realpath(dirname))) + + def _save_detector_model(self, dirname, model_filename=None, params_filename=None, combined=True): + self.text_detector_module.save_inference_model(dirname, model_filename, params_filename, combined) + + def _save_recognizer_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + model_file_path = os.path.join(self.pretrained_model_path, 'model') + params_file_path = os.path.join(self.pretrained_model_path, 'params') + program, feeded_var_names, target_vars = 
fluid.io.load_inference_model( + dirname=self.pretrained_model_path, + model_filename=model_file_path, + params_filename=params_file_path, + executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + self.parser = argparse.ArgumentParser( + description="Run the %s module." % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + + self.add_module_config_arg() + self.add_module_input_arg() + + args = self.parser.parse_args(argvs) + results = self.recognize_text( + paths=[args.input_path], use_gpu=args.use_gpu, output_dir=args.output_dir, visualization=args.visualization) + return results + + def add_module_config_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether to use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='ocr_result', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options + """ + self.arg_input_group.add_argument('--input_path', type=str, default=None, help="path to the input image") + + +if __name__ == '__main__': + ocr = ChineseOCRDBCRNN() + image_path = [ + '/mnt/zhangxuefei/PaddleOCR/doc/imgs/11.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg', + '/mnt/zhangxuefei/PaddleOCR/doc/imgs/test_image.jpg' + ] + res = ocr.recognize_text(paths=image_path, visualization=True) + ocr.save_inference_model('save') + print(res) diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/utils.py b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..5574fbabc972dfd17cc6cf04a8145b77c49abab6 --- /dev/null +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/utils.py @@ -0,0 +1,177 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +from PIL import Image, ImageDraw, ImageFont +import base64 +import cv2 +import numpy as np + + +def draw_ocr(image, boxes, txts, scores, font_file, draw_txt=True, drop_score=0.5): + """ + Visualize the results of OCR detection and recognition + args: + image(Image|array): RGB image + boxes(list): boxes with shape(N, 4, 2) + txts(list): the texts + scores(list): the corresponding scores of txts + draw_txt(bool): whether to draw the texts or not + drop_score(float): only results with scores greater than drop_score will be visualized + return(array): + the visualized img + """ + if scores is None: + scores = [1] * len(boxes) + for (box, score) in zip(boxes, scores): + if score < drop_score or math.isnan(score): + continue + box = np.reshape(np.array(box), [-1, 1, 2]).astype(np.int64) + image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2) + + if draw_txt: + img =
np.array(resize_img(image, input_size=600)) + txt_img = text_visual(txts, scores, font_file, img_h=img.shape[0], img_w=600, threshold=drop_score) + img = np.concatenate([np.array(img), np.array(txt_img)], axis=1) + return img + return image + + +def text_visual(texts, scores, font_file, img_h=400, img_w=600, threshold=0.): + """ + Create a new blank image and draw the texts on it + args: + texts(list): the texts to be drawn + scores(list|None): the corresponding score of each text + img_h(int): the height of blank img + img_w(int): the width of blank img + return(array): + the rendered text image + """ + if scores is not None: + assert len(texts) == len(scores), "The number of txts and corresponding scores must match" + + def create_blank_img(): + blank_img = np.ones(shape=[img_h, img_w], dtype=np.int8) * 255 + blank_img[:, img_w - 1:] = 0 + blank_img = Image.fromarray(blank_img).convert("RGB") + draw_txt = ImageDraw.Draw(blank_img) + return blank_img, draw_txt + + blank_img, draw_txt = create_blank_img() + + font_size = 20 + txt_color = (0, 0, 0) + font = ImageFont.truetype(font_file, font_size, encoding="utf-8") + + gap = font_size + 5 + txt_img_list = [] + count, index = 1, 0 + for idx, txt in enumerate(texts): + index += 1 + if scores[idx] < threshold or math.isnan(scores[idx]): + index -= 1 + continue + first_line = True + while str_count(txt) >= img_w // font_size - 4: + tmp = txt + txt = tmp[:img_w // font_size - 4] + if first_line: + new_txt = str(index) + ': ' + txt + first_line = False + else: + new_txt = ' ' + txt + draw_txt.text((0, gap * count), new_txt, txt_color, font=font) + txt = tmp[img_w // font_size - 4:] + if count >= img_h // gap - 1: + txt_img_list.append(np.array(blank_img)) + blank_img, draw_txt = create_blank_img() + count = 0 + count += 1 + if first_line: + new_txt = str(index) + ': ' + txt + ' ' + '%.3f' % (scores[idx]) + else: + new_txt = " " + txt + " " + '%.3f' % (scores[idx]) + draw_txt.text((0, gap * count), new_txt, txt_color, font=font) + # whether to start a new blank img or not + if count >= img_h // gap - 1 and idx + 1 < len(texts): + txt_img_list.append(np.array(blank_img)) + blank_img, draw_txt = create_blank_img() + count = 0 + count += 1 + txt_img_list.append(np.array(blank_img)) + if len(txt_img_list) == 1: + blank_img = np.array(txt_img_list[0]) + else: + blank_img = np.concatenate(txt_img_list, axis=1) + return np.array(blank_img) + + +def str_count(s): + """ + Count the effective display length of a string: each Chinese character + counts as one, while an English character or a digit counts as half + a Chinese character.
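+ e.g. str_count('ab中文') returns 3: two Chinese characters plus two + English letters counted as one.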
+ args: + s(string): the input string + return(int): + the effective display length of s + """ + import string + count_zh = count_pu = 0 + s_len = len(s) + en_dg_count = 0 + for c in s: + if c in string.ascii_letters or c.isdigit() or c.isspace(): + en_dg_count += 1 + elif c.isalpha(): + count_zh += 1 + else: + count_pu += 1 + return s_len - math.ceil(en_dg_count / 2) + + +def resize_img(img, input_size=600): + img = np.array(img) + im_shape = img.shape + im_size_min = np.min(im_shape[0:2]) + im_size_max = np.max(im_shape[0:2]) + im_scale = float(input_size) / float(im_size_max) + im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale) + return im + + +def get_image_ext(image): + if image.shape[2] == 4: + return ".png" + return ".jpg" + + +def sorted_boxes(dt_boxes): + """ + Sort text boxes in order from top to bottom, left to right + args: + dt_boxes(array): detected text boxes with shape [N, 4, 2] + return: + sorted boxes(list), each box with shape [4, 2] + """ + num_boxes = dt_boxes.shape[0] + sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0])) + _boxes = list(sorted_boxes) + + for i in range(num_boxes - 1): + if abs(_boxes[i+1][0][1] - _boxes[i][0][1]) < 10 and \ + (_boxes[i + 1][0][0] < _boxes[i][0][0]): + tmp = _boxes[i] + _boxes[i] = _boxes[i + 1] + _boxes[i + 1] = tmp + return _boxes + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data diff --git a/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_server/README.md b/modules/image/text_recognition/chinese_ocr_db_crnn_server/README.md similarity index 100% rename from hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_server/README.md rename to modules/image/text_recognition/chinese_ocr_db_crnn_server/README.md diff --git a/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/__init__.py b/modules/image/text_recognition/chinese_ocr_db_crnn_server/__init__.py similarity index 100% rename from hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/__init__.py rename to modules/image/text_recognition/chinese_ocr_db_crnn_server/__init__.py diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_server/character.py b/modules/image/text_recognition/chinese_ocr_db_crnn_server/character.py new file mode 100644 index 0000000000000000000000000000000000000000..bf6d21f6e7d40db971d17d5d76a68daac768d2dd --- /dev/null +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_server/character.py @@ -0,0 +1,163 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +import numpy as np +import string + + +class CharacterOps(object): + """ Convert between text-label and text-index """ + + def __init__(self, config): + self.character_type = config['character_type'] + self.loss_type = config['loss_type'] + if self.character_type == "en": + self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" + dict_character = list(self.character_str) + elif self.character_type == "ch": + character_dict_path = config['character_dict_path'] + self.character_str = "" + with open(character_dict_path, "rb") as fin: + lines = fin.readlines() + for line in lines: + line = line.decode('utf-8').strip("\n") + self.character_str += line + dict_character = list(self.character_str) + elif self.character_type == "en_sensitive": + # same as the ASTER setting (94 printable characters). + self.character_str = string.printable[:-6] + dict_character = list(self.character_str) + else: + self.character_str = None + assert self.character_str is not None, \ + "Unsupported character type: {}".format(self.character_type) + self.beg_str = "sos" + self.end_str = "eos" + if self.loss_type == "attention": + dict_character = [self.beg_str, self.end_str] + dict_character + self.dict = {} + for i, char in enumerate(dict_character): + self.dict[char] = i + self.character = dict_character + + def encode(self, text): + """convert text-label into text-index. + input: + text: text labels of each image. [batch_size] + + output: + text: concatenated text index for CTCLoss. + [sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)] + length: length of each text. [batch_size] + """ + if self.character_type == "en": + text = text.lower() + + text_list = [] + for char in text: + if char not in self.dict: + continue + text_list.append(self.dict[char]) + text = np.array(text_list) + return text + + def decode(self, text_index, is_remove_duplicate=False): + """ convert text-index into text-label.
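+ e.g. for the 'en' character set with is_remove_duplicate=False, the + sequence [10, 10, 11] is decoded to 'aab' (duplicates are kept).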
""" + char_list = [] + char_num = self.get_char_num() + + if self.loss_type == "attention": + beg_idx = self.get_beg_end_flag_idx("beg") + end_idx = self.get_beg_end_flag_idx("end") + ignored_tokens = [beg_idx, end_idx] + else: + ignored_tokens = [char_num] + + for idx in range(len(text_index)): + if text_index[idx] in ignored_tokens: + continue + if is_remove_duplicate: + if idx > 0 and text_index[idx - 1] == text_index[idx]: + continue + char_list.append(self.character[text_index[idx]]) + text = ''.join(char_list) + return text + + def get_char_num(self): + return len(self.character) + + def get_beg_end_flag_idx(self, beg_or_end): + if self.loss_type == "attention": + if beg_or_end == "beg": + idx = np.array(self.dict[self.beg_str]) + elif beg_or_end == "end": + idx = np.array(self.dict[self.end_str]) + else: + assert False, "Unsupport type %s in get_beg_end_flag_idx"\ + % beg_or_end + return idx + else: + err = "error in get_beg_end_flag_idx when using the loss %s"\ + % (self.loss_type) + assert False, err + + +def cal_predicts_accuracy(char_ops, preds, preds_lod, labels, labels_lod, is_remove_duplicate=False): + acc_num = 0 + img_num = 0 + for ino in range(len(labels_lod) - 1): + beg_no = preds_lod[ino] + end_no = preds_lod[ino + 1] + preds_text = preds[beg_no:end_no].reshape(-1) + preds_text = char_ops.decode(preds_text, is_remove_duplicate) + + beg_no = labels_lod[ino] + end_no = labels_lod[ino + 1] + labels_text = labels[beg_no:end_no].reshape(-1) + labels_text = char_ops.decode(labels_text, is_remove_duplicate) + img_num += 1 + + if preds_text == labels_text: + acc_num += 1 + acc = acc_num * 1.0 / img_num + return acc, acc_num, img_num + + +def convert_rec_attention_infer_res(preds): + img_num = preds.shape[0] + target_lod = [0] + convert_ids = [] + for ino in range(img_num): + end_pos = np.where(preds[ino, :] == 1)[0] + if len(end_pos) <= 1: + text_list = preds[ino, 1:] + else: + text_list = preds[ino, 1:end_pos[1]] + target_lod.append(target_lod[ino] + len(text_list)) + convert_ids = convert_ids + list(text_list) + convert_ids = np.array(convert_ids) + convert_ids = convert_ids.reshape((-1, 1)) + return convert_ids, target_lod + + +def convert_rec_label_to_lod(ori_labels): + img_num = len(ori_labels) + target_lod = [0] + convert_ids = [] + for ino in range(img_num): + target_lod.append(target_lod[ino] + len(ori_labels[ino])) + convert_ids = convert_ids + list(ori_labels[ino]) + convert_ids = np.array(convert_ids) + convert_ids = convert_ids.reshape((-1, 1)) + return convert_ids, target_lod diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py b/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py new file mode 100644 index 0000000000000000000000000000000000000000..2ffd632b485f10fa76e7c10ac06618d99c1280a3 --- /dev/null +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py @@ -0,0 +1,372 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import ast +import copy +import math +import os +import time + +from paddle.fluid.core import AnalysisConfig, create_paddle_predictor, PaddleTensor +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo, runnable, serving +from PIL import Image +import cv2 +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub + +from chinese_ocr_db_crnn_server.character import CharacterOps +from chinese_ocr_db_crnn_server.utils import 
base64_to_cv2, draw_ocr, get_image_ext, sorted_boxes + + +@moduleinfo( + name="chinese_ocr_db_crnn_server", + version="1.0.2", + summary= + "The module can recognize the chinese texts in an image. Firstly, it will detect the text box positions based on the differentiable_binarization_chn module. Then it recognizes the chinese texts. ", + author="paddle-dev", + author_email="paddle-dev@baidu.com", + type="cv/text_recognition") +class ChineseOCRDBCRNNServer(hub.Module): + def _initialize(self, text_detector_module=None): + """ + initialize with the necessary elements + """ + self.character_dict_path = os.path.join(self.directory, 'assets', 'ppocr_keys_v1.txt') + char_ops_params = {'character_type': 'ch', 'character_dict_path': self.character_dict_path, 'loss_type': 'ctc'} + self.char_ops = CharacterOps(char_ops_params) + self.rec_image_shape = [3, 32, 320] + self._text_detector_module = text_detector_module + self.font_file = os.path.join(self.directory, 'assets', 'simfang.ttf') + self.pretrained_model_path = os.path.join(self.directory, 'assets', 'ch_rec_r34_vd_crnn') + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + model_file_path = os.path.join(self.pretrained_model_path, 'model') + params_file_path = os.path.join(self.pretrained_model_path, 'params') + + config = AnalysisConfig(model_file_path, params_file_path) + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + + if use_gpu: + config.enable_use_gpu(8000, 0) + else: + config.disable_gpu() + + config.disable_glog_info() + + # use zero copy + config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") + config.switch_use_feed_fetch_ops(False) + self.predictor = create_paddle_predictor(config) + input_names = self.predictor.get_input_names() + self.input_tensor = self.predictor.get_input_tensor(input_names[0]) + output_names = self.predictor.get_output_names() + self.output_tensors = [] + for output_name in output_names: + output_tensor = self.predictor.get_output_tensor(output_name) + self.output_tensors.append(output_tensor) + + @property + def text_detector_module(self): + """ + text detect module + """ + if not self._text_detector_module: + self._text_detector_module = hub.Module(name='chinese_text_detection_db_server') + return self._text_detector_module + + def read_images(self, paths=[]): + images = [] + for img_path in paths: + assert os.path.isfile(img_path), "The {} isn't a valid file.".format(img_path) + img = cv2.imread(img_path) + if img is None: + logger.info("error in loading image:{}".format(img_path)) + continue + images.append(img) + return images + + def get_rotate_crop_image(self, img, points): + img_height, img_width = img.shape[0:2] + left = int(np.min(points[:, 0])) + right = int(np.max(points[:, 0])) + top = int(np.min(points[:, 1])) + bottom = int(np.max(points[:, 1])) + img_crop = img[top:bottom, left:right, :].copy() + points[:, 0] = points[:, 0] - left + points[:, 1] = points[:, 1] - top + img_crop_width = int(np.linalg.norm(points[0] - points[1])) + img_crop_height = int(np.linalg.norm(points[0] - points[3])) + pts_std = np.float32([[0, 0], [img_crop_width, 0],\ + [img_crop_width, img_crop_height], [0, img_crop_height]]) + M = cv2.getPerspectiveTransform(points, pts_std) + dst_img = cv2.warpPerspective(img_crop, M, (img_crop_width, img_crop_height), borderMode=cv2.BORDER_REPLICATE) + dst_img_height, dst_img_width = dst_img.shape[0:2] + if dst_img_height * 1.0 / dst_img_width >= 1.5: + dst_img = 
np.rot90(dst_img) + return dst_img + + def resize_norm_img(self, img, max_wh_ratio): + imgC, imgH, imgW = self.rec_image_shape + imgW = int(32 * max_wh_ratio) + h = img.shape[0] + w = img.shape[1] + ratio = w / float(h) + if math.ceil(imgH * ratio) > imgW: + resized_w = imgW + else: + resized_w = int(math.ceil(imgH * ratio)) + resized_image = cv2.resize(img, (resized_w, imgH)) + resized_image = resized_image.astype('float32') + resized_image = resized_image.transpose((2, 0, 1)) / 255 + resized_image -= 0.5 + resized_image /= 0.5 + padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) + padding_im[:, :, 0:resized_w] = resized_image + return padding_im + + def recognize_text(self, + images=[], + paths=[], + use_gpu=False, + output_dir='ocr_result', + visualization=False, + box_thresh=0.5, + text_thresh=0.5): + """ + Get the Chinese texts in the predicted images. + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C]; used when paths is not set. + paths (list[str]): The paths of images; used when images is not set. + use_gpu (bool): Whether to use GPU. + output_dir (str): The directory to store output images. + visualization (bool): Whether to save output images or not. + box_thresh(float): the confidence threshold of detected text boxes + text_thresh(float): the confidence threshold of recognized Chinese texts + Returns: + res (list): The recognized Chinese texts and the save paths of output images. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id." + ) + + self.use_gpu = use_gpu + + if images != [] and isinstance(images, list) and paths == []: + predicted_data = images + elif images == [] and isinstance(paths, list) and paths != []: + predicted_data = self.read_images(paths) + else: + raise TypeError("The input data is inconsistent with expectations.") + + assert predicted_data != [], "There is not any image to be predicted. Please check the input data." + + detection_results = self.text_detector_module.detect_text( + images=predicted_data, use_gpu=self.use_gpu, box_thresh=box_thresh) + boxes = [np.array(item['data']).astype(np.float32) for item in detection_results] + all_results = [] + for index, img_boxes in enumerate(boxes): + original_image = predicted_data[index].copy() + result = {'save_path': ''} + if img_boxes is None: + result['data'] = [] + else: + img_crop_list = [] + boxes = sorted_boxes(img_boxes) + for num_box in range(len(boxes)): + tmp_box = copy.deepcopy(boxes[num_box]) + img_crop = self.get_rotate_crop_image(original_image, tmp_box) + img_crop_list.append(img_crop) + + rec_results = self._recognize_text(img_crop_list) + # if the recognized text confidence score is lower than text_thresh, then drop it + rec_res_final = [] + for index, res in enumerate(rec_results): + text, score = res + if score >= text_thresh: + rec_res_final.append({ + 'text': text, + 'confidence': float(score), + 'text_box_position': boxes[index].astype(np.int).tolist() + }) + result['data'] = rec_res_final + + if visualization and result['data']: + result['save_path'] = self.save_result_image(original_image, boxes, rec_results, output_dir, + text_thresh) + all_results.append(result) + + return all_results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service.
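+ images: a list of base64-encoded strings, decoded with base64_to_cv2 before recognition.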
+ """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.recognize_text(images_decode, **kwargs) + return results + + def save_result_image(self, original_image, detection_boxes, rec_results, output_dir='ocr_result', text_thresh=0.5): + image = Image.fromarray(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)) + txts = [item[0] for item in rec_results] + scores = [item[1] for item in rec_results] + draw_img = draw_ocr( + image, detection_boxes, txts, scores, font_file=self.font_file, draw_txt=True, drop_score=text_thresh) + + if not os.path.exists(output_dir): + os.makedirs(output_dir) + ext = get_image_ext(original_image) + saved_name = 'ndarray_{}{}'.format(time.time(), ext) + save_file_path = os.path.join(output_dir, saved_name) + cv2.imwrite(save_file_path, draw_img[:, :, ::-1]) + return save_file_path + + def _recognize_text(self, image_list): + img_num = len(image_list) + batch_num = 30 + rec_res = [] + predict_time = 0 + for beg_img_no in range(0, img_num, batch_num): + end_img_no = min(img_num, beg_img_no + batch_num) + norm_img_batch = [] + max_wh_ratio = 0 + for ino in range(beg_img_no, end_img_no): + h, w = image_list[ino].shape[0:2] + wh_ratio = w / h + max_wh_ratio = max(max_wh_ratio, wh_ratio) + for ino in range(beg_img_no, end_img_no): + norm_img = self.resize_norm_img(image_list[ino], max_wh_ratio) + norm_img = norm_img[np.newaxis, :] + norm_img_batch.append(norm_img) + norm_img_batch = np.concatenate(norm_img_batch) + norm_img_batch = norm_img_batch.copy() + self.input_tensor.copy_from_cpu(norm_img_batch) + self.predictor.zero_copy_run() + rec_idx_batch = self.output_tensors[0].copy_to_cpu() + rec_idx_lod = self.output_tensors[0].lod()[0] + predict_batch = self.output_tensors[1].copy_to_cpu() + predict_lod = self.output_tensors[1].lod()[0] + + for rno in range(len(rec_idx_lod) - 1): + beg = rec_idx_lod[rno] + end = rec_idx_lod[rno + 1] + rec_idx_tmp = rec_idx_batch[beg:end, 0] + preds_text = self.char_ops.decode(rec_idx_tmp) + beg = predict_lod[rno] + end = predict_lod[rno + 1] + probs = predict_batch[beg:end, :] + ind = np.argmax(probs, axis=1) + blank = probs.shape[1] + valid_ind = np.where(ind != (blank - 1))[0] + if len(valid_ind) == 0: + continue + score = np.mean(probs[valid_ind, ind[valid_ind]]) + rec_res.append([preds_text, score]) + + return rec_res + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + detector_dir = os.path.join(dirname, 'text_detector') + recognizer_dir = os.path.join(dirname, 'text_recognizer') + self._save_detector_model(detector_dir, model_filename, params_filename, combined) + self._save_recognizer_model(recognizer_dir, model_filename, params_filename, combined) + logger.info("The inference model has been saved in the path {}".format(os.path.realpath(dirname))) + + def _save_detector_model(self, dirname, model_filename=None, params_filename=None, combined=True): + self.text_detector_module.save_inference_model(dirname, model_filename, params_filename, combined) + + def _save_recognizer_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + model_file_path = os.path.join(self.pretrained_model_path, 'model') + params_file_path = os.path.join(self.pretrained_model_path, 'params') + program, feeded_var_names, target_vars = 
fluid.io.load_inference_model( + dirname=self.pretrained_model_path, + model_filename=model_file_path, + params_filename=params_file_path, + executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + self.parser = argparse.ArgumentParser( + description="Run the %s module." % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + + self.add_module_config_arg() + self.add_module_input_arg() + + args = self.parser.parse_args(argvs) + results = self.recognize_text( + paths=[args.input_path], use_gpu=args.use_gpu, output_dir=args.output_dir, visualization=args.visualization) + return results + + def add_module_config_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether to use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='ocr_result', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options + """ + self.arg_input_group.add_argument('--input_path', type=str, default=None, help="path to the input image") + + +if __name__ == '__main__': + ocr = ChineseOCRDBCRNNServer() + print(ocr.name) + image_path = [ + '/mnt/zhangxuefei/PaddleOCR/doc/imgs/11.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg', + '/mnt/zhangxuefei/PaddleOCR/doc/imgs/test_image.jpg' + ] + res = ocr.recognize_text(paths=image_path, visualization=True) + ocr.save_inference_model('save') + print(res) diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_server/utils.py b/modules/image/text_recognition/chinese_ocr_db_crnn_server/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..5574fbabc972dfd17cc6cf04a8145b77c49abab6 --- /dev/null +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_server/utils.py @@ -0,0 +1,177 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +from PIL import Image, ImageDraw, ImageFont +import base64 +import cv2 +import numpy as np + + +def draw_ocr(image, boxes, txts, scores, font_file, draw_txt=True, drop_score=0.5): + """ + Visualize the results of OCR detection and recognition + args: + image(Image|array): RGB image + boxes(list): boxes with shape(N, 4, 2) + txts(list): the texts + scores(list): the corresponding scores of txts + draw_txt(bool): whether to draw the texts or not + drop_score(float): only results with scores greater than drop_score will be visualized + return(array): + the visualized img + """ + if scores is None: + scores = [1] * len(boxes) + for (box, score) in zip(boxes, scores): + if score < drop_score or math.isnan(score): + continue + box = np.reshape(np.array(box), [-1, 1, 2]).astype(np.int64) + image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2) + + if
+        img = np.array(resize_img(image, input_size=600))
+        txt_img = text_visual(txts, scores, font_file, img_h=img.shape[0], img_w=600, threshold=drop_score)
+        img = np.concatenate([np.array(img), np.array(txt_img)], axis=1)
+        return img
+    return image
+
+
+def text_visual(texts, scores, font_file, img_h=400, img_w=600, threshold=0.):
+    """
+    Create a new blank image and draw the texts on it.
+    args:
+        texts(list): the texts to be drawn
+        scores(list|None): corresponding score of each txt
+        img_h(int): the height of blank img
+        img_w(int): the width of blank img
+    return(array):
+    """
+    if scores is not None:
+        assert len(texts) == len(scores), "The number of txts and corresponding scores must match"
+
+    def create_blank_img():
+        blank_img = np.ones(shape=[img_h, img_w], dtype=np.int8) * 255
+        blank_img[:, img_w - 1:] = 0
+        blank_img = Image.fromarray(blank_img).convert("RGB")
+        draw_txt = ImageDraw.Draw(blank_img)
+        return blank_img, draw_txt
+
+    blank_img, draw_txt = create_blank_img()
+
+    font_size = 20
+    txt_color = (0, 0, 0)
+    font = ImageFont.truetype(font_file, font_size, encoding="utf-8")
+
+    gap = font_size + 5
+    txt_img_list = []
+    count, index = 1, 0
+    for idx, txt in enumerate(texts):
+        index += 1
+        if scores[idx] < threshold or math.isnan(scores[idx]):
+            index -= 1
+            continue
+        first_line = True
+        # Wrap texts wider than the panel: peel off one panel-width chunk
+        # per rendered line until the remainder fits.
+        while str_count(txt) >= img_w // font_size - 4:
+            tmp = txt
+            txt = tmp[:img_w // font_size - 4]
+            if first_line:
+                new_txt = str(index) + ': ' + txt
+                first_line = False
+            else:
+                new_txt = '    ' + txt
+            draw_txt.text((0, gap * count), new_txt, txt_color, font=font)
+            txt = tmp[img_w // font_size - 4:]
+            if count >= img_h // gap - 1:
+                txt_img_list.append(np.array(blank_img))
+                blank_img, draw_txt = create_blank_img()
+                count = 0
+            count += 1
+        if first_line:
+            new_txt = str(index) + ': ' + txt + '   ' + '%.3f' % (scores[idx])
+        else:
+            new_txt = "    " + txt + "  " + '%.3f' % (scores[idx])
+        draw_txt.text((0, gap * count), new_txt, txt_color, font=font)
+        # whether to start a new blank img or not
+        if count >= img_h // gap - 1 and idx + 1 < len(texts):
+            txt_img_list.append(np.array(blank_img))
+            blank_img, draw_txt = create_blank_img()
+            count = 0
+        count += 1
+    txt_img_list.append(np.array(blank_img))
+    if len(txt_img_list) == 1:
+        blank_img = np.array(txt_img_list[0])
+    else:
+        blank_img = np.concatenate(txt_img_list, axis=1)
+    return np.array(blank_img)
+
+
+def str_count(s):
+    """
+    Count the display width of a string in Chinese-character units: each
+    ASCII letter, digit or space counts as half a Chinese character.
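+    e.g. (an illustrative value, relying on the true division imported
+    above): str_count('中文abc') == 5 - ceil(3 / 2) == 3, i.e. two Chinese
+    characters plus three ASCII letters counted as half each, rounded down.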
+ args: + s(string): the input of string + return(int): + the number of Chinese characters + """ + import string + count_zh = count_pu = 0 + s_len = len(s) + en_dg_count = 0 + for c in s: + if c in string.ascii_letters or c.isdigit() or c.isspace(): + en_dg_count += 1 + elif c.isalpha(): + count_zh += 1 + else: + count_pu += 1 + return s_len - math.ceil(en_dg_count / 2) + + +def resize_img(img, input_size=600): + img = np.array(img) + im_shape = img.shape + im_size_min = np.min(im_shape[0:2]) + im_size_max = np.max(im_shape[0:2]) + im_scale = float(input_size) / float(im_size_max) + im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale) + return im + + +def get_image_ext(image): + if image.shape[2] == 4: + return ".png" + return ".jpg" + + +def sorted_boxes(dt_boxes): + """ + Sort text boxes in order from top to bottom, left to right + args: + dt_boxes(array):detected text boxes with shape [4, 2] + return: + sorted boxes(array) with shape [4, 2] + """ + num_boxes = dt_boxes.shape[0] + sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0])) + _boxes = list(sorted_boxes) + + for i in range(num_boxes - 1): + if abs(_boxes[i+1][0][1] - _boxes[i][0][1]) < 10 and \ + (_boxes[i + 1][0][0] < _boxes[i][0][0]): + tmp = _boxes[i] + _boxes[i] = _boxes[i + 1] + _boxes[i + 1] = tmp + return _boxes + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data diff --git a/hub_module/modules/image/text_recognition/chinese_text_detection_db_mobile/README.md b/modules/image/text_recognition/chinese_text_detection_db_mobile/README.md similarity index 100% rename from hub_module/modules/image/text_recognition/chinese_text_detection_db_mobile/README.md rename to modules/image/text_recognition/chinese_text_detection_db_mobile/README.md diff --git a/hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_server/__init__.py b/modules/image/text_recognition/chinese_text_detection_db_mobile/__init__.py similarity index 100% rename from hub_module/modules/image/text_recognition/chinese_ocr_db_crnn_server/__init__.py rename to modules/image/text_recognition/chinese_text_detection_db_mobile/__init__.py diff --git a/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py b/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py new file mode 100644 index 0000000000000000000000000000000000000000..3cebc67fc16be87e56fc38857a58b3ddbbb0b635 --- /dev/null +++ b/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py @@ -0,0 +1,306 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import ast +import math +import os +import time + +from paddle.fluid.core import AnalysisConfig, create_paddle_predictor, PaddleTensor +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo, runnable, serving +from PIL import Image +import base64 +import cv2 +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +@moduleinfo( + name="chinese_text_detection_db_mobile", + version="1.0.1", + summary= + "The module aims to detect chinese text position in the image, which is based on differentiable_binarization 
algorithm.", + author="paddle-dev", + author_email="paddle-dev@baidu.com", + type="cv/text_recognition") +class ChineseTextDetectionDB(hub.Module): + def _initialize(self): + """ + initialize with the necessary elements + """ + self.pretrained_model_path = os.path.join(self.directory, 'inference_model') + self._set_config() + + def check_requirements(self): + try: + import shapely, pyclipper + except: + raise ImportError( + 'This module requires the shapely, pyclipper tools. The running environment does not meet the requirements. Please install the two packages.' + ) + + def _set_config(self): + """ + predictor config setting + """ + model_file_path = os.path.join(self.pretrained_model_path, 'model') + params_file_path = os.path.join(self.pretrained_model_path, 'params') + + config = AnalysisConfig(model_file_path, params_file_path) + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + + if use_gpu: + config.enable_use_gpu(8000, 0) + else: + config.disable_gpu() + + config.disable_glog_info() + + # use zero copy + config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") + config.switch_use_feed_fetch_ops(False) + self.predictor = create_paddle_predictor(config) + input_names = self.predictor.get_input_names() + self.input_tensor = self.predictor.get_input_tensor(input_names[0]) + output_names = self.predictor.get_output_names() + self.output_tensors = [] + for output_name in output_names: + output_tensor = self.predictor.get_output_tensor(output_name) + self.output_tensors.append(output_tensor) + + def read_images(self, paths=[]): + images = [] + for img_path in paths: + assert os.path.isfile(img_path), "The {} isn't a valid file.".format(img_path) + img = cv2.imread(img_path) + if img is None: + logger.info("error in loading image:{}".format(img_path)) + continue + images.append(img) + return images + + def filter_tag_det_res(self, dt_boxes, image_shape): + img_height, img_width = image_shape[0:2] + dt_boxes_new = [] + for box in dt_boxes: + box = self.order_points_clockwise(box) + left = int(np.min(box[:, 0])) + right = int(np.max(box[:, 0])) + top = int(np.min(box[:, 1])) + bottom = int(np.max(box[:, 1])) + bbox_height = bottom - top + bbox_width = right - left + diffh = math.fabs(box[0, 1] - box[1, 1]) + diffw = math.fabs(box[0, 0] - box[3, 0]) + rect_width = int(np.linalg.norm(box[0] - box[1])) + rect_height = int(np.linalg.norm(box[0] - box[3])) + if rect_width <= 10 or rect_height <= 10: + continue + dt_boxes_new.append(box) + dt_boxes = np.array(dt_boxes_new) + return dt_boxes + + def order_points_clockwise(self, pts): + """ + reference from: https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py + # sort the points based on their x-coordinates + """ + xSorted = pts[np.argsort(pts[:, 0]), :] + + # grab the left-most and right-most points from the sorted + # x-roodinate points + leftMost = xSorted[:2, :] + rightMost = xSorted[2:, :] + + # now, sort the left-most coordinates according to their + # y-coordinates so we can grab the top-left and bottom-left + # points, respectively + leftMost = leftMost[np.argsort(leftMost[:, 1]), :] + (tl, bl) = leftMost + + rightMost = rightMost[np.argsort(rightMost[:, 1]), :] + (tr, br) = rightMost + + rect = np.array([tl, tr, br, bl], dtype="float32") + return rect + + def detect_text(self, + images=[], + paths=[], + use_gpu=False, + output_dir='detection_result', + visualization=False, + box_thresh=0.5): + """ + Get the text box in the predicted images. 
+ Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths + paths (list[str]): The paths of images. If paths not images + use_gpu (bool): Whether to use gpu. Default false. + output_dir (str): The directory to store output images. + visualization (bool): Whether to save image or not. + box_thresh(float): the threshold of the detected text box's confidence + Returns: + res (list): The result of text detection box and save path of images. + """ + self.check_requirements() + + from chinese_text_detection_db_mobile.processor import DBPreProcess, DBPostProcess, draw_boxes, get_image_ext + + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id." + ) + + if images != [] and isinstance(images, list) and paths == []: + predicted_data = images + elif images == [] and isinstance(paths, list) and paths != []: + predicted_data = self.read_images(paths) + else: + raise TypeError("The input data is inconsistent with expectations.") + + assert predicted_data != [], "There is not any image to be predicted. Please check the input data." + + preprocessor = DBPreProcess() + postprocessor = DBPostProcess(box_thresh) + + all_imgs = [] + all_ratios = [] + all_results = [] + for original_image in predicted_data: + im, ratio_list = preprocessor(original_image) + res = {'save_path': ''} + if im is None: + res['data'] = [] + + else: + im = im.copy() + starttime = time.time() + self.input_tensor.copy_from_cpu(im) + self.predictor.zero_copy_run() + data_out = self.output_tensors[0].copy_to_cpu() + dt_boxes_list = postprocessor(data_out, [ratio_list]) + boxes = self.filter_tag_det_res(dt_boxes_list[0], original_image.shape) + res['data'] = boxes.astype(np.int).tolist() + + all_imgs.append(im) + all_ratios.append(ratio_list) + if visualization: + img = Image.fromarray(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)) + draw_img = draw_boxes(img, boxes) + draw_img = np.array(draw_img) + if not os.path.exists(output_dir): + os.makedirs(output_dir) + ext = get_image_ext(original_image) + saved_name = 'ndarray_{}{}'.format(time.time(), ext) + cv2.imwrite(os.path.join(output_dir, saved_name), draw_img[:, :, ::-1]) + res['save_path'] = os.path.join(output_dir, saved_name) + + all_results.append(res) + + return all_results + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + model_file_path = os.path.join(self.pretrained_model_path, 'model') + params_file_path = os.path.join(self.pretrained_model_path, 'params') + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.pretrained_model_path, + model_filename=model_file_path, + params_filename=params_file_path, + executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. 
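+        Images are expected as base64-encoded strings; they are decoded to
+        BGR ndarrays and forwarded to detect_text with any extra kwargs.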
+ """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.detect_text(images=images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + self.parser = argparse.ArgumentParser( + description="Run the %s module." % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + + self.add_module_config_arg() + self.add_module_input_arg() + + args = self.parser.parse_args(argvs) + results = self.detect_text( + paths=[args.input_path], use_gpu=args.use_gpu, output_dir=args.output_dir, visualization=args.visualization) + return results + + def add_module_config_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='detection_result', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options + """ + self.arg_input_group.add_argument('--input_path', type=str, default=None, help="diretory to image") + + +if __name__ == '__main__': + db = ChineseTextDetectionDB() + image_path = [ + '/mnt/zhangxuefei/PaddleOCR/doc/imgs/11.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg', + '/mnt/zhangxuefei/PaddleOCR/doc/imgs/test_image.jpg' + ] + res = db.detect_text(paths=image_path, visualization=True) + db.save_inference_model('save') + print(res) diff --git a/modules/image/text_recognition/chinese_text_detection_db_mobile/processor.py b/modules/image/text_recognition/chinese_text_detection_db_mobile/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..a213f1efa0607215d2e21ab38622367d1277dcbb --- /dev/null +++ b/modules/image/text_recognition/chinese_text_detection_db_mobile/processor.py @@ -0,0 +1,223 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys + +from PIL import Image, ImageDraw, ImageFont +from shapely.geometry import Polygon +import cv2 +import numpy as np +import pyclipper + + +class DBPreProcess(object): + def __init__(self, max_side_len=960): + self.max_side_len = max_side_len + + def resize_image_type(self, im): + """ + resize image to a size multiple of 32 which is required by the network + """ + h, w, _ = im.shape + + resize_w = w + resize_h = h + + # limit the max side + if max(resize_h, resize_w) > self.max_side_len: + if resize_h > resize_w: + ratio = float(self.max_side_len) / resize_h + else: + ratio = float(self.max_side_len) / resize_w + else: + ratio = 1. 
+ resize_h = int(resize_h * ratio) + resize_w = int(resize_w * ratio) + if resize_h % 32 == 0: + resize_h = resize_h + elif resize_h // 32 <= 1: + resize_h = 32 + else: + resize_h = (resize_h // 32 - 1) * 32 + if resize_w % 32 == 0: + resize_w = resize_w + elif resize_w // 32 <= 1: + resize_w = 32 + else: + resize_w = (resize_w // 32 - 1) * 32 + try: + if int(resize_w) <= 0 or int(resize_h) <= 0: + return None, (None, None) + im = cv2.resize(im, (int(resize_w), int(resize_h))) + except: + print(im.shape, resize_w, resize_h) + sys.exit(0) + ratio_h = resize_h / float(h) + ratio_w = resize_w / float(w) + return im, (ratio_h, ratio_w) + + def normalize(self, im): + img_mean = [0.485, 0.456, 0.406] + img_std = [0.229, 0.224, 0.225] + im = im.astype(np.float32, copy=False) + im = im / 255 + im -= img_mean + im /= img_std + channel_swap = (2, 0, 1) + im = im.transpose(channel_swap) + return im + + def __call__(self, im): + im, (ratio_h, ratio_w) = self.resize_image_type(im) + im = self.normalize(im) + im = im[np.newaxis, :] + return [im, (ratio_h, ratio_w)] + + +class DBPostProcess(object): + """ + The post process for Differentiable Binarization (DB). + """ + + def __init__(self, thresh=0.3, box_thresh=0.5, max_candidates=1000): + self.thresh = thresh + self.box_thresh = box_thresh + self.max_candidates = max_candidates + self.min_size = 3 + + def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height): + ''' + _bitmap: single map with shape (1, H, W), + whose values are binarized as {0, 1} + ''' + + bitmap = _bitmap + height, width = bitmap.shape + + outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE) + if len(outs) == 3: + img, contours, _ = outs[0], outs[1], outs[2] + elif len(outs) == 2: + contours, _ = outs[0], outs[1] + + num_contours = min(len(contours), self.max_candidates) + boxes = np.zeros((num_contours, 4, 2), dtype=np.int16) + scores = np.zeros((num_contours, ), dtype=np.float32) + + for index in range(num_contours): + contour = contours[index] + points, sside = self.get_mini_boxes(contour) + if sside < self.min_size: + continue + points = np.array(points) + score = self.box_score_fast(pred, points.reshape(-1, 2)) + if self.box_thresh > score: + continue + + box = self.unclip(points).reshape(-1, 1, 2) + box, sside = self.get_mini_boxes(box) + if sside < self.min_size + 2: + continue + box = np.array(box) + if not isinstance(dest_width, int): + dest_width = dest_width.item() + dest_height = dest_height.item() + + box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width) + box[:, 1] = np.clip(np.round(box[:, 1] / height * dest_height), 0, dest_height) + boxes[index, :, :] = box.astype(np.int16) + scores[index] = score + return boxes, scores + + def unclip(self, box, unclip_ratio=2.0): + poly = Polygon(box) + distance = poly.area * unclip_ratio / poly.length + offset = pyclipper.PyclipperOffset() + offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) + expanded = np.array(offset.Execute(distance)) + return expanded + + def get_mini_boxes(self, contour): + bounding_box = cv2.minAreaRect(contour) + points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0]) + + index_1, index_2, index_3, index_4 = 0, 1, 2, 3 + if points[1][1] > points[0][1]: + index_1 = 0 + index_4 = 1 + else: + index_1 = 1 + index_4 = 0 + if points[3][1] > points[2][1]: + index_2 = 2 + index_3 = 3 + else: + index_2 = 3 + index_3 = 2 + + box = [points[index_1], points[index_2], points[index_3], points[index_4]] + return 
box, min(bounding_box[1]) + + def box_score_fast(self, bitmap, _box): + h, w = bitmap.shape[:2] + box = _box.copy() + xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1) + xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int), 0, w - 1) + ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int), 0, h - 1) + ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int), 0, h - 1) + + mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) + box[:, 0] = box[:, 0] - xmin + box[:, 1] = box[:, 1] - ymin + cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1) + return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] + + def __call__(self, predictions, ratio_list): + pred = predictions[:, 0, :, :] + segmentation = pred > self.thresh + + boxes_batch = [] + for batch_index in range(pred.shape[0]): + height, width = pred.shape[-2:] + tmp_boxes, tmp_scores = self.boxes_from_bitmap(pred[batch_index], segmentation[batch_index], width, height) + + boxes = [] + for k in range(len(tmp_boxes)): + if tmp_scores[k] > self.box_thresh: + boxes.append(tmp_boxes[k]) + if len(boxes) > 0: + boxes = np.array(boxes) + + ratio_h, ratio_w = ratio_list[batch_index] + boxes[:, :, 0] = boxes[:, :, 0] / ratio_w + boxes[:, :, 1] = boxes[:, :, 1] / ratio_h + + boxes_batch.append(boxes) + return boxes_batch + + +def draw_boxes(image, boxes, scores=None, drop_score=0.5): + img = image.copy() + draw = ImageDraw.Draw(img) + if scores is None: + scores = [1] * len(boxes) + for (box, score) in zip(boxes, scores): + if score < drop_score: + continue + draw.line([(box[0][0], box[0][1]), (box[1][0], box[1][1])], fill='red') + draw.line([(box[1][0], box[1][1]), (box[2][0], box[2][1])], fill='red') + draw.line([(box[2][0], box[2][1]), (box[3][0], box[3][1])], fill='red') + draw.line([(box[3][0], box[3][1]), (box[0][0], box[0][1])], fill='red') + draw.line([(box[0][0] - 1, box[0][1] + 1), (box[1][0] - 1, box[1][1] + 1)], fill='red') + draw.line([(box[1][0] - 1, box[1][1] + 1), (box[2][0] - 1, box[2][1] + 1)], fill='red') + draw.line([(box[2][0] - 1, box[2][1] + 1), (box[3][0] - 1, box[3][1] + 1)], fill='red') + draw.line([(box[3][0] - 1, box[3][1] + 1), (box[0][0] - 1, box[0][1] + 1)], fill='red') + return img + + +def get_image_ext(image): + if image.shape[2] == 4: + return ".png" + return ".jpg" diff --git a/hub_module/modules/image/text_recognition/chinese_text_detection_db_server/README.md b/modules/image/text_recognition/chinese_text_detection_db_server/README.md similarity index 100% rename from hub_module/modules/image/text_recognition/chinese_text_detection_db_server/README.md rename to modules/image/text_recognition/chinese_text_detection_db_server/README.md diff --git a/hub_module/modules/image/text_recognition/chinese_text_detection_db_mobile/__init__.py b/modules/image/text_recognition/chinese_text_detection_db_server/__init__.py similarity index 100% rename from hub_module/modules/image/text_recognition/chinese_text_detection_db_mobile/__init__.py rename to modules/image/text_recognition/chinese_text_detection_db_server/__init__.py diff --git a/modules/image/text_recognition/chinese_text_detection_db_server/module.py b/modules/image/text_recognition/chinese_text_detection_db_server/module.py new file mode 100644 index 0000000000000000000000000000000000000000..60c54eb63aece200faa121ce6bd15b4d23f0c9f9 --- /dev/null +++ b/modules/image/text_recognition/chinese_text_detection_db_server/module.py @@ -0,0 +1,303 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ 
import division +from __future__ import print_function + +import argparse +import ast +import math +import os +import time + +from paddle.fluid.core import AnalysisConfig, create_paddle_predictor, PaddleTensor +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo, runnable, serving +from PIL import Image +import base64 +import cv2 +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +@moduleinfo( + name="chinese_text_detection_db_server", + version="1.0.0", + summary= + "The module aims to detect chinese text position in the image, which is based on differentiable_binarization algorithm.", + author="paddle-dev", + author_email="paddle-dev@baidu.com", + type="cv/text_recognition") +class ChineseTextDetectionDBServer(hub.Module): + def _initialize(self): + """ + initialize with the necessary elements + """ + self.pretrained_model_path = os.path.join(self.directory, 'ch_det_r50_vd_db') + self._set_config() + + def check_requirements(self): + try: + import shapely, pyclipper + except: + raise ImportError( + 'This module requires the shapely, pyclipper tools. The running environment does not meet the requirements. Please install the two packages.' + ) + + def _set_config(self): + """ + predictor config setting + """ + model_file_path = os.path.join(self.pretrained_model_path, 'model') + params_file_path = os.path.join(self.pretrained_model_path, 'params') + + config = AnalysisConfig(model_file_path, params_file_path) + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + + if use_gpu: + config.enable_use_gpu(8000, 0) + else: + config.disable_gpu() + + config.disable_glog_info() + + # use zero copy + config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") + config.switch_use_feed_fetch_ops(False) + self.predictor = create_paddle_predictor(config) + input_names = self.predictor.get_input_names() + self.input_tensor = self.predictor.get_input_tensor(input_names[0]) + output_names = self.predictor.get_output_names() + self.output_tensors = [] + for output_name in output_names: + output_tensor = self.predictor.get_output_tensor(output_name) + self.output_tensors.append(output_tensor) + + def read_images(self, paths=[]): + images = [] + for img_path in paths: + assert os.path.isfile(img_path), "The {} isn't a valid file.".format(img_path) + img = cv2.imread(img_path) + if img is None: + logger.info("error in loading image:{}".format(img_path)) + continue + images.append(img) + return images + + def filter_tag_det_res(self, dt_boxes, image_shape): + img_height, img_width = image_shape[0:2] + dt_boxes_new = [] + for box in dt_boxes: + box = self.order_points_clockwise(box) + left = int(np.min(box[:, 0])) + right = int(np.max(box[:, 0])) + top = int(np.min(box[:, 1])) + bottom = int(np.max(box[:, 1])) + bbox_height = bottom - top + bbox_width = right - left + diffh = math.fabs(box[0, 1] - box[1, 1]) + diffw = math.fabs(box[0, 0] - box[3, 0]) + rect_width = int(np.linalg.norm(box[0] - box[1])) + rect_height = int(np.linalg.norm(box[0] - box[3])) + if rect_width <= 10 or rect_height <= 10: + continue + dt_boxes_new.append(box) + dt_boxes = np.array(dt_boxes_new) + return dt_boxes + + def order_points_clockwise(self, pts): + """ + reference from: 
https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py + # sort the points based on their x-coordinates + """ + xSorted = pts[np.argsort(pts[:, 0]), :] + + # grab the left-most and right-most points from the sorted + # x-roodinate points + leftMost = xSorted[:2, :] + rightMost = xSorted[2:, :] + + # now, sort the left-most coordinates according to their + # y-coordinates so we can grab the top-left and bottom-left + # points, respectively + leftMost = leftMost[np.argsort(leftMost[:, 1]), :] + (tl, bl) = leftMost + + rightMost = rightMost[np.argsort(rightMost[:, 1]), :] + (tr, br) = rightMost + + rect = np.array([tl, tr, br, bl], dtype="float32") + return rect + + def detect_text(self, + images=[], + paths=[], + use_gpu=False, + output_dir='detection_result', + visualization=False, + box_thresh=0.5): + """ + Get the text box in the predicted images. + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths + paths (list[str]): The paths of images. If paths not images + use_gpu (bool): Whether to use gpu. Default false. + output_dir (str): The directory to store output images. + visualization (bool): Whether to save image or not. + box_thresh(float): the threshold of the detected text box's confidence + Returns: + res (list): The result of text detection box and save path of images. + """ + self.check_requirements() + + from chinese_text_detection_db_server.processor import DBPreProcess, DBPostProcess, draw_boxes, get_image_ext + + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id." + ) + + if images != [] and isinstance(images, list) and paths == []: + predicted_data = images + elif images == [] and isinstance(paths, list) and paths != []: + predicted_data = self.read_images(paths) + else: + raise TypeError("The input data is inconsistent with expectations.") + + assert predicted_data != [], "There is not any image to be predicted. Please check the input data." 
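+        # Pipeline sketch: DBPreProcess resizes and normalizes the image, the
+        # predictor emits a per-pixel text probability map, and DBPostProcess
+        # binarizes it and keeps the boxes scored above box_thresh.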
+ + preprocessor = DBPreProcess() + postprocessor = DBPostProcess(box_thresh) + + all_imgs = [] + all_ratios = [] + all_results = [] + for original_image in predicted_data: + im, ratio_list = preprocessor(original_image) + res = {'save_path': ''} + if im is None: + res['data'] = [] + + else: + im = im.copy() + starttime = time.time() + self.input_tensor.copy_from_cpu(im) + self.predictor.zero_copy_run() + data_out = self.output_tensors[0].copy_to_cpu() + dt_boxes_list = postprocessor(data_out, [ratio_list]) + boxes = self.filter_tag_det_res(dt_boxes_list[0], original_image.shape) + res['data'] = boxes.astype(np.int).tolist() + + all_imgs.append(im) + all_ratios.append(ratio_list) + if visualization: + img = Image.fromarray(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)) + draw_img = draw_boxes(img, boxes) + draw_img = np.array(draw_img) + if not os.path.exists(output_dir): + os.makedirs(output_dir) + ext = get_image_ext(original_image) + saved_name = 'ndarray_{}{}'.format(time.time(), ext) + cv2.imwrite(os.path.join(output_dir, saved_name), draw_img[:, :, ::-1]) + res['save_path'] = os.path.join(output_dir, saved_name) + + all_results.append(res) + + return all_results + + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + model_file_path = os.path.join(self.pretrained_model_path, 'model') + params_file_path = os.path.join(self.pretrained_model_path, 'params') + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.pretrained_model_path, + model_filename=model_file_path, + params_filename=params_file_path, + executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.detect_text(images=images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + self.parser = argparse.ArgumentParser( + description="Run the %s module." % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + + self.add_module_config_arg() + self.add_module_input_arg() + + args = self.parser.parse_args(argvs) + results = self.detect_text( + paths=[args.input_path], use_gpu=args.use_gpu, output_dir=args.output_dir, visualization=args.visualization) + return results + + def add_module_config_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='detection_result', help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options + """ + self.arg_input_group.add_argument('--input_path', type=str, default=None, help="diretory to image") + + +if __name__ == '__main__': + db = ChineseTextDetectionDBServer() + image_path = ['/mnt/zhangxuefei/PaddleOCR/doc/imgs/11.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg'] + res = db.detect_text(paths=image_path, visualization=True) + db.save_inference_model('save') + print(res) diff --git a/modules/image/text_recognition/chinese_text_detection_db_server/processor.py b/modules/image/text_recognition/chinese_text_detection_db_server/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..a213f1efa0607215d2e21ab38622367d1277dcbb --- /dev/null +++ b/modules/image/text_recognition/chinese_text_detection_db_server/processor.py @@ -0,0 +1,223 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys + +from PIL import Image, ImageDraw, ImageFont +from shapely.geometry import Polygon +import cv2 +import numpy as np +import pyclipper + + +class DBPreProcess(object): + def __init__(self, max_side_len=960): + self.max_side_len = max_side_len + + def resize_image_type(self, im): + """ + resize image to a size multiple of 32 which is required by the network + """ + h, w, _ = im.shape + + resize_w = w + resize_h = h + + # limit the max side + if max(resize_h, resize_w) > self.max_side_len: + if resize_h > resize_w: + ratio = float(self.max_side_len) / resize_h + else: + ratio = float(self.max_side_len) / resize_w + else: + ratio = 1. 
+ resize_h = int(resize_h * ratio) + resize_w = int(resize_w * ratio) + if resize_h % 32 == 0: + resize_h = resize_h + elif resize_h // 32 <= 1: + resize_h = 32 + else: + resize_h = (resize_h // 32 - 1) * 32 + if resize_w % 32 == 0: + resize_w = resize_w + elif resize_w // 32 <= 1: + resize_w = 32 + else: + resize_w = (resize_w // 32 - 1) * 32 + try: + if int(resize_w) <= 0 or int(resize_h) <= 0: + return None, (None, None) + im = cv2.resize(im, (int(resize_w), int(resize_h))) + except: + print(im.shape, resize_w, resize_h) + sys.exit(0) + ratio_h = resize_h / float(h) + ratio_w = resize_w / float(w) + return im, (ratio_h, ratio_w) + + def normalize(self, im): + img_mean = [0.485, 0.456, 0.406] + img_std = [0.229, 0.224, 0.225] + im = im.astype(np.float32, copy=False) + im = im / 255 + im -= img_mean + im /= img_std + channel_swap = (2, 0, 1) + im = im.transpose(channel_swap) + return im + + def __call__(self, im): + im, (ratio_h, ratio_w) = self.resize_image_type(im) + im = self.normalize(im) + im = im[np.newaxis, :] + return [im, (ratio_h, ratio_w)] + + +class DBPostProcess(object): + """ + The post process for Differentiable Binarization (DB). + """ + + def __init__(self, thresh=0.3, box_thresh=0.5, max_candidates=1000): + self.thresh = thresh + self.box_thresh = box_thresh + self.max_candidates = max_candidates + self.min_size = 3 + + def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height): + ''' + _bitmap: single map with shape (1, H, W), + whose values are binarized as {0, 1} + ''' + + bitmap = _bitmap + height, width = bitmap.shape + + outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE) + if len(outs) == 3: + img, contours, _ = outs[0], outs[1], outs[2] + elif len(outs) == 2: + contours, _ = outs[0], outs[1] + + num_contours = min(len(contours), self.max_candidates) + boxes = np.zeros((num_contours, 4, 2), dtype=np.int16) + scores = np.zeros((num_contours, ), dtype=np.float32) + + for index in range(num_contours): + contour = contours[index] + points, sside = self.get_mini_boxes(contour) + if sside < self.min_size: + continue + points = np.array(points) + score = self.box_score_fast(pred, points.reshape(-1, 2)) + if self.box_thresh > score: + continue + + box = self.unclip(points).reshape(-1, 1, 2) + box, sside = self.get_mini_boxes(box) + if sside < self.min_size + 2: + continue + box = np.array(box) + if not isinstance(dest_width, int): + dest_width = dest_width.item() + dest_height = dest_height.item() + + box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width) + box[:, 1] = np.clip(np.round(box[:, 1] / height * dest_height), 0, dest_height) + boxes[index, :, :] = box.astype(np.int16) + scores[index] = score + return boxes, scores + + def unclip(self, box, unclip_ratio=2.0): + poly = Polygon(box) + distance = poly.area * unclip_ratio / poly.length + offset = pyclipper.PyclipperOffset() + offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) + expanded = np.array(offset.Execute(distance)) + return expanded + + def get_mini_boxes(self, contour): + bounding_box = cv2.minAreaRect(contour) + points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0]) + + index_1, index_2, index_3, index_4 = 0, 1, 2, 3 + if points[1][1] > points[0][1]: + index_1 = 0 + index_4 = 1 + else: + index_1 = 1 + index_4 = 0 + if points[3][1] > points[2][1]: + index_2 = 2 + index_3 = 3 + else: + index_2 = 3 + index_3 = 2 + + box = [points[index_1], points[index_2], points[index_3], points[index_4]] + return 
box, min(bounding_box[1]) + + def box_score_fast(self, bitmap, _box): + h, w = bitmap.shape[:2] + box = _box.copy() + xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1) + xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int), 0, w - 1) + ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int), 0, h - 1) + ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int), 0, h - 1) + + mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) + box[:, 0] = box[:, 0] - xmin + box[:, 1] = box[:, 1] - ymin + cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1) + return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] + + def __call__(self, predictions, ratio_list): + pred = predictions[:, 0, :, :] + segmentation = pred > self.thresh + + boxes_batch = [] + for batch_index in range(pred.shape[0]): + height, width = pred.shape[-2:] + tmp_boxes, tmp_scores = self.boxes_from_bitmap(pred[batch_index], segmentation[batch_index], width, height) + + boxes = [] + for k in range(len(tmp_boxes)): + if tmp_scores[k] > self.box_thresh: + boxes.append(tmp_boxes[k]) + if len(boxes) > 0: + boxes = np.array(boxes) + + ratio_h, ratio_w = ratio_list[batch_index] + boxes[:, :, 0] = boxes[:, :, 0] / ratio_w + boxes[:, :, 1] = boxes[:, :, 1] / ratio_h + + boxes_batch.append(boxes) + return boxes_batch + + +def draw_boxes(image, boxes, scores=None, drop_score=0.5): + img = image.copy() + draw = ImageDraw.Draw(img) + if scores is None: + scores = [1] * len(boxes) + for (box, score) in zip(boxes, scores): + if score < drop_score: + continue + draw.line([(box[0][0], box[0][1]), (box[1][0], box[1][1])], fill='red') + draw.line([(box[1][0], box[1][1]), (box[2][0], box[2][1])], fill='red') + draw.line([(box[2][0], box[2][1]), (box[3][0], box[3][1])], fill='red') + draw.line([(box[3][0], box[3][1]), (box[0][0], box[0][1])], fill='red') + draw.line([(box[0][0] - 1, box[0][1] + 1), (box[1][0] - 1, box[1][1] + 1)], fill='red') + draw.line([(box[1][0] - 1, box[1][1] + 1), (box[2][0] - 1, box[2][1] + 1)], fill='red') + draw.line([(box[2][0] - 1, box[2][1] + 1), (box[3][0] - 1, box[3][1] + 1)], fill='red') + draw.line([(box[3][0] - 1, box[3][1] + 1), (box[0][0] - 1, box[0][1] + 1)], fill='red') + return img + + +def get_image_ext(image): + if image.shape[2] == 4: + return ".png" + return ".jpg" diff --git a/hub_module/modules/text/README.md b/modules/text/README.md similarity index 100% rename from hub_module/modules/text/README.md rename to modules/text/README.md diff --git a/hub_module/modules/text/embedding/README.md b/modules/text/embedding/README.md similarity index 100% rename from hub_module/modules/text/embedding/README.md rename to modules/text/embedding/README.md diff --git a/hub_module/modules/text/embedding/tencent_ailab_chinese_embedding/README.md b/modules/text/embedding/tencent_ailab_chinese_embedding/README.md similarity index 100% rename from hub_module/modules/text/embedding/tencent_ailab_chinese_embedding/README.md rename to modules/text/embedding/tencent_ailab_chinese_embedding/README.md diff --git a/modules/text/embedding/tencent_ailab_chinese_embedding/module.py b/modules/text/embedding/tencent_ailab_chinese_embedding/module.py new file mode 100644 index 0000000000000000000000000000000000000000..7c2785bcfdda3e7fb01e7a85ac49942b343bd477 --- /dev/null +++ b/modules/text/embedding/tencent_ailab_chinese_embedding/module.py @@ -0,0 +1,149 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + 
+import io +import os + +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.common.paddle_helper import add_vars_prefix +from paddlehub.module.module import moduleinfo + + +def load_vocab(file_path): + """ + load the given vocabulary + """ + vocab = {} + with io.open(file_path, 'r', encoding='utf8') as f: + for line in f: + parts = line.split("\t") + vocab[parts[0]] = int(parts[1]) + + return vocab + + +@moduleinfo( + name="tencent_ailab_chinese_embedding", + version="1.0.0", + summary= + "Tencent AI Lab Embedding Corpus for Chinese Words and Phrases and the vocab size is 8,824,331. For more information, please refer to https://ai.tencent.com/ailab/nlp/zh/embedding.html", + author="", + author_email="", + type="nlp/semantic_model") +class TencentAILabChineseEmbedding(hub.Module): + def _initialize(self): + """ + initialize with the necessary elements + """ + self.pretrained_model_path = os.path.join(self.directory, "assets", "model") + self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt") + self.vocab = load_vocab(self.vocab_path) + + def context(self, trainable=False, max_seq_len=128, num_slots=1): + """ + Get the input ,output and program of the pretrained tencent_ailab_chinese_embedding + + Args: + trainable(bool): whether fine-tune the pretrained parameters of simnet_bow or not + num_slots(int): It's number of slots inputted to the model, selectted as following options: + + - 1(default): There's only one data to be feeded in the model, e.g. the module is used for sentence classification task. + - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise). + - 3: There are three data to be feeded in the model, e.g. the module is used for text matching task (pair-wise). 
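+
+            A minimal usage sketch (names follow this file; the embeddings
+            are 200-dimensional):
+
+                module = TencentAILabChineseEmbedding()
+                inputs, outputs, program = module.context(num_slots=1)
+                # inputs['text']: int64 word ids with shape [-1, max_seq_len]
+                # outputs['emb']: float32 vectors with shape [-1, max_seq_len, 200]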
+ + Returns: + inputs(dict): the input variables of tencent_ailab_chinese_embedding (words) + outputs(dict): the output variables of input words (word embeddings) + main_program(Program): the main_program of tencent_ailab_chinese_embedding with pretrained prameters + """ + assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots + main_program = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(main_program, startup_program): + with fluid.unique_name.guard(): + w_param_attrs = fluid.ParamAttr( + name="embedding_0.w_0", + initializer=fluid.initializer.TruncatedNormal(scale=0.02), + trainable=trainable) + + text_1 = fluid.data(name='text', shape=[-1, max_seq_len], dtype='int64', lod_level=0) + emb_1 = fluid.embedding( + input=text_1, + size=[len(self.vocab), 200], + is_sparse=True, + padding_idx=len(self.vocab) - 1, + dtype='float32', + param_attr=w_param_attrs) + emb_1_name = emb_1.name + data_list = [text_1] + emb_name_list = [emb_1_name] + + if num_slots > 1: + text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0) + emb_2 = fluid.embedding( + input=text_2, + size=[len(self.vocab), 200], + is_sparse=True, + padding_idx=len(self.vocab) - 1, + dtype='float32', + param_attr=w_param_attrs) + emb_2_name = emb_2.name + data_list.append(text_2) + emb_name_list.append(emb_2_name) + + if num_slots > 2: + text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0) + emb_3 = fluid.embedding( + input=text_3, + size=[len(self.vocab), 200], + is_sparse=True, + padding_idx=len(self.vocab) - 1, + dtype='float32', + param_attr=w_param_attrs) + emb_3_name = emb_3.name + data_list.append(text_3) + emb_name_list.append(emb_3_name) + + variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3'], + list(main_program.global_block().vars.keys())) + + prefix_name = "@HUB_{}@".format(self.name) + add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names) + for param in main_program.global_block().iter_parameters(): + param.trainable = trainable + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + # load the pretrained model + def if_exist(var): + return os.path.exists(os.path.join(self.pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist) + + inputs = {} + outputs = {} + for index, data in enumerate(data_list): + if index == 0: + inputs['text'] = data + outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]] + else: + inputs['text_%s' % (index + 1)] = data + outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name + + emb_name_list[index]] + + return inputs, outputs, main_program + + def get_vocab_path(self): + return self.vocab_path + + +if __name__ == "__main__": + w2v = TencentAILabChineseEmbedding() + inputs, outputs, program = w2v.context(num_slots=3) + print(inputs) + print(outputs) + print(w2v.get_vocab_path()) diff --git a/hub_module/modules/text/embedding/tencent_ailab_chinese_embedding_small/README.md b/modules/text/embedding/tencent_ailab_chinese_embedding_small/README.md similarity index 100% rename from hub_module/modules/text/embedding/tencent_ailab_chinese_embedding_small/README.md rename to modules/text/embedding/tencent_ailab_chinese_embedding_small/README.md diff --git a/modules/text/embedding/tencent_ailab_chinese_embedding_small/module.py b/modules/text/embedding/tencent_ailab_chinese_embedding_small/module.py new 
file mode 100644 index 0000000000000000000000000000000000000000..b77f6885e2fc0197d70fe1e82203b56203316dfc --- /dev/null +++ b/modules/text/embedding/tencent_ailab_chinese_embedding_small/module.py @@ -0,0 +1,149 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import io +import os + +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.common.paddle_helper import add_vars_prefix +from paddlehub.module.module import moduleinfo + + +def load_vocab(file_path): + """ + load the given vocabulary + """ + vocab = {} + with io.open(file_path, 'r', encoding='utf8') as f: + for line in f: + parts = line.split("\t") + vocab[parts[0]] = int(parts[1]) + + return vocab + + +@moduleinfo( + name="tencent_ailab_chinese_embedding_small", + version="1.0.0", + summary= + "Tencent AI Lab Embedding Corpus for Chinese Words and Phrases and the vocab size is 2,000,002. For more information, please refer to https://ai.tencent.com/ailab/nlp/zh/embedding.html", + author="", + author_email="", + type="nlp/semantic_model") +class TencentAILabChineseEmbeddingSmall(hub.Module): + def _initialize(self): + """ + initialize with the necessary elements + """ + self.pretrained_model_path = os.path.join(self.directory, "assets", "model") + self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt") + self.vocab = load_vocab(self.vocab_path) + + def context(self, trainable=False, max_seq_len=128, num_slots=1): + """ + Get the input ,output and program of the pretrained word2vec_skipgram + + Args: + trainable(bool): Whether fine-tune the pretrained parameters of tencent_ailab_chinese_embedding_small or not. + num_slots(int): It's number of data inputted to the model, selectted as following options: + + - 1(default): There's only one data to be feeded in the model, e.g. the module is used for sentence classification task. + - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise). + - 3: There are three data to be feeded in the model, e.g. the module is used for text matching task (pair-wise). 
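+
+            Note: word ids must come from the vocabulary at get_vocab_path()
+            (2,000,002 entries); the last id doubles as the padding index.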
+ + Returns: + inputs(dict): the input variables of tencent_ailab_chinese_embedding_small (words) + outputs(dict): the output variables of input words (word embeddings) + main_program(Program): the main_program of tencent_ailab_chinese_embedding_small with pretrained prameters + """ + assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots + main_program = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(main_program, startup_program): + with fluid.unique_name.guard(): + w_param_attrs = fluid.ParamAttr( + name="embedding_0.w_0", + initializer=fluid.initializer.TruncatedNormal(scale=0.02), + trainable=trainable) + + text_1 = fluid.data(name='text', shape=[-1, max_seq_len], dtype='int64', lod_level=0) + emb_1 = fluid.embedding( + input=text_1, + size=[len(self.vocab), 200], + is_sparse=True, + padding_idx=len(self.vocab) - 1, + dtype='float32', + param_attr=w_param_attrs) + emb_1_name = emb_1.name + data_list = [text_1] + emb_name_list = [emb_1_name] + + if num_slots > 1: + text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0) + emb_2 = fluid.embedding( + input=text_2, + size=[len(self.vocab), 200], + is_sparse=True, + padding_idx=len(self.vocab) - 1, + dtype='float32', + param_attr=w_param_attrs) + emb_2_name = emb_2.name + data_list.append(text_2) + emb_name_list.append(emb_2_name) + + if num_slots > 2: + text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0) + emb_3 = fluid.embedding( + input=text_3, + size=[len(self.vocab), 200], + is_sparse=True, + padding_idx=len(self.vocab) - 1, + dtype='float32', + param_attr=w_param_attrs) + emb_3_name = emb_3.name + data_list.append(text_3) + emb_name_list.append(emb_3_name) + + variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3'], + list(main_program.global_block().vars.keys())) + + prefix_name = "@HUB_{}@".format(self.name) + add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names) + for param in main_program.global_block().iter_parameters(): + param.trainable = trainable + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + # load the pretrained model + def if_exist(var): + return os.path.exists(os.path.join(self.pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist) + + inputs = {} + outputs = {} + for index, data in enumerate(data_list): + if index == 0: + inputs['text'] = data + outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]] + else: + inputs['text_%s' % (index + 1)] = data + outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name + + emb_name_list[index]] + + return inputs, outputs, main_program + + def get_vocab_path(self): + return self.vocab_path + + +if __name__ == "__main__": + w2v = TencentAILabChineseEmbeddingSmall() + inputs, outputs, program = w2v.context(num_slots=3) + print(inputs) + print(outputs) + print(w2v.get_vocab_path()) diff --git a/hub_module/modules/text/embedding/word2vec_skipgram/README.md b/modules/text/embedding/word2vec_skipgram/README.md similarity index 100% rename from hub_module/modules/text/embedding/word2vec_skipgram/README.md rename to modules/text/embedding/word2vec_skipgram/README.md diff --git a/modules/text/embedding/word2vec_skipgram/module.py b/modules/text/embedding/word2vec_skipgram/module.py new file mode 100644 index 
0000000000000000000000000000000000000000..9c76ef92dff4d0113807ac73a2ac662daeedcee6 --- /dev/null +++ b/modules/text/embedding/word2vec_skipgram/module.py @@ -0,0 +1,161 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import io +import os + +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.common.paddle_helper import add_vars_prefix +from paddlehub.module.module import moduleinfo + + +def load_vocab(file_path): + """ + load the given vocabulary + """ + vocab = {} + with io.open(file_path, 'r', encoding='utf8') as f: + for line in f: + parts = line.strip().split("\t") + vocab[parts[0]] = int(parts[1]) + + return vocab + + +@moduleinfo( + name="word2vec_skipgram", + version="1.1.0", + summary="Chinese word embedding based on the SkipGram.", + author="baidu-nlp", + author_email="", + type="nlp/semantic_model") +class Word2vecSkipGram(hub.Module): + def _initialize(self): + """ + initialize with the necessary elements + """ + self.pretrained_model_path = os.path.join(self.directory, "assets", "model") + self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt") + self.vocab = load_vocab(self.vocab_path) + + def context(self, trainable=False, max_seq_len=128, num_slots=1): + """ + Get the input ,output and program of the pretrained word2vec_skipgram + + Args: + trainable(bool): whether fine-tune the pretrained parameters of word2vec_skipgram or not. + max_seq_len (int): It will limit the total sequence returned so that it has a maximum length. + num_slots(int): It's number of data inputted to the model, selectted as following options: + + - 1(default): There's only one data to be feeded in the model, e.g. the module is used for sentence classification task. + - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise). + - 3: There are three data to be feeded in the model, e.g. the module is used for text matching task (pair-wise). 
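+
+            A minimal usage sketch (mirroring the __main__ example below;
+            embeddings here are 128-dimensional):
+
+                w2v = Word2vecSkipGram()
+                inputs, outputs, program = w2v.context(num_slots=1)
+                # outputs['emb'] has shape [-1, max_seq_len, 128]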
+ + Returns: + inputs(dict): the input variables of word2vec_skipgram (words) + outputs(dict): the output variables of input words (word embeddings) + main_program(Program): the main_program of word2vec_skipgram with pretrained prameters + """ + assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots + main_program = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(main_program, startup_program): + with fluid.unique_name.guard(): + + w_param_attrs = fluid.ParamAttr( + name="embedding_0.w_0", + initializer=fluid.initializer.TruncatedNormal(scale=0.02), + trainable=trainable) + + text_1 = fluid.data(name='text', shape=[-1, max_seq_len], dtype='int64', lod_level=0) + emb_1 = fluid.embedding( + input=text_1, + is_sparse=True, + size=[len(self.vocab), 128], + padding_idx=len(self.vocab) - 1, + dtype='float32', + param_attr=w_param_attrs) + emb_1_name = emb_1.name + data_list = [text_1] + emb_name_list = [emb_1_name] + + if num_slots > 1: + text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0) + emb_2 = fluid.embedding( + input=text_2, + is_sparse=True, + size=[len(self.vocab), 128], + padding_idx=len(self.vocab) - 1, + dtype='float32', + param_attr=w_param_attrs) + emb_2_name = emb_2.name + data_list.append(text_2) + emb_name_list.append(emb_2_name) + + if num_slots > 2: + text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0) + emb_3 = fluid.embedding( + input=text_3, + is_sparse=True, + size=[len(self.vocab), 128], + padding_idx=len(self.vocab) - 1, + dtype='float32', + param_attr=w_param_attrs) + emb_3_name = emb_3.name + data_list.append(text_3) + emb_name_list.append(emb_3_name) + + variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3'], + list(main_program.global_block().vars.keys())) + + prefix_name = "@HUB_{}@".format(self.name) + add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names) + for param in main_program.global_block().iter_parameters(): + param.trainable = trainable + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + # load the pretrained model + def if_exist(var): + return os.path.exists(os.path.join(self.pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist) + + inputs = {} + outputs = {} + for index, data in enumerate(data_list): + if index == 0: + inputs['text'] = data + outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]] + else: + inputs['text_%s' % (index + 1)] = data + outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name + + emb_name_list[index]] + + return inputs, outputs, main_program + + def get_vocab_path(self): + return self.vocab_path + + +if __name__ == "__main__": + w2v = Word2vecSkipGram() + i, o, p = w2v.context(num_slots=3) + print(w2v.get_vocab_path()) diff --git a/hub_module/modules/text/language_model/README.md b/modules/text/language_model/README.md similarity index 100% rename from hub_module/modules/text/language_model/README.md rename to modules/text/language_model/README.md diff --git a/hub_module/modules/text/lexical_analysis/README.md b/modules/text/lexical_analysis/README.md similarity index 100% rename from hub_module/modules/text/lexical_analysis/README.md rename to modules/text/lexical_analysis/README.md diff --git a/hub_module/modules/text/lexical_analysis/jieba_paddle/README.md b/modules/text/lexical_analysis/jieba_paddle/README.md 
similarity index 100% rename from hub_module/modules/text/lexical_analysis/jieba_paddle/README.md rename to modules/text/lexical_analysis/jieba_paddle/README.md diff --git a/modules/text/lexical_analysis/jieba_paddle/module.py b/modules/text/lexical_analysis/jieba_paddle/module.py new file mode 100644 index 0000000000000000000000000000000000000000..c4ed5844f0d496debcec9de7a7e65faf5acea335 --- /dev/null +++ b/modules/text/lexical_analysis/jieba_paddle/module.py @@ -0,0 +1,166 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import logging +import os + +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo, serving + + +@moduleinfo( + name="jieba_paddle", + version="1.0.0", + summary= + "jieba_paddle is a Chinese tokenizer using a BiGRU model based on the PaddlePaddle deep learning framework. For more information, please refer to https://github.com/fxsjy/jieba.", + author="baidu-paddle", + author_email="paddle-dev@gmail.com", + type="nlp/lexical_analysis") +class JiebaPaddle(hub.Module): + def _initialize(self): + pass + + @serving + def cut(self, sentence, use_paddle=True, cut_all=False, HMM=True): + """ + The main function that segments an entire sentence that contains + Chinese characters into separated words. + Args: + sentence(str): The str(unicode) to be segmented. + use_paddle(bool): Whether to use the jieba paddle model. Defaults to True. + cut_all(bool): Model type. True for full pattern, False for accurate pattern. + HMM(bool): Whether to use the Hidden Markov Model. + + Returns: + seg_list(list): The list of segmented words. + """ + self.check_dependency() + import jieba + jieba.setLogLevel(logging.ERROR) + jieba._compat.setLogLevel(logging.ERROR) + + if use_paddle: + jieba.enable_paddle() + res = " ".join(jieba.cut(sentence, use_paddle=True)) + seg_list = res.strip(" ").split(" ") + else: + res = " ".join(jieba.cut(sentence, cut_all=cut_all, HMM=HMM)) + seg_list = res.strip(" ").split(" ") + + return seg_list + + def check_dependency(self): + """ + Check the jieba tool dependency. + """ + try: + import jieba + except ImportError: + print( + 'This module requires the jieba tool. The running environment does not meet the requirements. Please install the jieba package.' + ) + exit() + + def cut_for_search(self, sentence, HMM=True): + """ + Finer segmentation for search engines. + Args: + sentence(str): The str(unicode) to be segmented. + HMM(bool): Whether to use the Hidden Markov Model. + + Returns: + seg_list(list): The list of segmented words. + """ + self.check_dependency() + import jieba + jieba.setLogLevel(logging.ERROR) + res = " ".join(jieba.cut_for_search(sentence, HMM=HMM)) + seg_list = res.strip(" ").split(" ") + return seg_list + + def load_userdict(self, user_dict): + ''' + Load a personalized dictionary to improve the detection rate. + Args: + user_dict(str): A plain text file path. It contains words and their occurrences. Can be a file-like object, or the path of the dictionary file, + whose encoding must be utf-8. + Structure of dict file: + word1 freq1 word_type1 + word2 freq2 word_type2 + ...
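+ e.g. (sample entries, in jieba's user-dict format; values are illustrative): + 云计算 5 + 李小福 2 nr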
+ + The word type may be omitted. + ''' + self.check_dependency() + import jieba + jieba.setLogLevel(logging.ERROR) + # load the user-supplied dictionary rather than a hard-coded "userdict.txt" + jieba.load_userdict(user_dict) + + def extract_tags(self, sentence, topK=20, withWeight=False, allowPOS=(), withFlag=False): + """ + Extract keywords from sentence using the TF-IDF algorithm. + Args: + sentence(str): The sentence to extract keywords from. + topK(int): how many top keywords to return. `None` for all possible words. + withWeight(bool): if True, return a list of (word, weight) pairs; + if False, return a list of words. + allowPOS(tuple): the allowed POS list, e.g. ['ns', 'n', 'vn', 'v', 'nr']. + Words whose POS is not in this list are filtered out. + withFlag(bool): only works when allowPOS is not empty. + If True, return a list of (word, weight) pairs like posseg.cut; + if False, return a list of words. + Returns: + result(list): The extracted keywords. + """ + self.check_dependency() + import jieba + import jieba.analyse + jieba.setLogLevel(logging.ERROR) + res = jieba.analyse.extract_tags( + sentence, topK=topK, withWeight=withWeight, allowPOS=allowPOS, withFlag=withFlag) + return res + + def textrank(self, sentence, topK=20, withWeight=False, allowPOS=('ns', 'n', 'vn', 'v'), withFlag=False): + """ + Extract keywords from sentence using the TextRank algorithm. + Args: + sentence(str): The sentence to extract keywords from. + topK(int): how many top keywords to return. `None` for all possible words. + withWeight(bool): if True, return a list of (word, weight) pairs; + if False, return a list of words. + allowPOS(tuple): the allowed POS list, e.g. ['ns', 'n', 'vn', 'v', 'nr']. + Words whose POS is not in this list are filtered out. + withFlag(bool): only works when allowPOS is not empty. + If True, return a list of (word, weight) pairs like posseg.cut; + if False, return a list of words. + Returns: + result(list): The extracted keywords. + """ + self.check_dependency() + import jieba + jieba.setLogLevel(logging.ERROR) + res = jieba.analyse.textrank(sentence, topK=topK, withWeight=withWeight, allowPOS=allowPOS, withFlag=withFlag) + return res + + +if __name__ == "__main__": + jb_pd = JiebaPaddle() + res = jb_pd.cut( + sentence="我来到北京清华大学", + use_paddle=True, + ) + print(res) + res = jb_pd.cut(sentence="我来到北京清华大学", use_paddle=False, cut_all=True) + print(res) + res = jb_pd.cut(sentence="我来到北京清华大学", use_paddle=False, cut_all=False) + print(res) + res = jb_pd.cut_for_search(sentence="我来到北京清华大学") + print(res) + res = jb_pd.extract_tags(sentence="我来到北京清华大学") + print(res) + res = jb_pd.extract_tags(sentence="我来到北京清华大学", withWeight=True) + print(res) + res = jb_pd.textrank(sentence="我来到北京清华大学", withWeight=True) + print(res) diff --git a/hub_module/modules/text/lexical_analysis/lac/README.md b/modules/text/lexical_analysis/lac/README.md similarity index 100% rename from hub_module/modules/text/lexical_analysis/lac/README.md rename to modules/text/lexical_analysis/lac/README.md diff --git a/hub_module/modules/image/text_recognition/chinese_text_detection_db_server/__init__.py b/modules/text/lexical_analysis/lac/__init__.py similarity index 100% rename from hub_module/modules/image/text_recognition/chinese_text_detection_db_server/__init__.py rename to modules/text/lexical_analysis/lac/__init__.py diff --git a/modules/text/lexical_analysis/lac/ahocorasick.py b/modules/text/lexical_analysis/lac/ahocorasick.py new file mode 100644 index 0000000000000000000000000000000000000000..efa35c8c70658325a6d554297036b58bce402990 --- /dev/null +++ b/modules/text/lexical_analysis/lac/ahocorasick.py @@ -0,0 +1,148 @@ +# -*- coding: UTF-8 -*- +""" +This module wraps an Aho-Corasick automaton in the Ahocorasick class for multi-pattern dictionary matching. +""" + + +class Node(object): + """A trie node of the Aho-Corasick automaton.
+ + Attributes: + next: dict, maps a character to the child node + fail: Node, the fail pointer of the automaton + length: int, the word length if this node ends a word, otherwise -1 + """ + __slots__ = ['next', 'fail', 'length'] + + def __init__(self): + """Initialize an empty node.""" + self.next = {} + self.fail = None # the fail pointer defaults to None + self.length = -1 + + +class Ahocorasick(object): + """The Aho-Corasick automaton. + + Attributes: + __root: Node, the root node of the automaton + """ + + def __init__(self): + """Initialize the root node __root of the automaton.""" + self.__root = Node() + + def add_word(self, word): + """Add the word to the trie.""" + current = self.__root + for char in word: + current = current.next.setdefault(char, Node()) + current.length = len(word) + + def make(self): + """Build the fail-pointer paths.""" + + queue = list() + for key in self.__root.next: + self.__root.next[key].fail = self.__root + queue.append(self.__root.next[key]) + + # set the fail pointers via breadth-first traversal + while len(queue) > 0: + # set the children's fail pointers based on the current node's fail pointer + current = queue.pop(0) + + for k in current.next: + current_fail = current.fail + + # while a fail pointer exists, try to set the child's fail pointer from it + while current_fail is not None: + if k in current_fail.next: + current.next[k].fail = current_fail.next[k] + break + current_fail = current_fail.fail + + # if no node on the fail chain has this child, point the child's fail pointer to the root + if current_fail is None: + current.next[k].fail = self.__root + + queue.append(current.next[k]) + + def search(self, content): + """Backward maximum matching. + + Match content against the patterns and return the backward maximum matches. + + Args: + content: str, the string to match against + + Returns: + list, the maximum-match words; each element is the (start, end) position of a matched pattern in the sentence, e.g.: + [(0, 2), (4, 7)] + + """ + result = [] + p = self.__root + for current_position in range(len(content)): + word = content[current_position] + + # follow the fail chain until a child matches or the root is reached + while word not in p.next: + if p == self.__root: + break + p = p.fail + else: + p = p.next[word] + if p.length > 0: + result.append((current_position - p.length + 1, current_position)) + + return result + + def search_all(self, content): + """Exhaustive multi-pattern matching.
+ + Match content against the patterns and return all matches. + + Args: + content: str, the string to match against + + Returns: + list, all matched words; each element is the (start, end) position of a matched pattern in the sentence, e.g.: + [(0, 2), (4, 7)] + + """ + result = [] + p = self.__root + for current_position in range(len(content)): + word = content[current_position] + + while word not in p.next: + if p == self.__root: + break + p = p.fail + else: + p = p.next[word] + + # walk the fail chain to collect every word that ends at the current character + tmp = p + while tmp != self.__root: + if tmp.length > 0: + result.append((current_position - tmp.length + 1, current_position)) + tmp = tmp.fail + + return result + + +if __name__ == '__main__': + + ah = Ahocorasick() + x = ["百度", "家", "高科技", "科技", "科技公司"] + for i in x: + ah.add_word(i) + ah.make() + string = '百度是家高科技公司' + for begin, end in ah.search_all(string): + print('all:', string[begin:end + 1]) + + for begin, end in ah.search(string): + print('search:', string[begin:end + 1]) diff --git a/hub_module/modules/text/lexical_analysis/lac/assets/q2b.dic b/modules/text/lexical_analysis/lac/assets/q2b.dic similarity index 100% rename from hub_module/modules/text/lexical_analysis/lac/assets/q2b.dic rename to modules/text/lexical_analysis/lac/assets/q2b.dic diff --git a/hub_module/modules/text/lexical_analysis/lac/assets/tag.dic b/modules/text/lexical_analysis/lac/assets/tag.dic similarity index 100% rename from hub_module/modules/text/lexical_analysis/lac/assets/tag.dic rename to modules/text/lexical_analysis/lac/assets/tag.dic diff --git a/hub_module/modules/text/lexical_analysis/lac/assets/tag_file.txt b/modules/text/lexical_analysis/lac/assets/tag_file.txt similarity index 100% rename from hub_module/modules/text/lexical_analysis/lac/assets/tag_file.txt rename to modules/text/lexical_analysis/lac/assets/tag_file.txt diff --git a/hub_module/modules/text/lexical_analysis/lac/assets/unigram.dict b/modules/text/lexical_analysis/lac/assets/unigram.dict similarity index 100% rename from hub_module/modules/text/lexical_analysis/lac/assets/unigram.dict rename to modules/text/lexical_analysis/lac/assets/unigram.dict diff --git a/hub_module/modules/text/lexical_analysis/lac/assets/word.dic b/modules/text/lexical_analysis/lac/assets/word.dic similarity index 100% rename from hub_module/modules/text/lexical_analysis/lac/assets/word.dic rename to modules/text/lexical_analysis/lac/assets/word.dic diff --git a/modules/text/lexical_analysis/lac/custom.py b/modules/text/lexical_analysis/lac/custom.py new file mode 100644 index 0000000000000000000000000000000000000000..88a62b4b8ee67c411965dc898aa4ccd896b63a1d --- /dev/null +++ b/modules/text/lexical_analysis/lac/custom.py @@ -0,0 +1,91 @@ +# -*- coding: UTF-8 -*- +""" +This module implements the user-defined dictionary (intervention) feature. +""" + +from io import open + +from .ahocorasick import Ahocorasick + + +class Customization(object): + """ + User intervention based on the Aho-Corasick automaton + """ + + def __init__(self): + self.dictitem = {} + self.ac = None + + def load_customization(self, filename, sep=None): + """Load the intervention dictionary.""" + self.ac = Ahocorasick() + with open(filename, 'r', encoding='utf8') as f: + for line in f: + if sep is None: + words = line.strip().split() + else: + words = line.strip().split(sep) + + if len(words) == 0: + continue + + phrase = "" + tags = [] + offset = [] + for word in words: + if word.rfind('/') < 1: + phrase += word + tags.append('') + else: + phrase += word[:word.rfind('/')] + tags.append(word[word.rfind('/') + 1:]) + offset.append(len(phrase)) + + if len(phrase) < 2 and tags[0] == '': + continue + + self.dictitem[phrase] = (tags, offset) + self.ac.add_word(phrase) + self.ac.make() + + def 
parse_customization(self, query, lac_tags): + """Correct the LAC model output with the intervention dictionary.""" + + def ac_postpress(ac_res): + ac_res.sort() + i = 1 + while i < len(ac_res): + if ac_res[i - 1][0] < ac_res[i][0] and ac_res[i][0] <= ac_res[i - 1][1]: + ac_res.pop(i) + continue + i += 1 + return ac_res + + if not self.ac: + print("Customized dict is not loaded.") + return + + ac_res = self.ac.search(query) + + ac_res = ac_postpress(ac_res) + + for begin, end in ac_res: + phrase = query[begin:end + 1] + index = begin + + tags, offsets = self.dictitem[phrase] + for tag, offset in zip(tags, offsets): + while index < begin + offset: + if len(tag) == 0: + lac_tags[index] = lac_tags[index][:-1] + 'I' + else: + lac_tags[index] = tag + "-I" + index += 1 + + lac_tags[begin] = lac_tags[begin][:-1] + 'B' + for offset in offsets: + index = begin + offset + if index < len(lac_tags): + lac_tags[index] = lac_tags[index][:-1] + 'B' diff --git a/modules/text/lexical_analysis/lac/module.py b/modules/text/lexical_analysis/lac/module.py new file mode 100644 index 0000000000000000000000000000000000000000..fb460ba58c2621274b591ea8cb84a33dfe3565d4 --- /dev/null +++ b/modules/text/lexical_analysis/lac/module.py @@ -0,0 +1,456 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import ast +import io +import json +import math +import os +import six + +import numpy as np +import paddle.fluid as fluid +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +import paddlehub as hub +from paddlehub.common.logger import logger +from paddlehub.common.paddle_helper import add_vars_prefix +from paddlehub.common.utils import sys_stdin_encoding +from paddlehub.io.parser import txt_parser +from paddlehub.module.module import moduleinfo, runnable, serving + +from lac.network import lex_net +from lac.processor import load_kv_dict, word_to_ids, parse_result +from lac.custom import Customization + + +class DataFormatError(Exception): + def __init__(self, *args): + self.args = args + + +@moduleinfo( + name="lac", + version="2.2.0", + summary= + "Baidu's open-source lexical analysis tool for Chinese, including word segmentation, part-of-speech tagging & named entity recognition", + author="baidu-nlp", + author_email="paddle-dev@baidu.com", + type="nlp/lexical_analysis") +class LAC(hub.Module): + def _initialize(self, user_dict=None): + """ + initialize with the necessary elements + """ + self.pretrained_model_path = os.path.join(self.directory, "infer_model") + self.word2id_dict = load_kv_dict(os.path.join(self.directory, "assets/word.dic"), reverse=True, value_func=int) + self.id2word_dict = load_kv_dict(os.path.join(self.directory, "assets/word.dic")) + self.label2id_dict = load_kv_dict(os.path.join(self.directory, "assets/tag.dic"), reverse=True, value_func=int) + self.id2label_dict = load_kv_dict(os.path.join(self.directory, "assets/tag.dic")) + self.word_replace_dict = load_kv_dict(os.path.join(self.directory, "assets/q2b.dic")) + self.oov_id = self.word2id_dict['OOV'] + self.word_dict_len = max(map(int, self.word2id_dict.values())) + 1 + self.label_dict_len = max(map(int, self.label2id_dict.values())) + 1 + self.tag_file = os.path.join(self.directory, "assets/tag_file.txt") + + if user_dict: + self.set_user_dict(dict_path=user_dict) + else: + self.custom = None + + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + cpu_config = AnalysisConfig(self.pretrained_model_path) + 
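+ # a CPU predictor is always created; the GPU predictor below is only built when CUDA_VISIBLE_DEVICES is set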
cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.pretrained_model_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def context(self, trainable=False): + """ + Get the input, output and program of the pretrained lac + + Args: + trainable(bool): whether to fine-tune the pretrained parameters of lac or not + + Returns: + inputs(dict): the input variables of lac (words) + outputs(dict): the output variables of lac (the word segmentation results) + main_program(Program): the main_program of lac with pretrained parameters + """ + main_program = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(main_program, startup_program): + with fluid.unique_name.guard(): + crf_decode, word, fc = lex_net(self.word_dict_len, self.label_dict_len) + word_name = word.name + pred_name = crf_decode.name + fc_name = fc.name + + prefix_name = "@HUB_{}@".format(self.name) + add_vars_prefix(program=main_program, prefix=prefix_name) + for param in main_program.global_block().iter_parameters(): + param.trainable = trainable + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + # load the lac pretrained model + def if_exist(var): + return os.path.exists(os.path.join(self.pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist) + + inputs = {"words": main_program.global_block().vars[prefix_name + word_name]} + outputs = { + "predicted": main_program.global_block().vars[prefix_name + pred_name], + "sentence_feature": main_program.global_block().vars[prefix_name + fc_name] + } + return inputs, outputs, main_program + + def set_user_dict(self, dict_path, sep=None): + """ + Set the customized dictionary if you want to use a self-defined dictionary + + Args: + dict_path(str): The path to the customized dictionary. + sep: The separator token in phrases. Defaults to ' ' or '\t'. + """ + if not os.path.exists(dict_path): + raise RuntimeError("File %s does not exist." 
% dict_path) + self.custom = Customization() + self.custom.load_customization(dict_path, sep) + + def del_user_dict(self): + """ + Delete the customized dictionary if you no longer want to use it + """ + + if self.custom: + self.custom = None + print("Successfully deleted the customized dictionary!") + + def to_unicode(self, texts): + """ + Convert each element's type(str) of texts(list) to unicode in python2.7 + + Args: + texts(list): each element's type is str in python2.7 + + Returns: + texts(list): each element's type is unicode in python2.7 + """ + if six.PY2: + unicode_texts = [] + for text in texts: + if isinstance(text, six.string_types): + unicode_texts.append(text.decode(sys_stdin_encoding()).decode("utf8")) + else: + unicode_texts.append(text) + texts = unicode_texts + return texts + + def texts2tensor(self, texts): + """ + Transform the texts(list) to PaddleTensor + Args: + texts(list): texts + Returns: + tensor(PaddleTensor): tensor with texts data + """ + lod = [0] + data = [] + for i, text in enumerate(texts): + text_inds = word_to_ids(text, self.word2id_dict, self.word_replace_dict, oov_id=self.oov_id) + data += text_inds + lod.append(len(text_inds) + lod[i]) + tensor = PaddleTensor(np.array(data).astype('int64')) + tensor.name = "words" + tensor.lod = [lod] + tensor.shape = [lod[-1], 1] + return tensor + + def _get_index(self, data_list, item=""): + """ + find all indexes of item in data_list + """ + res = [] + for index, data in enumerate(data_list): + if data == item: + res.append(index) + return res + + @serving + def cut(self, text, use_gpu=False, batch_size=1, return_tag=True): + """ + The main function that segments an entire text that contains + Chinese characters into separated words. + Args: + text(:obj:`str` or :obj:`List[str]`): The Chinese texts to be segmented; a string or a list of strings. + use_gpu(bool): whether to use GPU for prediction + batch_size(int): the number of texts processed in one batch + return_tag(bool): whether to return POS tags along with the words. + + Returns: + results: if text is a list, a list of dicts with key 'word' (and 'tag' when return_tag is True); + if text is a str, the list of segmented words. + + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + if isinstance(text, list) and len(text) != 0: + + predicted_data = self.to_unicode(text) + + # drop the empty string like "" in predicted_data + empty_str_indexes = self._get_index(predicted_data) + predicted_data = [data for data in predicted_data if data != ""] + + start_idx = 0 + iteration = int(math.ceil(len(predicted_data) / batch_size)) + results = [] + for i in range(iteration): + if i < (iteration - 1): + batch_data = predicted_data[start_idx:(start_idx + batch_size)] + else: + batch_data = predicted_data[start_idx:] + + start_idx = start_idx + batch_size + tensor_words = self.texts2tensor(batch_data) + + if use_gpu: + batch_out = self.gpu_predictor.run([tensor_words]) + else: + batch_out = self.cpu_predictor.run([tensor_words]) + batch_result = parse_result(batch_data, batch_out[0], self.id2label_dict, interventer=self.custom) + results += batch_result + + for index in empty_str_indexes: + results.insert(index, {"word": [""], "tag": [""]}) + + if not return_tag: + for result in results: + result.pop("tag") + return results + + return results + elif isinstance(text, str) and text != "": + tensor_words = self.texts2tensor([text]) + + if use_gpu: + batch_out = self.gpu_predictor.run([tensor_words]) + else: + batch_out = self.cpu_predictor.run([tensor_words]) + batch_result = parse_result([text], batch_out[0], self.id2label_dict, interventer=self.custom) + + return batch_result[0]['word'] + elif text == "": + return text + else: + raise TypeError("The input data is inconsistent with expectations.") + + def lexical_analysis(self, texts=[], data={}, use_gpu=False, batch_size=1, return_tag=True): + """ + Get the word segmentation results with the texts as input + + Args: + texts(list): the input texts to be segmented; used when data is not provided + data(dict): key must be 'text', value is the texts to be segmented; used when texts is not provided + use_gpu(bool): whether to use GPU for prediction + batch_size(int): the number of texts processed in one batch + return_tag(bool): whether to return POS tags along with the words. + + Returns: + results(list): the word segmentation results + """ + + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + if texts != [] and isinstance(texts, list) and data == {}: + predicted_data = texts + elif texts == [] and isinstance(data, dict) and isinstance(data.get('text', None), list) and data['text']: + predicted_data = data["text"] + else: + raise TypeError("The input data is inconsistent with expectations.") + + predicted_data = self.to_unicode(predicted_data) + + # drop the empty string like "" in predicted_data + empty_str_indexes = self._get_index(predicted_data) + predicted_data = [data for data in predicted_data if data != ""] + + start_idx = 0 + iteration = int(math.ceil(len(predicted_data) / batch_size)) + results = [] + for i in range(iteration): + if i < (iteration - 1): + batch_data = predicted_data[start_idx:(start_idx + batch_size)] + else: + batch_data = predicted_data[start_idx:] + + start_idx = start_idx + batch_size + tensor_words = self.texts2tensor(batch_data) + + if use_gpu: + batch_out = self.gpu_predictor.run([tensor_words]) + else: + batch_out = self.cpu_predictor.run([tensor_words]) + batch_result = parse_result(batch_data, batch_out[0], self.id2label_dict, interventer=self.custom) + results += batch_result + + for index in empty_str_indexes: + results.insert(index, {"word": [""], "tag": [""]}) + + if not return_tag: + for result in results: + result.pop("tag") + return results + + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + self.parser = argparse.ArgumentParser( + description="Run the lac module.", prog='hub run lac', usage='%(prog)s', add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + + self.add_module_config_arg() + self.add_module_input_arg() + + args = self.parser.parse_args(argvs) + + try: + input_data = self.check_input_data(args) + except (DataFormatError, RuntimeError): + self.parser.print_help() + return None + + if args.user_dict: + self.set_user_dict(args.user_dict) + + results = self.lexical_analysis( + texts=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size, return_tag=args.return_tag) + + return results + + def get_tags(self): + """ + Get the tags that were used when pretraining lac + + Returns: + self.tag_name_dict(dict): lac tags + """ + self.tag_name_dict = {} + with io.open(self.tag_file, encoding="utf8") as f: + for line in f: + tag, tag_name = line.strip().split(" ") + self.tag_name_dict[tag] = tag_name + return self.tag_name_dict + + def add_module_config_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether to use GPU or not") + + self.arg_config_group.add_argument('--batch_size', type=int, default=1, help="batch size for prediction") + self.arg_config_group.add_argument( + '--user_dict', + type=str, + default=None, + help="customized dictionary for intervening the word segmentation result") + self.arg_config_group.add_argument( + '--return_tag', type=ast.literal_eval, default=True, help="whether to return tags of results or not") + + def add_module_input_arg(self): + """ + Add the command input options + """ + self.arg_input_group.add_argument('--input_file', type=str, default=None, help="file containing input data") + self.arg_input_group.add_argument('--input_text', type=str, default=None, help="text to predict") + + def 
check_input_data(self, args): + input_data = [] + if args.input_file: + if not os.path.exists(args.input_file): + print("File %s does not exist." % args.input_file) + raise RuntimeError + else: + input_data = txt_parser.parse(args.input_file, use_strip=True) + elif args.input_text: + if args.input_text.strip() != '': + if six.PY2: + input_data = [args.input_text.decode(sys_stdin_encoding()).decode("utf8")] + else: + input_data = [args.input_text] + + if input_data == []: + print("ERROR: The input data is inconsistent with expectations.") + raise DataFormatError + + return input_data + + +if __name__ == '__main__': + lac = LAC(user_dict="user.dict") + # or use set_user_dict to load it after construction + # lac.set_user_dict("user.dict") + + test_text = ["今天是个好日子", "天气预报说今天要下雨", "", "下一班地铁马上就要到了", "", "调料份量不能多,也不能少,味道才能正好", "", "", "春天的花开秋天的风以及冬天的落阳"] + + # execute predict and print the result + results = lac.cut(text=test_text, use_gpu=True, batch_size=7, return_tag=True) + for result in results: + if six.PY2: + print(json.dumps(result['word'], encoding="utf8", ensure_ascii=False)) + print(json.dumps(result['tag'], encoding="utf8", ensure_ascii=False)) + else: + print(result['word']) + print(result['tag']) + + # delete the customized dictionary + lac.del_user_dict() + + results = lac.cut(text="春天的花开秋天的风以及冬天的落阳", use_gpu=False, batch_size=1, return_tag=False) + print(results) + + # get the tags that were used when pretraining lac + print(lac.get_tags()) diff --git a/modules/text/lexical_analysis/lac/network.py b/modules/text/lexical_analysis/lac/network.py new file mode 100755 index 0000000000000000000000000000000000000000..47a7de4ad5c80b80b5dd51add67a308de8321344 --- /dev/null +++ b/modules/text/lexical_analysis/lac/network.py @@ -0,0 +1,87 @@ +# -*- coding:utf-8 -*- +import paddle.fluid as fluid + + +def lex_net(word_dict_len, label_dict_len): + """ + define the lexical analysis network structure + """ + word_emb_dim = 128 + grnn_hidden_dim = 128 + emb_lr = 2 + crf_lr = 0.2 + bigru_num = 2 + init_bound = 0.1 + IS_SPARSE = True + + def _bigru_layer(input_feature): + """ + define the bidirectional gru layer + """ + pre_gru = fluid.layers.fc( + input=input_feature, + size=grnn_hidden_dim * 3, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Uniform(low=-init_bound, high=init_bound), + regularizer=fluid.regularizer.L2DecayRegularizer(regularization_coeff=1e-4))) + gru = fluid.layers.dynamic_gru( + input=pre_gru, + size=grnn_hidden_dim, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Uniform(low=-init_bound, high=init_bound), + regularizer=fluid.regularizer.L2DecayRegularizer(regularization_coeff=1e-4))) + + pre_gru_r = fluid.layers.fc( + input=input_feature, + size=grnn_hidden_dim * 3, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Uniform(low=-init_bound, high=init_bound), + regularizer=fluid.regularizer.L2DecayRegularizer(regularization_coeff=1e-4))) + gru_r = fluid.layers.dynamic_gru( + input=pre_gru_r, + size=grnn_hidden_dim, + is_reverse=True, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Uniform(low=-init_bound, high=init_bound), + regularizer=fluid.regularizer.L2DecayRegularizer(regularization_coeff=1e-4))) + + bi_merge = fluid.layers.concat(input=[gru, gru_r], axis=1) + return bi_merge + + def _net_conf(word): + """ + Configure the network + """ + word_embedding = fluid.layers.embedding( + input=word, + size=[word_dict_len, word_emb_dim], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr=fluid.ParamAttr( + learning_rate=emb_lr, + 
name="word_emb", + initializer=fluid.initializer.Uniform(low=-init_bound, high=init_bound))) + + input_feature = word_embedding + for i in range(bigru_num): + bigru_output = _bigru_layer(input_feature) + input_feature = bigru_output + + emission = fluid.layers.fc( + size=label_dict_len, + input=bigru_output, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Uniform(low=-init_bound, high=init_bound), + regularizer=fluid.regularizer.L2DecayRegularizer(regularization_coeff=1e-4))) + + size = emission.shape[1] + fluid.layers.create_parameter(shape=[size + 2, size], dtype=emission.dtype, name='crfw') + crf_decode = fluid.layers.crf_decoding(input=emission, param_attr=fluid.ParamAttr(name='crfw')) + + return crf_decode, emission + + word = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1) + + crf_decode, emission = _net_conf(word) + + return crf_decode, word, emission diff --git a/modules/text/lexical_analysis/lac/processor.py b/modules/text/lexical_analysis/lac/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..6ad9d6616b173541b8b74e7bf963788e81caf3c2 --- /dev/null +++ b/modules/text/lexical_analysis/lac/processor.py @@ -0,0 +1,305 @@ +# -*- coding:utf-8 -*- +import io +import numpy as np +import os +import six + + +class Query(object): + def __init__(self, lac_query): + self.set_query(lac_query) + + def set_query(self, lac_query): + """ + self.lac_query_list = ["我/r", "和/c", "妈妈/n", "经常/d", "过去/v", "那儿/r", "散步/v"] + self.seg_query_list = ["我", "和", "妈妈", "经常", "过去", "那儿", "散步"] + self.seg_query_str = "我 和 妈妈 经常 过去 那儿 散步" + self.ori_query_str = "我和妈妈经常过去那儿散步" + """ + length = len(lac_query['word']) + if six.PY2: + self.lac_query_list = [ + lac_query["word"][index].encode("utf8") + "/" + lac_query["tag"][index].encode("utf8") + for index in range(length) + ] + else: + self.lac_query_list = [lac_query["word"][index] + "/" + lac_query["tag"][index] for index in range(length)] + + self.seg_query_list = [] + for phrase in self.lac_query_list: + index = phrase.rfind("/") + word = phrase[0:index] + self.seg_query_list.append(word) + self.seg_query_str = " ".join(self.seg_query_list) + self.ori_query_str = "".join(self.seg_query_list) + + +class Bound(object): + def __init__(self, start_index=0, end_index=0, left_bound=0, right_bound=0, left_char_bound=0, right_char_bound=0): + self.start_index = start_index # 命中的词的起始位置,char级别 + self.end_index = end_index # 命中的词的结束位置,char级别 + self.left_bound = left_bound # 原分词级别的起始位置 + self.right_bound = right_bound # 原分词级别的结束位置 + self.left_char_bound = left_char_bound # 原 char 级别的起始位置 + self.right_char_bound = right_char_bound # 原 char 级别的结束位置 + + +class Interventer(object): + def __init__(self, ngram_dict_path, user_dict_path): + self.ngram_dict_path = ngram_dict_path + self.user_dict_path = user_dict_path + self.init_pos_types() + self.load_dict() + + def init_pos_types(self): + all_pos_types = "n f s t nr ns nt nw nz v vd vn" \ + + " a ad an d m q r p c u xc w PER LOC ORG TIME" + self.all_pos_types = set([pos_type.lower() for pos_type in all_pos_types.split(" ")]) + + def load_dict(self): + """load unigram dict and user dict""" + import ahocorasick + self.total_count = 0.0 + self.ngram_dict = {} + print("Loading dict...") + for line in io.open(self.ngram_dict_path, mode="r", encoding="utf-8"): + if six.PY2: + word, pos, wordfreq = line.encode("utf-8").strip('\n').split('\t') + else: + word, pos, wordfreq = line.strip('\n').split('\t') + wordfreq = int(wordfreq) + if pos.lower() not in 
self.all_pos_types: + continue + assert wordfreq > 0, "Word frequency must be a positive integer!" + self.total_count += wordfreq + self.ngram_dict[word + "/" + pos] = wordfreq + for key in self.ngram_dict: + wordfreq = self.ngram_dict[key] + self.ngram_dict[key] = np.log(wordfreq / self.total_count) + self.oov_score = np.log(1 / self.total_count) + + self.user_dict = ahocorasick.Automaton() + for line in io.open(self.user_dict_path, mode="r", encoding="utf-8"): + if six.PY2: + word, pos, wordfreq = line.encode("utf-8").strip('\n').split('\t') + else: + word, pos, wordfreq = line.strip('\n').split('\t') + wordfreq = int(wordfreq) + assert pos in self.all_pos_types, "Invalid POS type" + assert wordfreq > 0, "Word frequency must be a positive integer!" + self.ngram_dict[word + "/" + pos] = np.log(wordfreq / self.total_count) + self.user_dict.add_word(word, (word, pos, wordfreq)) + self.user_dict.make_automaton() + + def find_min_bound(self, match_info, query): + """ + find the minimum Bound for match_word + """ + end_index, (match_word, pos, wordfreq) = match_info + start_index = end_index - len(match_word) + 1 + + bound = Bound(start_index=start_index, end_index=end_index) + + # find left bound + query_len = 0 + for word_index, word in enumerate(query.seg_query_list): + query_len += len(word) + if query_len > start_index: + bound.left_bound = word_index + bound.left_char_bound = query_len - len(word) + break + # find right bound + query_len = 0 + for word_index, word in enumerate(query.seg_query_list): + query_len += len(word) + if query_len > end_index: + bound.right_bound = word_index + bound.right_char_bound = query_len - 1 + break + return bound + + def calc_lm_score(self, phrase_list): + """calculate the language model score""" + lm_score = 0.0 + if len(phrase_list) == 0: + return 0.0 + for phrase in phrase_list: + lm_score += self.ngram_dict.get(phrase, self.oov_score) + return lm_score / len(phrase_list) + + def get_new_phrase_list(self, match_info, bound, query): + """ + Compare the word given by the user dict with the original segmentation and decide by score whether to replace it + """ + new_phrase_list = [] + phrase_left = query.ori_query_str[bound.left_char_bound:bound.start_index] + phrase_right = query.ori_query_str[bound.end_index + 1:bound.right_char_bound + 1] + if phrase_left != "": + phrase_left += "/" + query.lac_query_list[bound.left_bound].split('/')[1] + new_phrase_list.append(phrase_left) + new_phrase_list.append(match_info[1][0] + "/" + match_info[1][1]) + if phrase_right != "": + phrase_right += "/" + query.lac_query_list[bound.right_bound].split('/')[1] + new_phrase_list.append(phrase_right) + + new_query_list = query.lac_query_list[0: bound.left_bound] + new_phrase_list + \ + query.lac_query_list[bound.right_bound + 1: ] + new_lm_score = self.calc_lm_score(new_query_list) + return new_lm_score, new_phrase_list + + def run(self, query): + """ + step 1: detect the matched user words with the Aho-Corasick automaton + step 2: for each user word, find the minimal segmentation boundary and score each candidate segmentation + step 3: how to resolve conflicts? + 3.a. assuming the keywords found by the automaton appear in order, only adjacent replacements need to be compared + 3.b. if two adjacent replacements have no positional conflict, append the earlier one to the replacement list directly + 3.c. 
if two adjacent replacements conflict, compare their scores, discard one, and update the position of the previous replacement + step 4: finally apply the replacements in order + """ + last_bound = None + last_phrase_list = None + last_lm_score = None + all_result = [] + old_lm_score = self.calc_lm_score(query.lac_query_list) + + for match_info in self.user_dict.iter(query.ori_query_str): + bound = self.find_min_bound(match_info, query) + new_lm_score, new_phrase_list = self.get_new_phrase_list(match_info, bound, query) + + # if the score is lower than the original LAC result, discard the user-dict result + if new_lm_score <= old_lm_score: + continue + # the first match encountered + if last_bound is None: + last_bound = bound + last_phrase_list = new_phrase_list + last_lm_score = new_lm_score + continue + if bound.left_bound > last_bound.right_bound: + # no positional conflict: commit the previous result to the final list + all_result.append((last_bound, last_phrase_list)) + last_bound = bound + last_phrase_list = new_phrase_list + last_lm_score = new_lm_score + else: + # positional conflict + if new_lm_score > last_lm_score: + # if the score beats the previous result, overwrite it; otherwise drop the new one + last_bound = bound + last_phrase_list = new_phrase_list + last_lm_score = new_lm_score + + if last_bound is not None: + all_result.append((last_bound, last_phrase_list)) + + # merge all replacement results + final_phrase_list = [] + last_index = -1 + for bound, phrase_list in all_result: + final_phrase_list += query.lac_query_list[last_index + 1:bound.left_bound] + phrase_list + last_index = bound.right_bound + final_phrase_list += query.lac_query_list[last_index + 1:] + + final_result = {'word': [], 'tag': []} + for phrase in final_phrase_list: + index = phrase.rfind("/") + word = phrase[0:index] + tag = phrase[index + 1:] + final_result['word'].append(word) + final_result['tag'].append(tag) + + return final_result + + +def load_kv_dict(dict_path, reverse=False, delimiter="\t", key_func=None, value_func=None): + """ + Load a key-value dict from file + """ + result_dict = {} + for line in io.open(dict_path, "r", encoding='utf8'): + terms = line.strip("\n").split(delimiter) + if len(terms) != 2: + continue + if reverse: + value, key = terms + else: + key, value = terms + if key in result_dict: + raise KeyError("key duplicated with [%s]" % (key)) + if key_func: + key = key_func(key) + if value_func: + value = value_func(value) + result_dict[key] = value + return result_dict + + +def word_to_ids(words, word2id_dict, word_replace_dict, oov_id=None): + """convert words to word indexes""" + word_ids = [] + for word in words: + word = word_replace_dict.get(word, word) + word_id = word2id_dict.get(word, oov_id) + word_ids.append(word_id) + + return word_ids + + +def parse_result(lines, crf_decode, id2label_dict, interventer=None): + """Convert the model's output tensor into strings and tags""" + offset_list = crf_decode.lod[0] + crf_decode = crf_decode.as_ndarray() + batch_size = len(offset_list) - 1 + batch_out = [] + for sent_index in range(batch_size): + begin, end = offset_list[sent_index], offset_list[sent_index + 1] + sent = lines[sent_index] + tags = [id2label_dict[str(tag_id[0])] for tag_id in crf_decode[begin:end]] + + if interventer: + interventer.parse_customization(sent, tags) + + sent_out = [] + tags_out = [] + for ind, tag in enumerate(tags): + # for the first char + if len(sent_out) == 0 or tag.endswith("B") or tag.endswith("S"): + sent_out.append(sent[ind]) + tags_out.append(tag[:-2]) + continue + sent_out[-1] += sent[ind] + tags_out[-1] = tag[:-2] + + seg_result = {"word": sent_out, 
"tag": tags_out} + batch_out.append(seg_result) + + return batch_out + + +# sent_out = [] +# tags_out = [] +# parital_word = "" +# for ind, tag in enumerate(tags): +# # for the first word +# if parital_word == "": +# parital_word = sent[ind] +# tags_out.append(tag.split('-')[0]) +# continue +# # for the beginning of word +# if tag.endswith("-B") or (tag == "O" and tags[ind - 1] != "O"): +# sent_out.append(parital_word) +# tags_out.append(tag.split('-')[0]) +# parital_word = sent[ind] +# continue +# parital_word += sent[ind] +# # append the last word, except for len(tags)=0 +# if len(sent_out) < len(tags_out): +# sent_out.append(parital_word) +# seg_result = {"word": sent_out, "tag": tags_out} + +# batch_out.append(seg_result) +# return batch_out diff --git a/hub_module/modules/text/lexical_analysis/lac/user.dict b/modules/text/lexical_analysis/lac/user.dict similarity index 100% rename from hub_module/modules/text/lexical_analysis/lac/user.dict rename to modules/text/lexical_analysis/lac/user.dict diff --git a/hub_module/modules/text/semantic_model/README.md b/modules/text/semantic_model/README.md similarity index 100% rename from hub_module/modules/text/semantic_model/README.md rename to modules/text/semantic_model/README.md diff --git a/hub_module/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/README.md b/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/README.md similarity index 100% rename from hub_module/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/README.md rename to modules/text/semantic_model/bert_cased_L_12_H_768_A_12/README.md diff --git a/hub_module/modules/text/lexical_analysis/lac/__init__.py b/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/__init__.py similarity index 100% rename from hub_module/modules/text/lexical_analysis/lac/__init__.py rename to modules/text/semantic_model/bert_cased_L_12_H_768_A_12/__init__.py diff --git a/hub_module/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/__init__.py b/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/model/__init__.py similarity index 100% rename from hub_module/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/__init__.py rename to modules/text/semantic_model/bert_cased_L_12_H_768_A_12/model/__init__.py diff --git a/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/model/bert.py b/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/model/bert.py new file mode 100644 index 0000000000000000000000000000000000000000..c42acad03751e2c1adc71faede6db393a2daca8a --- /dev/null +++ b/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/model/bert.py @@ -0,0 +1,196 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""BERT model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import six +import json + +import paddle.fluid as fluid + +from bert_cased_L_12_H_768_A_12.model.transformer_encoder import encoder, pre_process_layer + + +class BertConfig(object): + def __init__(self, config_path): + self._config_dict = self._parse(config_path) + + def _parse(self, config_path): + try: + with open(config_path) as json_file: + config_dict = json.load(json_file) + except Exception: + raise IOError("Error in parsing bert model config file '%s'" % config_path) + else: + return config_dict + + def __getitem__(self, key): + return self._config_dict[key] + + def print_config(self): + for arg, value in sorted(six.iteritems(self._config_dict)): + print('%s: %s' % (arg, value)) + print('------------------------------------------------') + + +class BertModel(object): + def __init__(self, src_ids, position_ids, sentence_ids, input_mask, config, weight_sharing=True, use_fp16=False): + + self._emb_size = config['hidden_size'] + self._n_layer = config['num_hidden_layers'] + self._n_head = config['num_attention_heads'] + self._voc_size = config['vocab_size'] + self._max_position_seq_len = config['max_position_embeddings'] + self._sent_types = config['type_vocab_size'] + self._hidden_act = config['hidden_act'] + self._prepostprocess_dropout = config['hidden_dropout_prob'] + self._attention_dropout = config['attention_probs_dropout_prob'] + self._weight_sharing = weight_sharing + + self._word_emb_name = "word_embedding" + self._pos_emb_name = "pos_embedding" + self._sent_emb_name = "sent_embedding" + self._dtype = "float16" if use_fp16 else "float32" + + # Initialize all weigths by truncated normal initializer, and all biases + # will be initialized by constant zero by default. 
+ self._param_initializer = fluid.initializer.TruncatedNormal(scale=config['initializer_range']) + + self._build_model(src_ids, position_ids, sentence_ids, input_mask) + + def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): + # padding id in vocabulary must be set to 0 + emb_out = fluid.layers.embedding( + input=src_ids, + size=[self._voc_size, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._word_emb_name, initializer=self._param_initializer), + is_sparse=False) + position_emb_out = fluid.layers.embedding( + input=position_ids, + size=[self._max_position_seq_len, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._pos_emb_name, initializer=self._param_initializer)) + + sent_emb_out = fluid.layers.embedding( + sentence_ids, + size=[self._sent_types, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._sent_emb_name, initializer=self._param_initializer)) + + emb_out = emb_out + position_emb_out + emb_out = emb_out + sent_emb_out + + emb_out = pre_process_layer(emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') + + if self._dtype == "float16": + input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) + + self_attn_mask = fluid.layers.matmul(x=input_mask, y=input_mask, transpose_y=True) + self_attn_mask = fluid.layers.scale(x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) + n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] * self._n_head, axis=1) + n_head_self_attn_mask.stop_gradient = True + + self._enc_out = encoder( + enc_input=emb_out, + attn_bias=n_head_self_attn_mask, + n_layer=self._n_layer, + n_head=self._n_head, + d_key=self._emb_size // self._n_head, + d_value=self._emb_size // self._n_head, + d_model=self._emb_size, + d_inner_hid=self._emb_size * 4, + prepostprocess_dropout=self._prepostprocess_dropout, + attention_dropout=self._attention_dropout, + relu_dropout=0, + hidden_act=self._hidden_act, + preprocess_cmd="", + postprocess_cmd="dan", + param_initializer=self._param_initializer, + name='encoder') + + def get_sequence_output(self): + return self._enc_out + + def get_pooled_output(self): + """Get the first feature of each sequence for classification""" + + next_sent_feat = fluid.layers.slice(input=self._enc_out, axes=[1], starts=[0], ends=[1]) + next_sent_feat = fluid.layers.fc( + input=next_sent_feat, + size=self._emb_size, + act="tanh", + param_attr=fluid.ParamAttr(name="pooled_fc.w_0", initializer=self._param_initializer), + bias_attr="pooled_fc.b_0") + return next_sent_feat + + def get_pretraining_output(self, mask_label, mask_pos, labels): + """Get the loss & accuracy for pretraining""" + + mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') + + # extract the first token feature in each sentence + next_sent_feat = self.get_pooled_output() + reshaped_emb_out = fluid.layers.reshape(x=self._enc_out, shape=[-1, self._emb_size]) + # extract masked tokens' feature + mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) + + # transform: fc + mask_trans_feat = fluid.layers.fc( + input=mask_feat, + size=self._emb_size, + act=self._hidden_act, + param_attr=fluid.ParamAttr(name='mask_lm_trans_fc.w_0', initializer=self._param_initializer), + bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) + # transform: layer norm + mask_trans_feat = pre_process_layer(mask_trans_feat, 'n', name='mask_lm_trans') + + mask_lm_out_bias_attr = fluid.ParamAttr( + name="mask_lm_out_fc.b_0", 
initializer=fluid.initializer.Constant(value=0.0)) + if self._weight_sharing: + fc_out = fluid.layers.matmul( + x=mask_trans_feat, + y=fluid.default_main_program().global_block().var(self._word_emb_name), + transpose_y=True) + fc_out += fluid.layers.create_parameter( + shape=[self._voc_size], dtype=self._dtype, attr=mask_lm_out_bias_attr, is_bias=True) + + else: + fc_out = fluid.layers.fc( + input=mask_trans_feat, + size=self._voc_size, + param_attr=fluid.ParamAttr(name="mask_lm_out_fc.w_0", initializer=self._param_initializer), + bias_attr=mask_lm_out_bias_attr) + + mask_lm_loss = fluid.layers.softmax_with_cross_entropy(logits=fc_out, label=mask_label) + mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) + + next_sent_fc_out = fluid.layers.fc( + input=next_sent_feat, + size=2, + param_attr=fluid.ParamAttr(name="next_sent_fc.w_0", initializer=self._param_initializer), + bias_attr="next_sent_fc.b_0") + + next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy( + logits=next_sent_fc_out, label=labels, return_softmax=True) + + next_sent_acc = fluid.layers.accuracy(input=next_sent_softmax, label=labels) + + mean_next_sent_loss = fluid.layers.mean(next_sent_loss) + + loss = mean_next_sent_loss + mean_mask_lm_loss + return next_sent_acc, mean_mask_lm_loss, loss diff --git a/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/model/transformer_encoder.py b/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/model/transformer_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..53051cde80308a17a30d9b92de11c712b63da406 --- /dev/null +++ b/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/model/transformer_encoder.py @@ -0,0 +1,288 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Transformer encoder.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from functools import partial + +import paddle.fluid as fluid +import paddle.fluid.layers as layers + + +def multi_head_attention(queries, + keys, + values, + attn_bias, + d_key, + d_value, + d_model, + n_head=1, + dropout_rate=0., + cache=None, + param_initializer=None, + name='multi_head_att'): + """ + Multi-Head Attention. Note that attn_bias is added to the logits before + computing the softmax activation, to mask certain selected positions so + that they will not be considered in the attention weights. + """ + keys = queries if keys is None else keys + values = keys if values is None else values + + if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): + raise ValueError("Inputs: queries, keys and values should all be 3-D tensors.") + + def __compute_qkv(queries, keys, values, n_head, d_key, d_value): + """ + Add linear projection to queries, keys, and values. 
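+ Each projection maps the model dimension to n_head * d_key (for q and k) or n_head * d_value (for v).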
+ """ + q = layers.fc( + input=queries, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_query_fc.w_0', initializer=param_initializer), + bias_attr=name + '_query_fc.b_0') + k = layers.fc( + input=keys, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_key_fc.w_0', initializer=param_initializer), + bias_attr=name + '_key_fc.b_0') + v = layers.fc( + input=values, + size=d_value * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_value_fc.w_0', initializer=param_initializer), + bias_attr=name + '_value_fc.b_0') + return q, k, v + + def __split_heads(x, n_head): + """ + Reshape the last dimension of inpunt tensor x so that it becomes two + dimensions and then transpose. Specifically, input a tensor with shape + [bs, max_sequence_length, n_head * hidden_dim] then output a tensor + with shape [bs, n_head, max_sequence_length, hidden_dim]. + """ + hidden_size = x.shape[-1] + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + reshaped = layers.reshape(x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) + + # permuate the dimensions into: + # [batch_size, n_head, max_sequence_len, hidden_size_per_head] + return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + + def __combine_heads(x): + """ + Transpose and then reshape the last two dimensions of inpunt tensor x + so that it becomes one dimension, which is reverse to __split_heads. + """ + if len(x.shape) == 3: return x + if len(x.shape) != 4: + raise ValueError("Input(x) should be a 4-D Tensor.") + + trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + return layers.reshape(x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], inplace=True) + + def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): + """ + Scaled Dot-Product Attention + """ + scaled_q = layers.scale(x=q, scale=d_key**-0.5) + product = layers.matmul(x=scaled_q, y=k, transpose_y=True) + if attn_bias: + product += attn_bias + weights = layers.softmax(product) + if dropout_rate: + weights = layers.dropout( + weights, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + out = layers.matmul(weights, v) + return out + + q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) + + if cache is not None: # use cache and concat time steps + # Since the inplace reshape in __split_heads changes the shape of k and + # v, which is the cache input for next time step, reshape the cache + # input from the previous time step first. + k = cache["k"] = layers.concat([layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) + v = cache["v"] = layers.concat([layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) + + q = __split_heads(q, n_head) + k = __split_heads(k, n_head) + v = __split_heads(v, n_head) + + ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate) + + out = __combine_heads(ctx_multiheads) + + # Project back to the model size. 
+ proj_out = layers.fc( + input=out, + size=d_model, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_output_fc.w_0', initializer=param_initializer), + bias_attr=name + '_output_fc.b_0') + return proj_out + + +def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate, hidden_act, param_initializer=None, name='ffn'): + """ + Position-wise Feed-Forward Networks. + This module consists of two linear transformations with an activation + (given by hidden_act) in between, which is applied to each position + separately and identically. + """ + hidden = layers.fc( + input=x, + size=d_inner_hid, + num_flatten_dims=2, + act=hidden_act, + param_attr=fluid.ParamAttr(name=name + '_fc_0.w_0', initializer=param_initializer), + bias_attr=name + '_fc_0.b_0') + if dropout_rate: + hidden = layers.dropout( + hidden, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + out = layers.fc( + input=hidden, + size=d_hid, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_fc_1.w_0', initializer=param_initializer), + bias_attr=name + '_fc_1.b_0') + return out + + +def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., name=''): + """ + Add residual connection, layer normalization and dropout to the out tensor + optionally according to the value of process_cmd. + This will be used before or after multi-head attention and position-wise + feed-forward networks. + """ + for cmd in process_cmd: + if cmd == "a": # add residual connection + out = out + prev_out if prev_out else out + elif cmd == "n": # add layer normalization + out_dtype = out.dtype + if out_dtype == fluid.core.VarDesc.VarType.FP16: + out = layers.cast(x=out, dtype="float32") + out = layers.layer_norm( + out, + begin_norm_axis=len(out.shape) - 1, + param_attr=fluid.ParamAttr(name=name + '_layer_norm_scale', initializer=fluid.initializer.Constant(1.)), + bias_attr=fluid.ParamAttr(name=name + '_layer_norm_bias', initializer=fluid.initializer.Constant(0.))) + if out_dtype == fluid.core.VarDesc.VarType.FP16: + out = layers.cast(x=out, dtype="float16") + elif cmd == "d": # add dropout + if dropout_rate: + out = layers.dropout( + out, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + return out + + +pre_process_layer = partial(pre_post_process_layer, None) +post_process_layer = pre_post_process_layer + + +def encoder_layer(enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """The encoder layers that can be stacked to form a deep encoder. + This module consists of a multi-head (self) attention sub-layer followed by + a position-wise feed-forward network, each accompanied by a + post_process_layer that adds the residual connection, layer normalization + and dropout. 
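+ With preprocess_cmd="n" and postprocess_cmd="da", each sub-layer computes x + dropout(sublayer(layer_norm(x))).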
+ """ + attn_output = multi_head_attention( + pre_process_layer(enc_input, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_att'), + None, + None, + attn_bias, + d_key, + d_value, + d_model, + n_head, + attention_dropout, + param_initializer=param_initializer, + name=name + '_multi_head_att') + attn_output = post_process_layer( + enc_input, attn_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_att') + ffd_output = positionwise_feed_forward( + pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_ffn'), + d_inner_hid, + d_model, + relu_dropout, + hidden_act, + param_initializer=param_initializer, + name=name + '_ffn') + return post_process_layer(attn_output, ffd_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_ffn') + + +def encoder(enc_input, + attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """ + The encoder is composed of a stack of identical layers returned by calling + encoder_layer. + """ + for i in range(n_layer): + enc_output = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + name=name + '_layer_' + str(i)) + enc_input = enc_output + enc_output = pre_process_layer(enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") + + return enc_output diff --git a/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/module.py b/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/module.py new file mode 100644 index 0000000000000000000000000000000000000000..d5ec9e197980778d0165904f98a7b139d0015ccc --- /dev/null +++ b/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/module.py @@ -0,0 +1,74 @@ +# coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from paddlehub import TransformerModule
+from paddlehub.module.module import moduleinfo
+
+from bert_cased_L_12_H_768_A_12.model.bert import BertConfig, BertModel
+
+
+@moduleinfo(
+    name="bert_cased_L-12_H-768_A-12",
+    version="1.1.0",
+    summary="bert_cased_L-12_H-768_A-12, 12-layer, 768-hidden, 12-heads, 110M parameters",
+    author="paddlepaddle",
+    author_email="paddle-dev@baidu.com",
+    type="nlp/semantic_model",
+)
+class Bert(TransformerModule):
+    def _initialize(self):
+        self.MAX_SEQ_LEN = 512
+        self.params_path = os.path.join(self.directory, "assets", "params")
+        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
+
+        bert_config_path = os.path.join(self.directory, "assets", "bert_config.json")
+        self.bert_config = BertConfig(bert_config_path)
+
+    def net(self, input_ids, position_ids, segment_ids, input_mask):
+        """
+        Create the neural network.
+
+        Args:
+            input_ids (tensor): the word ids.
+            position_ids (tensor): the position ids.
+            segment_ids (tensor): the segment ids.
+            input_mask (tensor): the padding mask.
+
+        Returns:
+            pooled_output (tensor): sentence-level output for classification tasks.
+            sequence_output (tensor): token-level output for sequence tasks.
+        """
+        bert = BertModel(
+            src_ids=input_ids,
+            position_ids=position_ids,
+            sentence_ids=segment_ids,
+            input_mask=input_mask,
+            config=self.bert_config,
+            use_fp16=False)
+        pooled_output = bert.get_pooled_output()
+        sequence_output = bert.get_sequence_output()
+        return pooled_output, sequence_output
+
+
+if __name__ == '__main__':
+    test_module = Bert()
diff --git a/hub_module/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/README.md b/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/README.md
similarity index 100%
rename from hub_module/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/README.md
rename to modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/README.md
diff --git a/hub_module/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/model/__init__.py b/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/bert_cased_L_12_H_768_A_12/model/__init__.py
rename to modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/__init__.py
diff --git a/hub_module/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/__init__.py b/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/model/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/__init__.py
rename to modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/model/__init__.py
diff --git a/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/model/bert.py b/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/model/bert.py
new file mode 100644
index 0000000000000000000000000000000000000000..d62917a76488580560bf3f5e23e5e8b64b6c652c
--- /dev/null
+++ b/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/model/bert.py
@@ -0,0 +1,196 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""BERT model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import six
+import json
+
+import paddle.fluid as fluid
+
+from bert_cased_L_24_H_1024_A_16.model.transformer_encoder import encoder, pre_process_layer
+
+
+class BertConfig(object):
+    def __init__(self, config_path):
+        self._config_dict = self._parse(config_path)
+
+    def _parse(self, config_path):
+        try:
+            with open(config_path) as json_file:
+                config_dict = json.load(json_file)
+        except Exception:
+            raise IOError("Error in parsing bert model config file '%s'" % config_path)
+        else:
+            return config_dict
+
+    def __getitem__(self, key):
+        return self._config_dict[key]
+
+    def print_config(self):
+        for arg, value in sorted(six.iteritems(self._config_dict)):
+            print('%s: %s' % (arg, value))
+        print('------------------------------------------------')
+
+
+class BertModel(object):
+    def __init__(self, src_ids, position_ids, sentence_ids, input_mask, config, weight_sharing=True, use_fp16=False):
+
+        self._emb_size = config['hidden_size']
+        self._n_layer = config['num_hidden_layers']
+        self._n_head = config['num_attention_heads']
+        self._voc_size = config['vocab_size']
+        self._max_position_seq_len = config['max_position_embeddings']
+        self._sent_types = config['type_vocab_size']
+        self._hidden_act = config['hidden_act']
+        self._prepostprocess_dropout = config['hidden_dropout_prob']
+        self._attention_dropout = config['attention_probs_dropout_prob']
+        self._weight_sharing = weight_sharing
+
+        self._word_emb_name = "word_embedding"
+        self._pos_emb_name = "pos_embedding"
+        self._sent_emb_name = "sent_embedding"
+        self._dtype = "float16" if use_fp16 else "float32"
+
+        # Initialize all weights with the truncated normal initializer; all
+        # biases are initialized to constant zero by default.
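+        # For example, the released BERT configs use initializer_range = 0.02,
+        # i.e. a truncated normal with standard deviation 0.02.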
+        self._param_initializer = fluid.initializer.TruncatedNormal(scale=config['initializer_range'])
+
+        self._build_model(src_ids, position_ids, sentence_ids, input_mask)
+
+    def _build_model(self, src_ids, position_ids, sentence_ids, input_mask):
+        # padding id in vocabulary must be set to 0
+        emb_out = fluid.layers.embedding(
+            input=src_ids,
+            size=[self._voc_size, self._emb_size],
+            dtype=self._dtype,
+            param_attr=fluid.ParamAttr(name=self._word_emb_name, initializer=self._param_initializer),
+            is_sparse=False)
+        position_emb_out = fluid.layers.embedding(
+            input=position_ids,
+            size=[self._max_position_seq_len, self._emb_size],
+            dtype=self._dtype,
+            param_attr=fluid.ParamAttr(name=self._pos_emb_name, initializer=self._param_initializer))
+
+        sent_emb_out = fluid.layers.embedding(
+            sentence_ids,
+            size=[self._sent_types, self._emb_size],
+            dtype=self._dtype,
+            param_attr=fluid.ParamAttr(name=self._sent_emb_name, initializer=self._param_initializer))
+
+        emb_out = emb_out + position_emb_out
+        emb_out = emb_out + sent_emb_out
+
+        emb_out = pre_process_layer(emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder')
+
+        if self._dtype == "float16":
+            input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype)
+
+        self_attn_mask = fluid.layers.matmul(x=input_mask, y=input_mask, transpose_y=True)
+        self_attn_mask = fluid.layers.scale(x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False)
+        n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] * self._n_head, axis=1)
+        n_head_self_attn_mask.stop_gradient = True
+
+        self._enc_out = encoder(
+            enc_input=emb_out,
+            attn_bias=n_head_self_attn_mask,
+            n_layer=self._n_layer,
+            n_head=self._n_head,
+            d_key=self._emb_size // self._n_head,
+            d_value=self._emb_size // self._n_head,
+            d_model=self._emb_size,
+            d_inner_hid=self._emb_size * 4,
+            prepostprocess_dropout=self._prepostprocess_dropout,
+            attention_dropout=self._attention_dropout,
+            relu_dropout=0,
+            hidden_act=self._hidden_act,
+            preprocess_cmd="",
+            postprocess_cmd="dan",
+            param_initializer=self._param_initializer,
+            name='encoder')
+
+    def get_sequence_output(self):
+        return self._enc_out
+
+    def get_pooled_output(self):
+        """Get the first feature of each sequence for classification"""
+
+        next_sent_feat = fluid.layers.slice(input=self._enc_out, axes=[1], starts=[0], ends=[1])
+        next_sent_feat = fluid.layers.fc(
+            input=next_sent_feat,
+            size=self._emb_size,
+            act="tanh",
+            param_attr=fluid.ParamAttr(name="pooled_fc.w_0", initializer=self._param_initializer),
+            bias_attr="pooled_fc.b_0")
+        return next_sent_feat
+
+    def get_pretraining_output(self, mask_label, mask_pos, labels):
+        """Get the loss & accuracy for pretraining"""
+
+        mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32')
+
+        # extract the first token feature in each sentence
+        next_sent_feat = self.get_pooled_output()
+        reshaped_emb_out = fluid.layers.reshape(x=self._enc_out, shape=[-1, self._emb_size])
+        # extract masked tokens' feature
+        mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos)
+
+        # transform: fc
+        mask_trans_feat = fluid.layers.fc(
+            input=mask_feat,
+            size=self._emb_size,
+            act=self._hidden_act,
+            param_attr=fluid.ParamAttr(name='mask_lm_trans_fc.w_0', initializer=self._param_initializer),
+            bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0'))
+        # transform: layer norm
+        mask_trans_feat = pre_process_layer(mask_trans_feat, 'n', name='mask_lm_trans')
+
+        mask_lm_out_bias_attr = fluid.ParamAttr(
+            name="mask_lm_out_fc.b_0",
+            initializer=fluid.initializer.Constant(value=0.0))
+        if self._weight_sharing:
+            fc_out = fluid.layers.matmul(
+                x=mask_trans_feat,
+                y=fluid.default_main_program().global_block().var(self._word_emb_name),
+                transpose_y=True)
+            fc_out += fluid.layers.create_parameter(
+                shape=[self._voc_size], dtype=self._dtype, attr=mask_lm_out_bias_attr, is_bias=True)
+
+        else:
+            fc_out = fluid.layers.fc(
+                input=mask_trans_feat,
+                size=self._voc_size,
+                param_attr=fluid.ParamAttr(name="mask_lm_out_fc.w_0", initializer=self._param_initializer),
+                bias_attr=mask_lm_out_bias_attr)
+
+        mask_lm_loss = fluid.layers.softmax_with_cross_entropy(logits=fc_out, label=mask_label)
+        mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss)
+
+        next_sent_fc_out = fluid.layers.fc(
+            input=next_sent_feat,
+            size=2,
+            param_attr=fluid.ParamAttr(name="next_sent_fc.w_0", initializer=self._param_initializer),
+            bias_attr="next_sent_fc.b_0")
+
+        next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy(
+            logits=next_sent_fc_out, label=labels, return_softmax=True)
+
+        next_sent_acc = fluid.layers.accuracy(input=next_sent_softmax, label=labels)
+
+        mean_next_sent_loss = fluid.layers.mean(next_sent_loss)
+
+        loss = mean_next_sent_loss + mean_mask_lm_loss
+        return next_sent_acc, mean_mask_lm_loss, loss
diff --git a/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/model/transformer_encoder.py b/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/model/transformer_encoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..53051cde80308a17a30d9b92de11c712b63da406
--- /dev/null
+++ b/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/model/transformer_encoder.py
@@ -0,0 +1,288 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Transformer encoder."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from functools import partial
+
+import paddle.fluid as fluid
+import paddle.fluid.layers as layers
+
+
+def multi_head_attention(queries,
+                         keys,
+                         values,
+                         attn_bias,
+                         d_key,
+                         d_value,
+                         d_model,
+                         n_head=1,
+                         dropout_rate=0.,
+                         cache=None,
+                         param_initializer=None,
+                         name='multi_head_att'):
+    """
+    Multi-Head Attention. Note that attn_bias is added to the logits before
+    computing the softmax activation, to mask selected positions so that they
+    are not considered in the attention weights.
+    """
+    keys = queries if keys is None else keys
+    values = keys if values is None else values
+
+    if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
+        raise ValueError("Inputs: queries, keys and values should all be 3-D tensors.")
+
+    def __compute_qkv(queries, keys, values, n_head, d_key, d_value):
+        """
+        Add linear projection to queries, keys, and values.
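+        For self-attention, queries, keys and values are the same tensor, so
+        q, k and v are three different learned projections of one input.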
+ """ + q = layers.fc( + input=queries, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_query_fc.w_0', initializer=param_initializer), + bias_attr=name + '_query_fc.b_0') + k = layers.fc( + input=keys, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_key_fc.w_0', initializer=param_initializer), + bias_attr=name + '_key_fc.b_0') + v = layers.fc( + input=values, + size=d_value * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_value_fc.w_0', initializer=param_initializer), + bias_attr=name + '_value_fc.b_0') + return q, k, v + + def __split_heads(x, n_head): + """ + Reshape the last dimension of inpunt tensor x so that it becomes two + dimensions and then transpose. Specifically, input a tensor with shape + [bs, max_sequence_length, n_head * hidden_dim] then output a tensor + with shape [bs, n_head, max_sequence_length, hidden_dim]. + """ + hidden_size = x.shape[-1] + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + reshaped = layers.reshape(x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) + + # permuate the dimensions into: + # [batch_size, n_head, max_sequence_len, hidden_size_per_head] + return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + + def __combine_heads(x): + """ + Transpose and then reshape the last two dimensions of inpunt tensor x + so that it becomes one dimension, which is reverse to __split_heads. + """ + if len(x.shape) == 3: return x + if len(x.shape) != 4: + raise ValueError("Input(x) should be a 4-D Tensor.") + + trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + return layers.reshape(x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], inplace=True) + + def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): + """ + Scaled Dot-Product Attention + """ + scaled_q = layers.scale(x=q, scale=d_key**-0.5) + product = layers.matmul(x=scaled_q, y=k, transpose_y=True) + if attn_bias: + product += attn_bias + weights = layers.softmax(product) + if dropout_rate: + weights = layers.dropout( + weights, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + out = layers.matmul(weights, v) + return out + + q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) + + if cache is not None: # use cache and concat time steps + # Since the inplace reshape in __split_heads changes the shape of k and + # v, which is the cache input for next time step, reshape the cache + # input from the previous time step first. + k = cache["k"] = layers.concat([layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) + v = cache["v"] = layers.concat([layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) + + q = __split_heads(q, n_head) + k = __split_heads(k, n_head) + v = __split_heads(v, n_head) + + ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate) + + out = __combine_heads(ctx_multiheads) + + # Project back to the model size. 
+    proj_out = layers.fc(
+        input=out,
+        size=d_model,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_output_fc.w_0', initializer=param_initializer),
+        bias_attr=name + '_output_fc.b_0')
+    return proj_out
+
+
+def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate, hidden_act, param_initializer=None, name='ffn'):
+    """
+    Position-wise Feed-Forward Networks.
+    This module consists of two linear transformations with a ReLU activation
+    in between, applied to each position separately and identically.
+    """
+    hidden = layers.fc(
+        input=x,
+        size=d_inner_hid,
+        num_flatten_dims=2,
+        act=hidden_act,
+        param_attr=fluid.ParamAttr(name=name + '_fc_0.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_0.b_0')
+    if dropout_rate:
+        hidden = layers.dropout(
+            hidden, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    out = layers.fc(
+        input=hidden,
+        size=d_hid,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_fc_1.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_1.b_0')
+    return out
+
+
+def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., name=''):
+    """
+    Optionally add residual connection, layer normalization and dropout to the
+    out tensor, according to the value of process_cmd.
+    This will be used before or after multi-head attention and position-wise
+    feed-forward networks.
+    """
+    for cmd in process_cmd:
+        if cmd == "a":  # add residual connection
+            out = out + prev_out if prev_out else out
+        elif cmd == "n":  # add layer normalization
+            out_dtype = out.dtype
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float32")
+            out = layers.layer_norm(
+                out,
+                begin_norm_axis=len(out.shape) - 1,
+                param_attr=fluid.ParamAttr(name=name + '_layer_norm_scale', initializer=fluid.initializer.Constant(1.)),
+                bias_attr=fluid.ParamAttr(name=name + '_layer_norm_bias', initializer=fluid.initializer.Constant(0.)))
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float16")
+        elif cmd == "d":  # add dropout
+            if dropout_rate:
+                out = layers.dropout(
+                    out, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    return out
+
+
+pre_process_layer = partial(pre_post_process_layer, None)
+post_process_layer = pre_post_process_layer
+
+
+def encoder_layer(enc_input,
+                  attn_bias,
+                  n_head,
+                  d_key,
+                  d_value,
+                  d_model,
+                  d_inner_hid,
+                  prepostprocess_dropout,
+                  attention_dropout,
+                  relu_dropout,
+                  hidden_act,
+                  preprocess_cmd="n",
+                  postprocess_cmd="da",
+                  param_initializer=None,
+                  name=''):
+    """The encoder layers that can be stacked to form a deep encoder.
+    This module consists of a multi-head (self) attention sub-layer followed
+    by position-wise feed-forward networks; both components are wrapped with
+    post_process_layer to add residual connection, layer normalization and
+    dropout.
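+    BertModel drives this stack with preprocess_cmd="" and
+    postprocess_cmd="dan", i.e. the post-LN Transformer variant.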
+ """ + attn_output = multi_head_attention( + pre_process_layer(enc_input, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_att'), + None, + None, + attn_bias, + d_key, + d_value, + d_model, + n_head, + attention_dropout, + param_initializer=param_initializer, + name=name + '_multi_head_att') + attn_output = post_process_layer( + enc_input, attn_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_att') + ffd_output = positionwise_feed_forward( + pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_ffn'), + d_inner_hid, + d_model, + relu_dropout, + hidden_act, + param_initializer=param_initializer, + name=name + '_ffn') + return post_process_layer(attn_output, ffd_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_ffn') + + +def encoder(enc_input, + attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """ + The encoder is composed of a stack of identical layers returned by calling + encoder_layer. + """ + for i in range(n_layer): + enc_output = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + name=name + '_layer_' + str(i)) + enc_input = enc_output + enc_output = pre_process_layer(enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") + + return enc_output diff --git a/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/module.py b/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/module.py new file mode 100644 index 0000000000000000000000000000000000000000..861a2c0a0a5ffcc007060bfb0cb04e98173c7091 --- /dev/null +++ b/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/module.py @@ -0,0 +1,74 @@ +# coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from paddlehub import TransformerModule
+from paddlehub.module.module import moduleinfo
+
+from bert_cased_L_24_H_1024_A_16.model.bert import BertConfig, BertModel
+
+
+@moduleinfo(
+    name="bert_cased_L-24_H-1024_A-16",
+    version="1.1.0",
+    summary="bert_cased_L-24_H-1024_A-16, 24-layer, 1024-hidden, 16-heads, 340M parameters",
+    author="paddlepaddle",
+    author_email="paddle-dev@baidu.com",
+    type="nlp/semantic_model",
+)
+class Bert(TransformerModule):
+    def _initialize(self):
+        self.MAX_SEQ_LEN = 512
+        self.params_path = os.path.join(self.directory, "assets", "params")
+        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
+
+        bert_config_path = os.path.join(self.directory, "assets", "bert_config.json")
+        self.bert_config = BertConfig(bert_config_path)
+
+    def net(self, input_ids, position_ids, segment_ids, input_mask):
+        """
+        Create the neural network.
+
+        Args:
+            input_ids (tensor): the word ids.
+            position_ids (tensor): the position ids.
+            segment_ids (tensor): the segment ids.
+            input_mask (tensor): the padding mask.
+
+        Returns:
+            pooled_output (tensor): sentence-level output for classification tasks.
+            sequence_output (tensor): token-level output for sequence tasks.
+        """
+        bert = BertModel(
+            src_ids=input_ids,
+            position_ids=position_ids,
+            sentence_ids=segment_ids,
+            input_mask=input_mask,
+            config=self.bert_config,
+            use_fp16=False)
+        pooled_output = bert.get_pooled_output()
+        sequence_output = bert.get_sequence_output()
+        return pooled_output, sequence_output
+
+
+if __name__ == '__main__':
+    test_module = Bert()
diff --git a/hub_module/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/README.md b/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/README.md
similarity index 100%
rename from hub_module/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/README.md
rename to modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/README.md
diff --git a/hub_module/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/model/__init__.py b/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/bert_cased_L_24_H_1024_A_16/model/__init__.py
rename to modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/__init__.py
diff --git a/hub_module/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/__init__.py b/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/model/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/__init__.py
rename to modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/model/__init__.py
diff --git a/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/model/bert.py b/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/model/bert.py
new file mode 100644
index 0000000000000000000000000000000000000000..023d67717c5b56367eb7bed0046b864f35b5a1da
--- /dev/null
+++ b/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/model/bert.py
@@ -0,0 +1,196 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""BERT model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import six
+import json
+
+import paddle.fluid as fluid
+
+from bert_chinese_L_12_H_768_A_12.model.transformer_encoder import encoder, pre_process_layer
+
+
+class BertConfig(object):
+    def __init__(self, config_path):
+        self._config_dict = self._parse(config_path)
+
+    def _parse(self, config_path):
+        try:
+            with open(config_path) as json_file:
+                config_dict = json.load(json_file)
+        except Exception:
+            raise IOError("Error in parsing bert model config file '%s'" % config_path)
+        else:
+            return config_dict
+
+    def __getitem__(self, key):
+        return self._config_dict[key]
+
+    def print_config(self):
+        for arg, value in sorted(six.iteritems(self._config_dict)):
+            print('%s: %s' % (arg, value))
+        print('------------------------------------------------')
+
+
+class BertModel(object):
+    def __init__(self, src_ids, position_ids, sentence_ids, input_mask, config, weight_sharing=True, use_fp16=False):
+
+        self._emb_size = config['hidden_size']
+        self._n_layer = config['num_hidden_layers']
+        self._n_head = config['num_attention_heads']
+        self._voc_size = config['vocab_size']
+        self._max_position_seq_len = config['max_position_embeddings']
+        self._sent_types = config['type_vocab_size']
+        self._hidden_act = config['hidden_act']
+        self._prepostprocess_dropout = config['hidden_dropout_prob']
+        self._attention_dropout = config['attention_probs_dropout_prob']
+        self._weight_sharing = weight_sharing
+
+        self._word_emb_name = "word_embedding"
+        self._pos_emb_name = "pos_embedding"
+        self._sent_emb_name = "sent_embedding"
+        self._dtype = "float16" if use_fp16 else "float32"
+
+        # Initialize all weights with the truncated normal initializer; all
+        # biases are initialized to constant zero by default.
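+        # Note: with weight_sharing=True (the default), get_pretraining_output
+        # reuses the word_embedding matrix as the masked-LM output projection.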
+        self._param_initializer = fluid.initializer.TruncatedNormal(scale=config['initializer_range'])
+
+        self._build_model(src_ids, position_ids, sentence_ids, input_mask)
+
+    def _build_model(self, src_ids, position_ids, sentence_ids, input_mask):
+        # padding id in vocabulary must be set to 0
+        emb_out = fluid.layers.embedding(
+            input=src_ids,
+            size=[self._voc_size, self._emb_size],
+            dtype=self._dtype,
+            param_attr=fluid.ParamAttr(name=self._word_emb_name, initializer=self._param_initializer),
+            is_sparse=False)
+        position_emb_out = fluid.layers.embedding(
+            input=position_ids,
+            size=[self._max_position_seq_len, self._emb_size],
+            dtype=self._dtype,
+            param_attr=fluid.ParamAttr(name=self._pos_emb_name, initializer=self._param_initializer))
+
+        sent_emb_out = fluid.layers.embedding(
+            sentence_ids,
+            size=[self._sent_types, self._emb_size],
+            dtype=self._dtype,
+            param_attr=fluid.ParamAttr(name=self._sent_emb_name, initializer=self._param_initializer))
+
+        emb_out = emb_out + position_emb_out
+        emb_out = emb_out + sent_emb_out
+
+        emb_out = pre_process_layer(emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder')
+
+        if self._dtype == "float16":
+            input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype)
+
+        self_attn_mask = fluid.layers.matmul(x=input_mask, y=input_mask, transpose_y=True)
+        self_attn_mask = fluid.layers.scale(x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False)
+        n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] * self._n_head, axis=1)
+        n_head_self_attn_mask.stop_gradient = True
+
+        self._enc_out = encoder(
+            enc_input=emb_out,
+            attn_bias=n_head_self_attn_mask,
+            n_layer=self._n_layer,
+            n_head=self._n_head,
+            d_key=self._emb_size // self._n_head,
+            d_value=self._emb_size // self._n_head,
+            d_model=self._emb_size,
+            d_inner_hid=self._emb_size * 4,
+            prepostprocess_dropout=self._prepostprocess_dropout,
+            attention_dropout=self._attention_dropout,
+            relu_dropout=0,
+            hidden_act=self._hidden_act,
+            preprocess_cmd="",
+            postprocess_cmd="dan",
+            param_initializer=self._param_initializer,
+            name='encoder')
+
+    def get_sequence_output(self):
+        return self._enc_out
+
+    def get_pooled_output(self):
+        """Get the first feature of each sequence for classification"""
+
+        next_sent_feat = fluid.layers.slice(input=self._enc_out, axes=[1], starts=[0], ends=[1])
+        next_sent_feat = fluid.layers.fc(
+            input=next_sent_feat,
+            size=self._emb_size,
+            act="tanh",
+            param_attr=fluid.ParamAttr(name="pooled_fc.w_0", initializer=self._param_initializer),
+            bias_attr="pooled_fc.b_0")
+        return next_sent_feat
+
+    def get_pretraining_output(self, mask_label, mask_pos, labels):
+        """Get the loss & accuracy for pretraining"""
+
+        mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32')
+
+        # extract the first token feature in each sentence
+        next_sent_feat = self.get_pooled_output()
+        reshaped_emb_out = fluid.layers.reshape(x=self._enc_out, shape=[-1, self._emb_size])
+        # extract masked tokens' feature
+        mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos)
+
+        # transform: fc
+        mask_trans_feat = fluid.layers.fc(
+            input=mask_feat,
+            size=self._emb_size,
+            act=self._hidden_act,
+            param_attr=fluid.ParamAttr(name='mask_lm_trans_fc.w_0', initializer=self._param_initializer),
+            bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0'))
+        # transform: layer norm
+        mask_trans_feat = pre_process_layer(mask_trans_feat, 'n', name='mask_lm_trans')
+
+        mask_lm_out_bias_attr = fluid.ParamAttr(
+            name="mask_lm_out_fc.b_0",
+            initializer=fluid.initializer.Constant(value=0.0))
+        if self._weight_sharing:
+            fc_out = fluid.layers.matmul(
+                x=mask_trans_feat,
+                y=fluid.default_main_program().global_block().var(self._word_emb_name),
+                transpose_y=True)
+            fc_out += fluid.layers.create_parameter(
+                shape=[self._voc_size], dtype=self._dtype, attr=mask_lm_out_bias_attr, is_bias=True)
+
+        else:
+            fc_out = fluid.layers.fc(
+                input=mask_trans_feat,
+                size=self._voc_size,
+                param_attr=fluid.ParamAttr(name="mask_lm_out_fc.w_0", initializer=self._param_initializer),
+                bias_attr=mask_lm_out_bias_attr)
+
+        mask_lm_loss = fluid.layers.softmax_with_cross_entropy(logits=fc_out, label=mask_label)
+        mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss)
+
+        next_sent_fc_out = fluid.layers.fc(
+            input=next_sent_feat,
+            size=2,
+            param_attr=fluid.ParamAttr(name="next_sent_fc.w_0", initializer=self._param_initializer),
+            bias_attr="next_sent_fc.b_0")
+
+        next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy(
+            logits=next_sent_fc_out, label=labels, return_softmax=True)
+
+        next_sent_acc = fluid.layers.accuracy(input=next_sent_softmax, label=labels)
+
+        mean_next_sent_loss = fluid.layers.mean(next_sent_loss)
+
+        loss = mean_next_sent_loss + mean_mask_lm_loss
+        return next_sent_acc, mean_mask_lm_loss, loss
diff --git a/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/model/transformer_encoder.py b/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/model/transformer_encoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..53051cde80308a17a30d9b92de11c712b63da406
--- /dev/null
+++ b/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/model/transformer_encoder.py
@@ -0,0 +1,288 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Transformer encoder."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from functools import partial
+
+import paddle.fluid as fluid
+import paddle.fluid.layers as layers
+
+
+def multi_head_attention(queries,
+                         keys,
+                         values,
+                         attn_bias,
+                         d_key,
+                         d_value,
+                         d_model,
+                         n_head=1,
+                         dropout_rate=0.,
+                         cache=None,
+                         param_initializer=None,
+                         name='multi_head_att'):
+    """
+    Multi-Head Attention. Note that attn_bias is added to the logits before
+    computing the softmax activation, to mask selected positions so that they
+    are not considered in the attention weights.
+    """
+    keys = queries if keys is None else keys
+    values = keys if values is None else values
+
+    if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
+        raise ValueError("Inputs: queries, keys and values should all be 3-D tensors.")
+
+    def __compute_qkv(queries, keys, values, n_head, d_key, d_value):
+        """
+        Add linear projection to queries, keys, and values.
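+        num_flatten_dims=2 keeps [bs, seq_len] as batch dimensions, so each
+        fc below transforms only the last (feature) dimension.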
+ """ + q = layers.fc( + input=queries, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_query_fc.w_0', initializer=param_initializer), + bias_attr=name + '_query_fc.b_0') + k = layers.fc( + input=keys, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_key_fc.w_0', initializer=param_initializer), + bias_attr=name + '_key_fc.b_0') + v = layers.fc( + input=values, + size=d_value * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_value_fc.w_0', initializer=param_initializer), + bias_attr=name + '_value_fc.b_0') + return q, k, v + + def __split_heads(x, n_head): + """ + Reshape the last dimension of inpunt tensor x so that it becomes two + dimensions and then transpose. Specifically, input a tensor with shape + [bs, max_sequence_length, n_head * hidden_dim] then output a tensor + with shape [bs, n_head, max_sequence_length, hidden_dim]. + """ + hidden_size = x.shape[-1] + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + reshaped = layers.reshape(x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) + + # permuate the dimensions into: + # [batch_size, n_head, max_sequence_len, hidden_size_per_head] + return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + + def __combine_heads(x): + """ + Transpose and then reshape the last two dimensions of inpunt tensor x + so that it becomes one dimension, which is reverse to __split_heads. + """ + if len(x.shape) == 3: return x + if len(x.shape) != 4: + raise ValueError("Input(x) should be a 4-D Tensor.") + + trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + return layers.reshape(x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], inplace=True) + + def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): + """ + Scaled Dot-Product Attention + """ + scaled_q = layers.scale(x=q, scale=d_key**-0.5) + product = layers.matmul(x=scaled_q, y=k, transpose_y=True) + if attn_bias: + product += attn_bias + weights = layers.softmax(product) + if dropout_rate: + weights = layers.dropout( + weights, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + out = layers.matmul(weights, v) + return out + + q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) + + if cache is not None: # use cache and concat time steps + # Since the inplace reshape in __split_heads changes the shape of k and + # v, which is the cache input for next time step, reshape the cache + # input from the previous time step first. + k = cache["k"] = layers.concat([layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) + v = cache["v"] = layers.concat([layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) + + q = __split_heads(q, n_head) + k = __split_heads(k, n_head) + v = __split_heads(v, n_head) + + ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate) + + out = __combine_heads(ctx_multiheads) + + # Project back to the model size. 
+    proj_out = layers.fc(
+        input=out,
+        size=d_model,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_output_fc.w_0', initializer=param_initializer),
+        bias_attr=name + '_output_fc.b_0')
+    return proj_out
+
+
+def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate, hidden_act, param_initializer=None, name='ffn'):
+    """
+    Position-wise Feed-Forward Networks.
+    This module consists of two linear transformations with a ReLU activation
+    in between, applied to each position separately and identically.
+    """
+    hidden = layers.fc(
+        input=x,
+        size=d_inner_hid,
+        num_flatten_dims=2,
+        act=hidden_act,
+        param_attr=fluid.ParamAttr(name=name + '_fc_0.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_0.b_0')
+    if dropout_rate:
+        hidden = layers.dropout(
+            hidden, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    out = layers.fc(
+        input=hidden,
+        size=d_hid,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_fc_1.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_1.b_0')
+    return out
+
+
+def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., name=''):
+    """
+    Optionally add residual connection, layer normalization and dropout to the
+    out tensor, according to the value of process_cmd.
+    This will be used before or after multi-head attention and position-wise
+    feed-forward networks.
+    """
+    for cmd in process_cmd:
+        if cmd == "a":  # add residual connection
+            out = out + prev_out if prev_out else out
+        elif cmd == "n":  # add layer normalization
+            out_dtype = out.dtype
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float32")
+            out = layers.layer_norm(
+                out,
+                begin_norm_axis=len(out.shape) - 1,
+                param_attr=fluid.ParamAttr(name=name + '_layer_norm_scale', initializer=fluid.initializer.Constant(1.)),
+                bias_attr=fluid.ParamAttr(name=name + '_layer_norm_bias', initializer=fluid.initializer.Constant(0.)))
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float16")
+        elif cmd == "d":  # add dropout
+            if dropout_rate:
+                out = layers.dropout(
+                    out, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    return out
+
+
+pre_process_layer = partial(pre_post_process_layer, None)
+post_process_layer = pre_post_process_layer
+
+
+def encoder_layer(enc_input,
+                  attn_bias,
+                  n_head,
+                  d_key,
+                  d_value,
+                  d_model,
+                  d_inner_hid,
+                  prepostprocess_dropout,
+                  attention_dropout,
+                  relu_dropout,
+                  hidden_act,
+                  preprocess_cmd="n",
+                  postprocess_cmd="da",
+                  param_initializer=None,
+                  name=''):
+    """The encoder layers that can be stacked to form a deep encoder.
+    This module consists of a multi-head (self) attention sub-layer followed
+    by position-wise feed-forward networks; both components are wrapped with
+    post_process_layer to add residual connection, layer normalization and
+    dropout.
+ """ + attn_output = multi_head_attention( + pre_process_layer(enc_input, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_att'), + None, + None, + attn_bias, + d_key, + d_value, + d_model, + n_head, + attention_dropout, + param_initializer=param_initializer, + name=name + '_multi_head_att') + attn_output = post_process_layer( + enc_input, attn_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_att') + ffd_output = positionwise_feed_forward( + pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_ffn'), + d_inner_hid, + d_model, + relu_dropout, + hidden_act, + param_initializer=param_initializer, + name=name + '_ffn') + return post_process_layer(attn_output, ffd_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_ffn') + + +def encoder(enc_input, + attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """ + The encoder is composed of a stack of identical layers returned by calling + encoder_layer. + """ + for i in range(n_layer): + enc_output = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + name=name + '_layer_' + str(i)) + enc_input = enc_output + enc_output = pre_process_layer(enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") + + return enc_output diff --git a/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/module.py b/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/module.py new file mode 100644 index 0000000000000000000000000000000000000000..2d76cfb5d0276aa47d62f16078db1985447aab7e --- /dev/null +++ b/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/module.py @@ -0,0 +1,74 @@ +# coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from paddlehub import TransformerModule
+from paddlehub.module.module import moduleinfo
+
+from bert_chinese_L_12_H_768_A_12.model.bert import BertConfig, BertModel
+
+
+@moduleinfo(
+    name="bert_chinese_L-12_H-768_A-12",
+    version="1.1.0",
+    summary="bert_chinese_L-12_H-768_A-12, 12-layer, 768-hidden, 12-heads, 110M parameters",
+    author="paddlepaddle",
+    author_email="paddle-dev@baidu.com",
+    type="nlp/semantic_model",
+)
+class BertChinese(TransformerModule):
+    def _initialize(self):
+        self.MAX_SEQ_LEN = 512
+        self.params_path = os.path.join(self.directory, "assets", "params")
+        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
+
+        bert_config_path = os.path.join(self.directory, "assets", "bert_config.json")
+        self.bert_config = BertConfig(bert_config_path)
+
+    def net(self, input_ids, position_ids, segment_ids, input_mask):
+        """
+        Create the neural network.
+
+        Args:
+            input_ids (tensor): the word ids.
+            position_ids (tensor): the position ids.
+            segment_ids (tensor): the segment ids.
+            input_mask (tensor): the padding mask.
+
+        Returns:
+            pooled_output (tensor): sentence-level output for classification tasks.
+            sequence_output (tensor): token-level output for sequence tasks.
+        """
+        bert = BertModel(
+            src_ids=input_ids,
+            position_ids=position_ids,
+            sentence_ids=segment_ids,
+            input_mask=input_mask,
+            config=self.bert_config,
+            use_fp16=False)
+        pooled_output = bert.get_pooled_output()
+        sequence_output = bert.get_sequence_output()
+        return pooled_output, sequence_output
+
+
+if __name__ == '__main__':
+    test_module = BertChinese()
diff --git a/hub_module/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/README.md b/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/README.md
similarity index 100%
rename from hub_module/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/README.md
rename to modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/README.md
diff --git a/hub_module/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/model/__init__.py b/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/bert_chinese_L_12_H_768_A_12/model/__init__.py
rename to modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/__init__.py
diff --git a/hub_module/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/__init__.py b/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/model/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/__init__.py
rename to modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/model/__init__.py
diff --git a/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/model/bert.py b/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/model/bert.py
new file mode 100644
index 0000000000000000000000000000000000000000..029047a29fb7b57881fce909b38d7b7158b6f336
--- /dev/null
+++ b/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/model/bert.py
@@ -0,0 +1,196 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""BERT model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import six
+import json
+
+import paddle.fluid as fluid
+
+from bert_multi_cased_L_12_H_768_A_12.model.transformer_encoder import encoder, pre_process_layer
+
+
+class BertConfig(object):
+    def __init__(self, config_path):
+        self._config_dict = self._parse(config_path)
+
+    def _parse(self, config_path):
+        try:
+            with open(config_path) as json_file:
+                config_dict = json.load(json_file)
+        except Exception:
+            raise IOError("Error in parsing bert model config file '%s'" % config_path)
+        else:
+            return config_dict
+
+    def __getitem__(self, key):
+        return self._config_dict[key]
+
+    def print_config(self):
+        for arg, value in sorted(six.iteritems(self._config_dict)):
+            print('%s: %s' % (arg, value))
+        print('------------------------------------------------')
+
+
+class BertModel(object):
+    def __init__(self, src_ids, position_ids, sentence_ids, input_mask, config, weight_sharing=True, use_fp16=False):
+
+        self._emb_size = config['hidden_size']
+        self._n_layer = config['num_hidden_layers']
+        self._n_head = config['num_attention_heads']
+        self._voc_size = config['vocab_size']
+        self._max_position_seq_len = config['max_position_embeddings']
+        self._sent_types = config['type_vocab_size']
+        self._hidden_act = config['hidden_act']
+        self._prepostprocess_dropout = config['hidden_dropout_prob']
+        self._attention_dropout = config['attention_probs_dropout_prob']
+        self._weight_sharing = weight_sharing
+
+        self._word_emb_name = "word_embedding"
+        self._pos_emb_name = "pos_embedding"
+        self._sent_emb_name = "sent_embedding"
+        self._dtype = "float16" if use_fp16 else "float32"
+
+        # Initialize all weights with the truncated normal initializer; all
+        # biases are initialized to constant zero by default.
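+        # When use_fp16 is True, the embeddings below are created as float16;
+        # layer normalization is still computed in float32 (see
+        # pre_post_process_layer in transformer_encoder.py).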
+        self._param_initializer = fluid.initializer.TruncatedNormal(scale=config['initializer_range'])
+
+        self._build_model(src_ids, position_ids, sentence_ids, input_mask)
+
+    def _build_model(self, src_ids, position_ids, sentence_ids, input_mask):
+        # padding id in vocabulary must be set to 0
+        emb_out = fluid.layers.embedding(
+            input=src_ids,
+            size=[self._voc_size, self._emb_size],
+            dtype=self._dtype,
+            param_attr=fluid.ParamAttr(name=self._word_emb_name, initializer=self._param_initializer),
+            is_sparse=False)
+        position_emb_out = fluid.layers.embedding(
+            input=position_ids,
+            size=[self._max_position_seq_len, self._emb_size],
+            dtype=self._dtype,
+            param_attr=fluid.ParamAttr(name=self._pos_emb_name, initializer=self._param_initializer))
+
+        sent_emb_out = fluid.layers.embedding(
+            sentence_ids,
+            size=[self._sent_types, self._emb_size],
+            dtype=self._dtype,
+            param_attr=fluid.ParamAttr(name=self._sent_emb_name, initializer=self._param_initializer))
+
+        emb_out = emb_out + position_emb_out
+        emb_out = emb_out + sent_emb_out
+
+        emb_out = pre_process_layer(emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder')
+
+        if self._dtype == "float16":
+            input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype)
+
+        self_attn_mask = fluid.layers.matmul(x=input_mask, y=input_mask, transpose_y=True)
+        self_attn_mask = fluid.layers.scale(x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False)
+        n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] * self._n_head, axis=1)
+        n_head_self_attn_mask.stop_gradient = True
+
+        self._enc_out = encoder(
+            enc_input=emb_out,
+            attn_bias=n_head_self_attn_mask,
+            n_layer=self._n_layer,
+            n_head=self._n_head,
+            d_key=self._emb_size // self._n_head,
+            d_value=self._emb_size // self._n_head,
+            d_model=self._emb_size,
+            d_inner_hid=self._emb_size * 4,
+            prepostprocess_dropout=self._prepostprocess_dropout,
+            attention_dropout=self._attention_dropout,
+            relu_dropout=0,
+            hidden_act=self._hidden_act,
+            preprocess_cmd="",
+            postprocess_cmd="dan",
+            param_initializer=self._param_initializer,
+            name='encoder')
+
+    def get_sequence_output(self):
+        return self._enc_out
+
+    def get_pooled_output(self):
+        """Get the first feature of each sequence for classification"""
+
+        next_sent_feat = fluid.layers.slice(input=self._enc_out, axes=[1], starts=[0], ends=[1])
+        next_sent_feat = fluid.layers.fc(
+            input=next_sent_feat,
+            size=self._emb_size,
+            act="tanh",
+            param_attr=fluid.ParamAttr(name="pooled_fc.w_0", initializer=self._param_initializer),
+            bias_attr="pooled_fc.b_0")
+        return next_sent_feat
+
+    def get_pretraining_output(self, mask_label, mask_pos, labels):
+        """Get the loss & accuracy for pretraining"""
+
+        mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32')
+
+        # extract the first token feature in each sentence
+        next_sent_feat = self.get_pooled_output()
+        reshaped_emb_out = fluid.layers.reshape(x=self._enc_out, shape=[-1, self._emb_size])
+        # extract masked tokens' feature
+        mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos)
+
+        # transform: fc
+        mask_trans_feat = fluid.layers.fc(
+            input=mask_feat,
+            size=self._emb_size,
+            act=self._hidden_act,
+            param_attr=fluid.ParamAttr(name='mask_lm_trans_fc.w_0', initializer=self._param_initializer),
+            bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0'))
+        # transform: layer norm
+        mask_trans_feat = pre_process_layer(mask_trans_feat, 'n', name='mask_lm_trans')
+
+        mask_lm_out_bias_attr = fluid.ParamAttr(
+            name="mask_lm_out_fc.b_0",
+            initializer=fluid.initializer.Constant(value=0.0))
+        if self._weight_sharing:
+            fc_out = fluid.layers.matmul(
+                x=mask_trans_feat,
+                y=fluid.default_main_program().global_block().var(self._word_emb_name),
+                transpose_y=True)
+            fc_out += fluid.layers.create_parameter(
+                shape=[self._voc_size], dtype=self._dtype, attr=mask_lm_out_bias_attr, is_bias=True)
+
+        else:
+            fc_out = fluid.layers.fc(
+                input=mask_trans_feat,
+                size=self._voc_size,
+                param_attr=fluid.ParamAttr(name="mask_lm_out_fc.w_0", initializer=self._param_initializer),
+                bias_attr=mask_lm_out_bias_attr)
+
+        mask_lm_loss = fluid.layers.softmax_with_cross_entropy(logits=fc_out, label=mask_label)
+        mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss)
+
+        next_sent_fc_out = fluid.layers.fc(
+            input=next_sent_feat,
+            size=2,
+            param_attr=fluid.ParamAttr(name="next_sent_fc.w_0", initializer=self._param_initializer),
+            bias_attr="next_sent_fc.b_0")
+
+        next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy(
+            logits=next_sent_fc_out, label=labels, return_softmax=True)
+
+        next_sent_acc = fluid.layers.accuracy(input=next_sent_softmax, label=labels)
+
+        mean_next_sent_loss = fluid.layers.mean(next_sent_loss)
+
+        loss = mean_next_sent_loss + mean_mask_lm_loss
+        return next_sent_acc, mean_mask_lm_loss, loss
diff --git a/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/model/transformer_encoder.py b/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/model/transformer_encoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..53051cde80308a17a30d9b92de11c712b63da406
--- /dev/null
+++ b/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/model/transformer_encoder.py
@@ -0,0 +1,288 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Transformer encoder."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from functools import partial
+
+import paddle.fluid as fluid
+import paddle.fluid.layers as layers
+
+
+def multi_head_attention(queries,
+                         keys,
+                         values,
+                         attn_bias,
+                         d_key,
+                         d_value,
+                         d_model,
+                         n_head=1,
+                         dropout_rate=0.,
+                         cache=None,
+                         param_initializer=None,
+                         name='multi_head_att'):
+    """
+    Multi-Head Attention. Note that attn_bias is added to the logits before
+    computing the softmax activation, to mask selected positions so that they
+    are not considered in the attention weights.
+    """
+    keys = queries if keys is None else keys
+    values = keys if values is None else values
+
+    if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
+        raise ValueError("Inputs: queries, keys and values should all be 3-D tensors.")
+
+    def __compute_qkv(queries, keys, values, n_head, d_key, d_value):
+        """
+        Add linear projection to queries, keys, and values.
+ """ + q = layers.fc( + input=queries, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_query_fc.w_0', initializer=param_initializer), + bias_attr=name + '_query_fc.b_0') + k = layers.fc( + input=keys, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_key_fc.w_0', initializer=param_initializer), + bias_attr=name + '_key_fc.b_0') + v = layers.fc( + input=values, + size=d_value * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_value_fc.w_0', initializer=param_initializer), + bias_attr=name + '_value_fc.b_0') + return q, k, v + + def __split_heads(x, n_head): + """ + Reshape the last dimension of inpunt tensor x so that it becomes two + dimensions and then transpose. Specifically, input a tensor with shape + [bs, max_sequence_length, n_head * hidden_dim] then output a tensor + with shape [bs, n_head, max_sequence_length, hidden_dim]. + """ + hidden_size = x.shape[-1] + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + reshaped = layers.reshape(x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) + + # permuate the dimensions into: + # [batch_size, n_head, max_sequence_len, hidden_size_per_head] + return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + + def __combine_heads(x): + """ + Transpose and then reshape the last two dimensions of inpunt tensor x + so that it becomes one dimension, which is reverse to __split_heads. + """ + if len(x.shape) == 3: return x + if len(x.shape) != 4: + raise ValueError("Input(x) should be a 4-D Tensor.") + + trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + return layers.reshape(x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], inplace=True) + + def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): + """ + Scaled Dot-Product Attention + """ + scaled_q = layers.scale(x=q, scale=d_key**-0.5) + product = layers.matmul(x=scaled_q, y=k, transpose_y=True) + if attn_bias: + product += attn_bias + weights = layers.softmax(product) + if dropout_rate: + weights = layers.dropout( + weights, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + out = layers.matmul(weights, v) + return out + + q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) + + if cache is not None: # use cache and concat time steps + # Since the inplace reshape in __split_heads changes the shape of k and + # v, which is the cache input for next time step, reshape the cache + # input from the previous time step first. + k = cache["k"] = layers.concat([layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) + v = cache["v"] = layers.concat([layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) + + q = __split_heads(q, n_head) + k = __split_heads(k, n_head) + v = __split_heads(v, n_head) + + ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate) + + out = __combine_heads(ctx_multiheads) + + # Project back to the model size. 
+    proj_out = layers.fc(
+        input=out,
+        size=d_model,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_output_fc.w_0', initializer=param_initializer),
+        bias_attr=name + '_output_fc.b_0')
+    return proj_out
+
+
+def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate, hidden_act, param_initializer=None, name='ffn'):
+    """
+    Position-wise Feed-Forward Network.
+    This module consists of two linear transformations with an activation
+    (hidden_act) in between, applied to each position separately and
+    identically.
+    """
+    hidden = layers.fc(
+        input=x,
+        size=d_inner_hid,
+        num_flatten_dims=2,
+        act=hidden_act,
+        param_attr=fluid.ParamAttr(name=name + '_fc_0.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_0.b_0')
+    if dropout_rate:
+        hidden = layers.dropout(
+            hidden, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    out = layers.fc(
+        input=hidden,
+        size=d_hid,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_fc_1.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_1.b_0')
+    return out
+
+
+def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., name=''):
+    """
+    Optionally add residual connection, layer normalization and dropout to the
+    out tensor, according to the value of process_cmd.
+    This is used before or after multi-head attention and position-wise
+    feed-forward networks.
+    """
+    for cmd in process_cmd:
+        if cmd == "a":  # add residual connection
+            out = out + prev_out if prev_out else out
+        elif cmd == "n":  # add layer normalization
+            out_dtype = out.dtype
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float32")
+            out = layers.layer_norm(
+                out,
+                begin_norm_axis=len(out.shape) - 1,
+                param_attr=fluid.ParamAttr(name=name + '_layer_norm_scale', initializer=fluid.initializer.Constant(1.)),
+                bias_attr=fluid.ParamAttr(name=name + '_layer_norm_bias', initializer=fluid.initializer.Constant(0.)))
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float16")
+        elif cmd == "d":  # add dropout
+            if dropout_rate:
+                out = layers.dropout(
+                    out, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    return out
+
+
+pre_process_layer = partial(pre_post_process_layer, None)
+post_process_layer = pre_post_process_layer
+
+
+def encoder_layer(enc_input,
+                  attn_bias,
+                  n_head,
+                  d_key,
+                  d_value,
+                  d_model,
+                  d_inner_hid,
+                  prepostprocess_dropout,
+                  attention_dropout,
+                  relu_dropout,
+                  hidden_act,
+                  preprocess_cmd="n",
+                  postprocess_cmd="da",
+                  param_initializer=None,
+                  name=''):
+    """The encoder layers that can be stacked to form a deep encoder.
+    This module consists of a multi-head (self) attention sub-layer followed
+    by a position-wise feed-forward sub-layer, both accompanied by
+    post_process_layer to add residual connection, layer normalization and
+    dropout.
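+
+    BertModel calls encoder() with preprocess_cmd="" and postprocess_cmd="dan",
+    i.e. the post-LN ordering of the original BERT: dropout, then residual
+    add, then layer normalization after each sub-layer.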
+ """ + attn_output = multi_head_attention( + pre_process_layer(enc_input, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_att'), + None, + None, + attn_bias, + d_key, + d_value, + d_model, + n_head, + attention_dropout, + param_initializer=param_initializer, + name=name + '_multi_head_att') + attn_output = post_process_layer( + enc_input, attn_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_att') + ffd_output = positionwise_feed_forward( + pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_ffn'), + d_inner_hid, + d_model, + relu_dropout, + hidden_act, + param_initializer=param_initializer, + name=name + '_ffn') + return post_process_layer(attn_output, ffd_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_ffn') + + +def encoder(enc_input, + attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """ + The encoder is composed of a stack of identical layers returned by calling + encoder_layer. + """ + for i in range(n_layer): + enc_output = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + name=name + '_layer_' + str(i)) + enc_input = enc_output + enc_output = pre_process_layer(enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") + + return enc_output diff --git a/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/module.py b/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/module.py new file mode 100644 index 0000000000000000000000000000000000000000..abf59f7a69190286fe0501a0b65d6c8563ef451b --- /dev/null +++ b/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/module.py @@ -0,0 +1,74 @@ +# coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from paddlehub import TransformerModule
+from paddlehub.module.module import moduleinfo
+
+from bert_multi_cased_L_12_H_768_A_12.model.bert import BertConfig, BertModel
+
+
+@moduleinfo(
+    name="bert_multi_cased_L-12_H-768_A-12",
+    version="1.1.0",
+    summary="bert_multi_cased_L-12_H-768_A-12, 12-layer, 768-hidden, 12-heads, 110M parameters",
+    author="paddlepaddle",
+    author_email="paddle-dev@baidu.com",
+    type="nlp/semantic_model",
+)
+class Bert(TransformerModule):
+    def _initialize(self):
+        self.MAX_SEQ_LEN = 512
+        self.params_path = os.path.join(self.directory, "assets", "params")
+        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
+
+        bert_config_path = os.path.join(self.directory, "assets", "bert_config.json")
+        self.bert_config = BertConfig(bert_config_path)
+
+    def net(self, input_ids, position_ids, segment_ids, input_mask):
+        """
+        Create the neural network.
+
+        Args:
+            input_ids (tensor): the word ids.
+            position_ids (tensor): the position ids.
+            segment_ids (tensor): the segment ids.
+            input_mask (tensor): the padding mask.
+
+        Returns:
+            pooled_output (tensor): sentence-level output for classification tasks.
+            sequence_output (tensor): token-level output for sequence tasks.
+        """
+        bert = BertModel(
+            src_ids=input_ids,
+            position_ids=position_ids,
+            sentence_ids=segment_ids,
+            input_mask=input_mask,
+            config=self.bert_config,
+            use_fp16=False)
+        pooled_output = bert.get_pooled_output()
+        sequence_output = bert.get_sequence_output()
+        return pooled_output, sequence_output
+
+
+if __name__ == '__main__':
+    test_module = Bert()
diff --git a/hub_module/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/README.md b/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/README.md
similarity index 100%
rename from hub_module/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/README.md
rename to modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/README.md
diff --git a/hub_module/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/model/__init__.py b/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/bert_multi_cased_L_12_H_768_A_12/model/__init__.py
rename to modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/__init__.py
diff --git a/hub_module/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/__init__.py b/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/model/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/__init__.py
rename to modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/model/__init__.py
diff --git a/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/model/bert.py b/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/model/bert.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc02d7e84de1bbc4b16c9c556ea0a8e6790ffa36
--- /dev/null
+++ b/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/model/bert.py
@@ -0,0 +1,196 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""BERT model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import six
+import json
+
+import paddle.fluid as fluid
+
+from bert_multi_uncased_L_12_H_768_A_12.model.transformer_encoder import encoder, pre_process_layer
+
+
+class BertConfig(object):
+    def __init__(self, config_path):
+        self._config_dict = self._parse(config_path)
+
+    def _parse(self, config_path):
+        try:
+            with open(config_path) as json_file:
+                config_dict = json.load(json_file)
+        except Exception:
+            raise IOError("Error in parsing bert model config file '%s'" % config_path)
+        else:
+            return config_dict
+
+    def __getitem__(self, key):
+        return self._config_dict[key]
+
+    def print_config(self):
+        for arg, value in sorted(six.iteritems(self._config_dict)):
+            print('%s: %s' % (arg, value))
+        print('------------------------------------------------')
+
+
+class BertModel(object):
+    def __init__(self, src_ids, position_ids, sentence_ids, input_mask, config, weight_sharing=True, use_fp16=False):
+
+        self._emb_size = config['hidden_size']
+        self._n_layer = config['num_hidden_layers']
+        self._n_head = config['num_attention_heads']
+        self._voc_size = config['vocab_size']
+        self._max_position_seq_len = config['max_position_embeddings']
+        self._sent_types = config['type_vocab_size']
+        self._hidden_act = config['hidden_act']
+        self._prepostprocess_dropout = config['hidden_dropout_prob']
+        self._attention_dropout = config['attention_probs_dropout_prob']
+        self._weight_sharing = weight_sharing
+
+        self._word_emb_name = "word_embedding"
+        self._pos_emb_name = "pos_embedding"
+        self._sent_emb_name = "sent_embedding"
+        self._dtype = "float16" if use_fp16 else "float32"
+
+        # Initialize all weights with the truncated normal initializer; all
+        # biases are initialized to a constant zero by default.
+ self._param_initializer = fluid.initializer.TruncatedNormal(scale=config['initializer_range']) + + self._build_model(src_ids, position_ids, sentence_ids, input_mask) + + def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): + # padding id in vocabulary must be set to 0 + emb_out = fluid.layers.embedding( + input=src_ids, + size=[self._voc_size, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._word_emb_name, initializer=self._param_initializer), + is_sparse=False) + position_emb_out = fluid.layers.embedding( + input=position_ids, + size=[self._max_position_seq_len, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._pos_emb_name, initializer=self._param_initializer)) + + sent_emb_out = fluid.layers.embedding( + sentence_ids, + size=[self._sent_types, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._sent_emb_name, initializer=self._param_initializer)) + + emb_out = emb_out + position_emb_out + emb_out = emb_out + sent_emb_out + + emb_out = pre_process_layer(emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') + + if self._dtype == "float16": + input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) + + self_attn_mask = fluid.layers.matmul(x=input_mask, y=input_mask, transpose_y=True) + self_attn_mask = fluid.layers.scale(x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) + n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] * self._n_head, axis=1) + n_head_self_attn_mask.stop_gradient = True + + self._enc_out = encoder( + enc_input=emb_out, + attn_bias=n_head_self_attn_mask, + n_layer=self._n_layer, + n_head=self._n_head, + d_key=self._emb_size // self._n_head, + d_value=self._emb_size // self._n_head, + d_model=self._emb_size, + d_inner_hid=self._emb_size * 4, + prepostprocess_dropout=self._prepostprocess_dropout, + attention_dropout=self._attention_dropout, + relu_dropout=0, + hidden_act=self._hidden_act, + preprocess_cmd="", + postprocess_cmd="dan", + param_initializer=self._param_initializer, + name='encoder') + + def get_sequence_output(self): + return self._enc_out + + def get_pooled_output(self): + """Get the first feature of each sequence for classification""" + + next_sent_feat = fluid.layers.slice(input=self._enc_out, axes=[1], starts=[0], ends=[1]) + next_sent_feat = fluid.layers.fc( + input=next_sent_feat, + size=self._emb_size, + act="tanh", + param_attr=fluid.ParamAttr(name="pooled_fc.w_0", initializer=self._param_initializer), + bias_attr="pooled_fc.b_0") + return next_sent_feat + + def get_pretraining_output(self, mask_label, mask_pos, labels): + """Get the loss & accuracy for pretraining""" + + mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') + + # extract the first token feature in each sentence + next_sent_feat = self.get_pooled_output() + reshaped_emb_out = fluid.layers.reshape(x=self._enc_out, shape=[-1, self._emb_size]) + # extract masked tokens' feature + mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) + + # transform: fc + mask_trans_feat = fluid.layers.fc( + input=mask_feat, + size=self._emb_size, + act=self._hidden_act, + param_attr=fluid.ParamAttr(name='mask_lm_trans_fc.w_0', initializer=self._param_initializer), + bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) + # transform: layer norm + mask_trans_feat = pre_process_layer(mask_trans_feat, 'n', name='mask_lm_trans') + + mask_lm_out_bias_attr = fluid.ParamAttr( + name="mask_lm_out_fc.b_0", 
initializer=fluid.initializer.Constant(value=0.0))
+        if self._weight_sharing:
+            fc_out = fluid.layers.matmul(
+                x=mask_trans_feat,
+                y=fluid.default_main_program().global_block().var(self._word_emb_name),
+                transpose_y=True)
+            fc_out += fluid.layers.create_parameter(
+                shape=[self._voc_size], dtype=self._dtype, attr=mask_lm_out_bias_attr, is_bias=True)
+
+        else:
+            fc_out = fluid.layers.fc(
+                input=mask_trans_feat,
+                size=self._voc_size,
+                param_attr=fluid.ParamAttr(name="mask_lm_out_fc.w_0", initializer=self._param_initializer),
+                bias_attr=mask_lm_out_bias_attr)
+
+        mask_lm_loss = fluid.layers.softmax_with_cross_entropy(logits=fc_out, label=mask_label)
+        mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss)
+
+        next_sent_fc_out = fluid.layers.fc(
+            input=next_sent_feat,
+            size=2,
+            param_attr=fluid.ParamAttr(name="next_sent_fc.w_0", initializer=self._param_initializer),
+            bias_attr="next_sent_fc.b_0")
+
+        next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy(
+            logits=next_sent_fc_out, label=labels, return_softmax=True)
+
+        next_sent_acc = fluid.layers.accuracy(input=next_sent_softmax, label=labels)
+
+        mean_next_sent_loss = fluid.layers.mean(next_sent_loss)
+
+        loss = mean_next_sent_loss + mean_mask_lm_loss
+        return next_sent_acc, mean_mask_lm_loss, loss
diff --git a/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/model/transformer_encoder.py b/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/model/transformer_encoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..53051cde80308a17a30d9b92de11c712b63da406
--- /dev/null
+++ b/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/model/transformer_encoder.py
@@ -0,0 +1,288 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Transformer encoder."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from functools import partial
+
+import paddle.fluid as fluid
+import paddle.fluid.layers as layers
+
+
+def multi_head_attention(queries,
+                         keys,
+                         values,
+                         attn_bias,
+                         d_key,
+                         d_value,
+                         d_model,
+                         n_head=1,
+                         dropout_rate=0.,
+                         cache=None,
+                         param_initializer=None,
+                         name='multi_head_att'):
+    """
+    Multi-Head Attention. Note that attn_bias is added to the logits before
+    computing the softmax activation, so that masked positions are not
+    considered in the attention weights.
+    """
+    keys = queries if keys is None else keys
+    values = keys if values is None else values
+
+    if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
+        raise ValueError("Inputs: queries, keys and values should all be 3-D tensors.")
+
+    def __compute_qkv(queries, keys, values, n_head, d_key, d_value):
+        """
+        Add linear projection to queries, keys, and values.
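+
+        For the self-attention used by this encoder, queries, keys and values
+        share the shape [batch_size, max_seq_len, d_model]; each fc below maps
+        the last dimension to n_head * d_key (n_head * d_value for v) in one
+        fused projection, so no per-head weight slicing is needed.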
+ """ + q = layers.fc( + input=queries, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_query_fc.w_0', initializer=param_initializer), + bias_attr=name + '_query_fc.b_0') + k = layers.fc( + input=keys, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_key_fc.w_0', initializer=param_initializer), + bias_attr=name + '_key_fc.b_0') + v = layers.fc( + input=values, + size=d_value * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_value_fc.w_0', initializer=param_initializer), + bias_attr=name + '_value_fc.b_0') + return q, k, v + + def __split_heads(x, n_head): + """ + Reshape the last dimension of inpunt tensor x so that it becomes two + dimensions and then transpose. Specifically, input a tensor with shape + [bs, max_sequence_length, n_head * hidden_dim] then output a tensor + with shape [bs, n_head, max_sequence_length, hidden_dim]. + """ + hidden_size = x.shape[-1] + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + reshaped = layers.reshape(x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) + + # permuate the dimensions into: + # [batch_size, n_head, max_sequence_len, hidden_size_per_head] + return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + + def __combine_heads(x): + """ + Transpose and then reshape the last two dimensions of inpunt tensor x + so that it becomes one dimension, which is reverse to __split_heads. + """ + if len(x.shape) == 3: return x + if len(x.shape) != 4: + raise ValueError("Input(x) should be a 4-D Tensor.") + + trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + return layers.reshape(x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], inplace=True) + + def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): + """ + Scaled Dot-Product Attention + """ + scaled_q = layers.scale(x=q, scale=d_key**-0.5) + product = layers.matmul(x=scaled_q, y=k, transpose_y=True) + if attn_bias: + product += attn_bias + weights = layers.softmax(product) + if dropout_rate: + weights = layers.dropout( + weights, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + out = layers.matmul(weights, v) + return out + + q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) + + if cache is not None: # use cache and concat time steps + # Since the inplace reshape in __split_heads changes the shape of k and + # v, which is the cache input for next time step, reshape the cache + # input from the previous time step first. + k = cache["k"] = layers.concat([layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) + v = cache["v"] = layers.concat([layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) + + q = __split_heads(q, n_head) + k = __split_heads(k, n_head) + v = __split_heads(v, n_head) + + ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate) + + out = __combine_heads(ctx_multiheads) + + # Project back to the model size. 
+    proj_out = layers.fc(
+        input=out,
+        size=d_model,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_output_fc.w_0', initializer=param_initializer),
+        bias_attr=name + '_output_fc.b_0')
+    return proj_out
+
+
+def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate, hidden_act, param_initializer=None, name='ffn'):
+    """
+    Position-wise Feed-Forward Network.
+    This module consists of two linear transformations with an activation
+    (hidden_act) in between, applied to each position separately and
+    identically.
+    """
+    hidden = layers.fc(
+        input=x,
+        size=d_inner_hid,
+        num_flatten_dims=2,
+        act=hidden_act,
+        param_attr=fluid.ParamAttr(name=name + '_fc_0.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_0.b_0')
+    if dropout_rate:
+        hidden = layers.dropout(
+            hidden, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    out = layers.fc(
+        input=hidden,
+        size=d_hid,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_fc_1.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_1.b_0')
+    return out
+
+
+def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., name=''):
+    """
+    Optionally add residual connection, layer normalization and dropout to the
+    out tensor, according to the value of process_cmd.
+    This is used before or after multi-head attention and position-wise
+    feed-forward networks.
+    """
+    for cmd in process_cmd:
+        if cmd == "a":  # add residual connection
+            out = out + prev_out if prev_out else out
+        elif cmd == "n":  # add layer normalization
+            out_dtype = out.dtype
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float32")
+            out = layers.layer_norm(
+                out,
+                begin_norm_axis=len(out.shape) - 1,
+                param_attr=fluid.ParamAttr(name=name + '_layer_norm_scale', initializer=fluid.initializer.Constant(1.)),
+                bias_attr=fluid.ParamAttr(name=name + '_layer_norm_bias', initializer=fluid.initializer.Constant(0.)))
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float16")
+        elif cmd == "d":  # add dropout
+            if dropout_rate:
+                out = layers.dropout(
+                    out, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    return out
+
+
+pre_process_layer = partial(pre_post_process_layer, None)
+post_process_layer = pre_post_process_layer
+
+
+def encoder_layer(enc_input,
+                  attn_bias,
+                  n_head,
+                  d_key,
+                  d_value,
+                  d_model,
+                  d_inner_hid,
+                  prepostprocess_dropout,
+                  attention_dropout,
+                  relu_dropout,
+                  hidden_act,
+                  preprocess_cmd="n",
+                  postprocess_cmd="da",
+                  param_initializer=None,
+                  name=''):
+    """The encoder layers that can be stacked to form a deep encoder.
+    This module consists of a multi-head (self) attention sub-layer followed
+    by a position-wise feed-forward sub-layer, both accompanied by
+    post_process_layer to add residual connection, layer normalization and
+    dropout.
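+
+    BertModel calls encoder() with preprocess_cmd="" and postprocess_cmd="dan",
+    i.e. the post-LN ordering of the original BERT: dropout, then residual
+    add, then layer normalization after each sub-layer.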
+ """ + attn_output = multi_head_attention( + pre_process_layer(enc_input, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_att'), + None, + None, + attn_bias, + d_key, + d_value, + d_model, + n_head, + attention_dropout, + param_initializer=param_initializer, + name=name + '_multi_head_att') + attn_output = post_process_layer( + enc_input, attn_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_att') + ffd_output = positionwise_feed_forward( + pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_ffn'), + d_inner_hid, + d_model, + relu_dropout, + hidden_act, + param_initializer=param_initializer, + name=name + '_ffn') + return post_process_layer(attn_output, ffd_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_ffn') + + +def encoder(enc_input, + attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """ + The encoder is composed of a stack of identical layers returned by calling + encoder_layer. + """ + for i in range(n_layer): + enc_output = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + name=name + '_layer_' + str(i)) + enc_input = enc_output + enc_output = pre_process_layer(enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") + + return enc_output diff --git a/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/module.py b/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/module.py new file mode 100644 index 0000000000000000000000000000000000000000..5978a30c5fe613993065d40ed83fa7efb77a26a8 --- /dev/null +++ b/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/module.py @@ -0,0 +1,74 @@ +# coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from paddlehub import TransformerModule
+from paddlehub.module.module import moduleinfo
+
+from bert_multi_uncased_L_12_H_768_A_12.model.bert import BertConfig, BertModel
+
+
+@moduleinfo(
+    name="bert_multi_uncased_L-12_H-768_A-12",
+    version="1.0.0",
+    summary="bert_multi_uncased_L-12_H-768_A-12, 12-layer, 768-hidden, 12-heads, 110M parameters",
+    author="paddlepaddle",
+    author_email="paddle-dev@baidu.com",
+    type="nlp/semantic_model",
+)
+class Bert(TransformerModule):
+    def _initialize(self):
+        self.MAX_SEQ_LEN = 512
+        self.params_path = os.path.join(self.directory, "assets", "params")
+        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
+
+        bert_config_path = os.path.join(self.directory, "assets", "bert_config.json")
+        self.bert_config = BertConfig(bert_config_path)
+
+    def net(self, input_ids, position_ids, segment_ids, input_mask):
+        """
+        Create the neural network.
+
+        Args:
+            input_ids (tensor): the word ids.
+            position_ids (tensor): the position ids.
+            segment_ids (tensor): the segment ids.
+            input_mask (tensor): the padding mask.
+
+        Returns:
+            pooled_output (tensor): sentence-level output for classification tasks.
+            sequence_output (tensor): token-level output for sequence tasks.
+        """
+        bert = BertModel(
+            src_ids=input_ids,
+            position_ids=position_ids,
+            sentence_ids=segment_ids,
+            input_mask=input_mask,
+            config=self.bert_config,
+            use_fp16=False)
+        pooled_output = bert.get_pooled_output()
+        sequence_output = bert.get_sequence_output()
+        return pooled_output, sequence_output
+
+
+if __name__ == '__main__':
+    test_module = Bert()
diff --git a/hub_module/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/README.md b/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/README.md
similarity index 100%
rename from hub_module/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/README.md
rename to modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/README.md
diff --git a/hub_module/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/model/__init__.py b/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/bert_multi_uncased_L_12_H_768_A_12/model/__init__.py
rename to modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/__init__.py
diff --git a/hub_module/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/__init__.py b/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/model/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/__init__.py
rename to modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/model/__init__.py
diff --git a/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/model/bert.py b/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/model/bert.py
new file mode 100644
index 0000000000000000000000000000000000000000..cbfa4b24ec652ea511feaf1c330650d191fd7ce4
--- /dev/null
+++ b/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/model/bert.py
@@ -0,0 +1,196 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""BERT model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import six
+import json
+
+import paddle.fluid as fluid
+
+from bert_uncased_L_12_H_768_A_12.model.transformer_encoder import encoder, pre_process_layer
+
+
+class BertConfig(object):
+    def __init__(self, config_path):
+        self._config_dict = self._parse(config_path)
+
+    def _parse(self, config_path):
+        try:
+            with open(config_path) as json_file:
+                config_dict = json.load(json_file)
+        except Exception:
+            raise IOError("Error in parsing bert model config file '%s'" % config_path)
+        else:
+            return config_dict
+
+    def __getitem__(self, key):
+        return self._config_dict[key]
+
+    def print_config(self):
+        for arg, value in sorted(six.iteritems(self._config_dict)):
+            print('%s: %s' % (arg, value))
+        print('------------------------------------------------')
+
+
+class BertModel(object):
+    def __init__(self, src_ids, position_ids, sentence_ids, input_mask, config, weight_sharing=True, use_fp16=False):
+
+        self._emb_size = config['hidden_size']
+        self._n_layer = config['num_hidden_layers']
+        self._n_head = config['num_attention_heads']
+        self._voc_size = config['vocab_size']
+        self._max_position_seq_len = config['max_position_embeddings']
+        self._sent_types = config['type_vocab_size']
+        self._hidden_act = config['hidden_act']
+        self._prepostprocess_dropout = config['hidden_dropout_prob']
+        self._attention_dropout = config['attention_probs_dropout_prob']
+        self._weight_sharing = weight_sharing
+
+        self._word_emb_name = "word_embedding"
+        self._pos_emb_name = "pos_embedding"
+        self._sent_emb_name = "sent_embedding"
+        self._dtype = "float16" if use_fp16 else "float32"
+
+        # Initialize all weights with the truncated normal initializer; all
+        # biases are initialized to a constant zero by default.
+ self._param_initializer = fluid.initializer.TruncatedNormal(scale=config['initializer_range']) + + self._build_model(src_ids, position_ids, sentence_ids, input_mask) + + def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): + # padding id in vocabulary must be set to 0 + emb_out = fluid.layers.embedding( + input=src_ids, + size=[self._voc_size, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._word_emb_name, initializer=self._param_initializer), + is_sparse=False) + position_emb_out = fluid.layers.embedding( + input=position_ids, + size=[self._max_position_seq_len, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._pos_emb_name, initializer=self._param_initializer)) + + sent_emb_out = fluid.layers.embedding( + sentence_ids, + size=[self._sent_types, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._sent_emb_name, initializer=self._param_initializer)) + + emb_out = emb_out + position_emb_out + emb_out = emb_out + sent_emb_out + + emb_out = pre_process_layer(emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') + + if self._dtype == "float16": + input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) + + self_attn_mask = fluid.layers.matmul(x=input_mask, y=input_mask, transpose_y=True) + self_attn_mask = fluid.layers.scale(x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) + n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] * self._n_head, axis=1) + n_head_self_attn_mask.stop_gradient = True + + self._enc_out = encoder( + enc_input=emb_out, + attn_bias=n_head_self_attn_mask, + n_layer=self._n_layer, + n_head=self._n_head, + d_key=self._emb_size // self._n_head, + d_value=self._emb_size // self._n_head, + d_model=self._emb_size, + d_inner_hid=self._emb_size * 4, + prepostprocess_dropout=self._prepostprocess_dropout, + attention_dropout=self._attention_dropout, + relu_dropout=0, + hidden_act=self._hidden_act, + preprocess_cmd="", + postprocess_cmd="dan", + param_initializer=self._param_initializer, + name='encoder') + + def get_sequence_output(self): + return self._enc_out + + def get_pooled_output(self): + """Get the first feature of each sequence for classification""" + + next_sent_feat = fluid.layers.slice(input=self._enc_out, axes=[1], starts=[0], ends=[1]) + next_sent_feat = fluid.layers.fc( + input=next_sent_feat, + size=self._emb_size, + act="tanh", + param_attr=fluid.ParamAttr(name="pooled_fc.w_0", initializer=self._param_initializer), + bias_attr="pooled_fc.b_0") + return next_sent_feat + + def get_pretraining_output(self, mask_label, mask_pos, labels): + """Get the loss & accuracy for pretraining""" + + mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') + + # extract the first token feature in each sentence + next_sent_feat = self.get_pooled_output() + reshaped_emb_out = fluid.layers.reshape(x=self._enc_out, shape=[-1, self._emb_size]) + # extract masked tokens' feature + mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) + + # transform: fc + mask_trans_feat = fluid.layers.fc( + input=mask_feat, + size=self._emb_size, + act=self._hidden_act, + param_attr=fluid.ParamAttr(name='mask_lm_trans_fc.w_0', initializer=self._param_initializer), + bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) + # transform: layer norm + mask_trans_feat = pre_process_layer(mask_trans_feat, 'n', name='mask_lm_trans') + + mask_lm_out_bias_attr = fluid.ParamAttr( + name="mask_lm_out_fc.b_0", 
initializer=fluid.initializer.Constant(value=0.0))
+        if self._weight_sharing:
+            fc_out = fluid.layers.matmul(
+                x=mask_trans_feat,
+                y=fluid.default_main_program().global_block().var(self._word_emb_name),
+                transpose_y=True)
+            fc_out += fluid.layers.create_parameter(
+                shape=[self._voc_size], dtype=self._dtype, attr=mask_lm_out_bias_attr, is_bias=True)
+
+        else:
+            fc_out = fluid.layers.fc(
+                input=mask_trans_feat,
+                size=self._voc_size,
+                param_attr=fluid.ParamAttr(name="mask_lm_out_fc.w_0", initializer=self._param_initializer),
+                bias_attr=mask_lm_out_bias_attr)
+
+        mask_lm_loss = fluid.layers.softmax_with_cross_entropy(logits=fc_out, label=mask_label)
+        mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss)
+
+        next_sent_fc_out = fluid.layers.fc(
+            input=next_sent_feat,
+            size=2,
+            param_attr=fluid.ParamAttr(name="next_sent_fc.w_0", initializer=self._param_initializer),
+            bias_attr="next_sent_fc.b_0")
+
+        next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy(
+            logits=next_sent_fc_out, label=labels, return_softmax=True)
+
+        next_sent_acc = fluid.layers.accuracy(input=next_sent_softmax, label=labels)
+
+        mean_next_sent_loss = fluid.layers.mean(next_sent_loss)
+
+        loss = mean_next_sent_loss + mean_mask_lm_loss
+        return next_sent_acc, mean_mask_lm_loss, loss
diff --git a/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/model/transformer_encoder.py b/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/model/transformer_encoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..53051cde80308a17a30d9b92de11c712b63da406
--- /dev/null
+++ b/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/model/transformer_encoder.py
@@ -0,0 +1,288 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Transformer encoder."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from functools import partial
+
+import paddle.fluid as fluid
+import paddle.fluid.layers as layers
+
+
+def multi_head_attention(queries,
+                         keys,
+                         values,
+                         attn_bias,
+                         d_key,
+                         d_value,
+                         d_model,
+                         n_head=1,
+                         dropout_rate=0.,
+                         cache=None,
+                         param_initializer=None,
+                         name='multi_head_att'):
+    """
+    Multi-Head Attention. Note that attn_bias is added to the logits before
+    computing the softmax activation, so that masked positions are not
+    considered in the attention weights.
+    """
+    keys = queries if keys is None else keys
+    values = keys if values is None else values
+
+    if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
+        raise ValueError("Inputs: queries, keys and values should all be 3-D tensors.")
+
+    def __compute_qkv(queries, keys, values, n_head, d_key, d_value):
+        """
+        Add linear projection to queries, keys, and values.
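+
+        For the self-attention used by this encoder, queries, keys and values
+        share the shape [batch_size, max_seq_len, d_model]; each fc below maps
+        the last dimension to n_head * d_key (n_head * d_value for v) in one
+        fused projection, so no per-head weight slicing is needed.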
+ """ + q = layers.fc( + input=queries, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_query_fc.w_0', initializer=param_initializer), + bias_attr=name + '_query_fc.b_0') + k = layers.fc( + input=keys, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_key_fc.w_0', initializer=param_initializer), + bias_attr=name + '_key_fc.b_0') + v = layers.fc( + input=values, + size=d_value * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_value_fc.w_0', initializer=param_initializer), + bias_attr=name + '_value_fc.b_0') + return q, k, v + + def __split_heads(x, n_head): + """ + Reshape the last dimension of inpunt tensor x so that it becomes two + dimensions and then transpose. Specifically, input a tensor with shape + [bs, max_sequence_length, n_head * hidden_dim] then output a tensor + with shape [bs, n_head, max_sequence_length, hidden_dim]. + """ + hidden_size = x.shape[-1] + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + reshaped = layers.reshape(x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) + + # permuate the dimensions into: + # [batch_size, n_head, max_sequence_len, hidden_size_per_head] + return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + + def __combine_heads(x): + """ + Transpose and then reshape the last two dimensions of inpunt tensor x + so that it becomes one dimension, which is reverse to __split_heads. + """ + if len(x.shape) == 3: return x + if len(x.shape) != 4: + raise ValueError("Input(x) should be a 4-D Tensor.") + + trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + return layers.reshape(x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], inplace=True) + + def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): + """ + Scaled Dot-Product Attention + """ + scaled_q = layers.scale(x=q, scale=d_key**-0.5) + product = layers.matmul(x=scaled_q, y=k, transpose_y=True) + if attn_bias: + product += attn_bias + weights = layers.softmax(product) + if dropout_rate: + weights = layers.dropout( + weights, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + out = layers.matmul(weights, v) + return out + + q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) + + if cache is not None: # use cache and concat time steps + # Since the inplace reshape in __split_heads changes the shape of k and + # v, which is the cache input for next time step, reshape the cache + # input from the previous time step first. + k = cache["k"] = layers.concat([layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) + v = cache["v"] = layers.concat([layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) + + q = __split_heads(q, n_head) + k = __split_heads(k, n_head) + v = __split_heads(v, n_head) + + ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate) + + out = __combine_heads(ctx_multiheads) + + # Project back to the model size. 
+    proj_out = layers.fc(
+        input=out,
+        size=d_model,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_output_fc.w_0', initializer=param_initializer),
+        bias_attr=name + '_output_fc.b_0')
+    return proj_out
+
+
+def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate, hidden_act, param_initializer=None, name='ffn'):
+    """
+    Position-wise Feed-Forward Network.
+    This module consists of two linear transformations with an activation
+    (hidden_act) in between, applied to each position separately and
+    identically.
+    """
+    hidden = layers.fc(
+        input=x,
+        size=d_inner_hid,
+        num_flatten_dims=2,
+        act=hidden_act,
+        param_attr=fluid.ParamAttr(name=name + '_fc_0.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_0.b_0')
+    if dropout_rate:
+        hidden = layers.dropout(
+            hidden, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    out = layers.fc(
+        input=hidden,
+        size=d_hid,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_fc_1.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_1.b_0')
+    return out
+
+
+def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., name=''):
+    """
+    Optionally add residual connection, layer normalization and dropout to the
+    out tensor, according to the value of process_cmd.
+    This is used before or after multi-head attention and position-wise
+    feed-forward networks.
+    """
+    for cmd in process_cmd:
+        if cmd == "a":  # add residual connection
+            out = out + prev_out if prev_out else out
+        elif cmd == "n":  # add layer normalization
+            out_dtype = out.dtype
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float32")
+            out = layers.layer_norm(
+                out,
+                begin_norm_axis=len(out.shape) - 1,
+                param_attr=fluid.ParamAttr(name=name + '_layer_norm_scale', initializer=fluid.initializer.Constant(1.)),
+                bias_attr=fluid.ParamAttr(name=name + '_layer_norm_bias', initializer=fluid.initializer.Constant(0.)))
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float16")
+        elif cmd == "d":  # add dropout
+            if dropout_rate:
+                out = layers.dropout(
+                    out, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    return out
+
+
+pre_process_layer = partial(pre_post_process_layer, None)
+post_process_layer = pre_post_process_layer
+
+
+def encoder_layer(enc_input,
+                  attn_bias,
+                  n_head,
+                  d_key,
+                  d_value,
+                  d_model,
+                  d_inner_hid,
+                  prepostprocess_dropout,
+                  attention_dropout,
+                  relu_dropout,
+                  hidden_act,
+                  preprocess_cmd="n",
+                  postprocess_cmd="da",
+                  param_initializer=None,
+                  name=''):
+    """The encoder layers that can be stacked to form a deep encoder.
+    This module consists of a multi-head (self) attention sub-layer followed
+    by a position-wise feed-forward sub-layer, both accompanied by
+    post_process_layer to add residual connection, layer normalization and
+    dropout.
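+
+    BertModel calls encoder() with preprocess_cmd="" and postprocess_cmd="dan",
+    i.e. the post-LN ordering of the original BERT: dropout, then residual
+    add, then layer normalization after each sub-layer.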
+ """ + attn_output = multi_head_attention( + pre_process_layer(enc_input, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_att'), + None, + None, + attn_bias, + d_key, + d_value, + d_model, + n_head, + attention_dropout, + param_initializer=param_initializer, + name=name + '_multi_head_att') + attn_output = post_process_layer( + enc_input, attn_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_att') + ffd_output = positionwise_feed_forward( + pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_ffn'), + d_inner_hid, + d_model, + relu_dropout, + hidden_act, + param_initializer=param_initializer, + name=name + '_ffn') + return post_process_layer(attn_output, ffd_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_ffn') + + +def encoder(enc_input, + attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """ + The encoder is composed of a stack of identical layers returned by calling + encoder_layer. + """ + for i in range(n_layer): + enc_output = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + name=name + '_layer_' + str(i)) + enc_input = enc_output + enc_output = pre_process_layer(enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") + + return enc_output diff --git a/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/module.py b/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/module.py new file mode 100644 index 0000000000000000000000000000000000000000..6038a2e22f824dd6e04914dc8f0caf34f1e64ffe --- /dev/null +++ b/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/module.py @@ -0,0 +1,74 @@ +# coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from paddlehub import TransformerModule
+from paddlehub.module.module import moduleinfo
+
+from bert_uncased_L_12_H_768_A_12.model.bert import BertConfig, BertModel
+
+
+@moduleinfo(
+    name="bert_uncased_L-12_H-768_A-12",
+    version="1.1.0",
+    summary="bert_uncased_L-12_H-768_A-12, 12-layer, 768-hidden, 12-heads, 110M parameters",
+    author="paddlepaddle",
+    author_email="paddle-dev@baidu.com",
+    type="nlp/semantic_model",
+)
+class Bert(TransformerModule):
+    def _initialize(self):
+        self.MAX_SEQ_LEN = 512
+        self.params_path = os.path.join(self.directory, "assets", "params")
+        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
+
+        bert_config_path = os.path.join(self.directory, "assets", "bert_config.json")
+        self.bert_config = BertConfig(bert_config_path)
+
+    def net(self, input_ids, position_ids, segment_ids, input_mask):
+        """
+        Create the neural network.
+
+        Args:
+            input_ids (tensor): the word ids.
+            position_ids (tensor): the position ids.
+            segment_ids (tensor): the segment ids.
+            input_mask (tensor): the padding mask.
+
+        Returns:
+            pooled_output (tensor): sentence-level output for classification tasks.
+            sequence_output (tensor): token-level output for sequence tasks.
+        """
+        bert = BertModel(
+            src_ids=input_ids,
+            position_ids=position_ids,
+            sentence_ids=segment_ids,
+            input_mask=input_mask,
+            config=self.bert_config,
+            use_fp16=False)
+        pooled_output = bert.get_pooled_output()
+        sequence_output = bert.get_sequence_output()
+        return pooled_output, sequence_output
+
+
+if __name__ == '__main__':
+    test_module = Bert()
diff --git a/hub_module/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/README.md b/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/README.md
similarity index 100%
rename from hub_module/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/README.md
rename to modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/README.md
diff --git a/hub_module/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/model/__init__.py b/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/bert_uncased_L_12_H_768_A_12/model/__init__.py
rename to modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/__init__.py
diff --git a/hub_module/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/__init__.py b/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/model/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/__init__.py
rename to modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/model/__init__.py
diff --git a/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/model/bert.py b/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/model/bert.py
new file mode 100644
index 0000000000000000000000000000000000000000..9f4b1d9a428b229a3d6490b1678b4c083733e4db
--- /dev/null
+++ b/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/model/bert.py
@@ -0,0 +1,196 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""BERT model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import six
+import json
+
+import paddle.fluid as fluid
+
+from bert_uncased_L_24_H_1024_A_16.model.transformer_encoder import encoder, pre_process_layer
+
+
+class BertConfig(object):
+    def __init__(self, config_path):
+        self._config_dict = self._parse(config_path)
+
+    def _parse(self, config_path):
+        try:
+            with open(config_path) as json_file:
+                config_dict = json.load(json_file)
+        except Exception:
+            raise IOError("Error in parsing bert model config file '%s'" % config_path)
+        else:
+            return config_dict
+
+    def __getitem__(self, key):
+        return self._config_dict[key]
+
+    def print_config(self):
+        for arg, value in sorted(six.iteritems(self._config_dict)):
+            print('%s: %s' % (arg, value))
+        print('------------------------------------------------')
+
+
+class BertModel(object):
+    def __init__(self, src_ids, position_ids, sentence_ids, input_mask, config, weight_sharing=True, use_fp16=False):
+
+        self._emb_size = config['hidden_size']
+        self._n_layer = config['num_hidden_layers']
+        self._n_head = config['num_attention_heads']
+        self._voc_size = config['vocab_size']
+        self._max_position_seq_len = config['max_position_embeddings']
+        self._sent_types = config['type_vocab_size']
+        self._hidden_act = config['hidden_act']
+        self._prepostprocess_dropout = config['hidden_dropout_prob']
+        self._attention_dropout = config['attention_probs_dropout_prob']
+        self._weight_sharing = weight_sharing
+
+        self._word_emb_name = "word_embedding"
+        self._pos_emb_name = "pos_embedding"
+        self._sent_emb_name = "sent_embedding"
+        self._dtype = "float16" if use_fp16 else "float32"
+
+        # Initialize all weights with the truncated normal initializer; all
+        # biases are initialized to a constant zero by default.
+ self._param_initializer = fluid.initializer.TruncatedNormal(scale=config['initializer_range']) + + self._build_model(src_ids, position_ids, sentence_ids, input_mask) + + def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): + # padding id in vocabulary must be set to 0 + emb_out = fluid.layers.embedding( + input=src_ids, + size=[self._voc_size, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._word_emb_name, initializer=self._param_initializer), + is_sparse=False) + position_emb_out = fluid.layers.embedding( + input=position_ids, + size=[self._max_position_seq_len, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._pos_emb_name, initializer=self._param_initializer)) + + sent_emb_out = fluid.layers.embedding( + sentence_ids, + size=[self._sent_types, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._sent_emb_name, initializer=self._param_initializer)) + + emb_out = emb_out + position_emb_out + emb_out = emb_out + sent_emb_out + + emb_out = pre_process_layer(emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') + + if self._dtype == "float16": + input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) + + self_attn_mask = fluid.layers.matmul(x=input_mask, y=input_mask, transpose_y=True) + self_attn_mask = fluid.layers.scale(x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) + n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] * self._n_head, axis=1) + n_head_self_attn_mask.stop_gradient = True + + self._enc_out = encoder( + enc_input=emb_out, + attn_bias=n_head_self_attn_mask, + n_layer=self._n_layer, + n_head=self._n_head, + d_key=self._emb_size // self._n_head, + d_value=self._emb_size // self._n_head, + d_model=self._emb_size, + d_inner_hid=self._emb_size * 4, + prepostprocess_dropout=self._prepostprocess_dropout, + attention_dropout=self._attention_dropout, + relu_dropout=0, + hidden_act=self._hidden_act, + preprocess_cmd="", + postprocess_cmd="dan", + param_initializer=self._param_initializer, + name='encoder') + + def get_sequence_output(self): + return self._enc_out + + def get_pooled_output(self): + """Get the first feature of each sequence for classification""" + + next_sent_feat = fluid.layers.slice(input=self._enc_out, axes=[1], starts=[0], ends=[1]) + next_sent_feat = fluid.layers.fc( + input=next_sent_feat, + size=self._emb_size, + act="tanh", + param_attr=fluid.ParamAttr(name="pooled_fc.w_0", initializer=self._param_initializer), + bias_attr="pooled_fc.b_0") + return next_sent_feat + + def get_pretraining_output(self, mask_label, mask_pos, labels): + """Get the loss & accuracy for pretraining""" + + mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') + + # extract the first token feature in each sentence + next_sent_feat = self.get_pooled_output() + reshaped_emb_out = fluid.layers.reshape(x=self._enc_out, shape=[-1, self._emb_size]) + # extract masked tokens' feature + mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) + + # transform: fc + mask_trans_feat = fluid.layers.fc( + input=mask_feat, + size=self._emb_size, + act=self._hidden_act, + param_attr=fluid.ParamAttr(name='mask_lm_trans_fc.w_0', initializer=self._param_initializer), + bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) + # transform: layer norm + mask_trans_feat = pre_process_layer(mask_trans_feat, 'n', name='mask_lm_trans') + + mask_lm_out_bias_attr = fluid.ParamAttr( + name="mask_lm_out_fc.b_0", 
initializer=fluid.initializer.Constant(value=0.0))
+        if self._weight_sharing:
+            fc_out = fluid.layers.matmul(
+                x=mask_trans_feat,
+                y=fluid.default_main_program().global_block().var(self._word_emb_name),
+                transpose_y=True)
+            fc_out += fluid.layers.create_parameter(
+                shape=[self._voc_size], dtype=self._dtype, attr=mask_lm_out_bias_attr, is_bias=True)
+
+        else:
+            fc_out = fluid.layers.fc(
+                input=mask_trans_feat,
+                size=self._voc_size,
+                param_attr=fluid.ParamAttr(name="mask_lm_out_fc.w_0", initializer=self._param_initializer),
+                bias_attr=mask_lm_out_bias_attr)
+
+        mask_lm_loss = fluid.layers.softmax_with_cross_entropy(logits=fc_out, label=mask_label)
+        mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss)
+
+        next_sent_fc_out = fluid.layers.fc(
+            input=next_sent_feat,
+            size=2,
+            param_attr=fluid.ParamAttr(name="next_sent_fc.w_0", initializer=self._param_initializer),
+            bias_attr="next_sent_fc.b_0")
+
+        next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy(
+            logits=next_sent_fc_out, label=labels, return_softmax=True)
+
+        next_sent_acc = fluid.layers.accuracy(input=next_sent_softmax, label=labels)
+
+        mean_next_sent_loss = fluid.layers.mean(next_sent_loss)
+
+        loss = mean_next_sent_loss + mean_mask_lm_loss
+        return next_sent_acc, mean_mask_lm_loss, loss
diff --git a/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/model/transformer_encoder.py b/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/model/transformer_encoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..53051cde80308a17a30d9b92de11c712b63da406
--- /dev/null
+++ b/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/model/transformer_encoder.py
@@ -0,0 +1,288 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Transformer encoder."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from functools import partial
+
+import paddle.fluid as fluid
+import paddle.fluid.layers as layers
+
+
+def multi_head_attention(queries,
+                         keys,
+                         values,
+                         attn_bias,
+                         d_key,
+                         d_value,
+                         d_model,
+                         n_head=1,
+                         dropout_rate=0.,
+                         cache=None,
+                         param_initializer=None,
+                         name='multi_head_att'):
+    """
+    Multi-Head Attention. Note that attn_bias is added to the logits before
+    computing the softmax activation, to mask certain selected positions so
+    that they will not be considered in the attention weights.
+    """
+    keys = queries if keys is None else keys
+    values = keys if values is None else values
+
+    if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
+        raise ValueError("Inputs: queries, keys and values should all be 3-D tensors.")
+
+    def __compute_qkv(queries, keys, values, n_head, d_key, d_value):
+        """
+        Add linear projection to queries, keys, and values.
+ """ + q = layers.fc( + input=queries, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_query_fc.w_0', initializer=param_initializer), + bias_attr=name + '_query_fc.b_0') + k = layers.fc( + input=keys, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_key_fc.w_0', initializer=param_initializer), + bias_attr=name + '_key_fc.b_0') + v = layers.fc( + input=values, + size=d_value * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_value_fc.w_0', initializer=param_initializer), + bias_attr=name + '_value_fc.b_0') + return q, k, v + + def __split_heads(x, n_head): + """ + Reshape the last dimension of inpunt tensor x so that it becomes two + dimensions and then transpose. Specifically, input a tensor with shape + [bs, max_sequence_length, n_head * hidden_dim] then output a tensor + with shape [bs, n_head, max_sequence_length, hidden_dim]. + """ + hidden_size = x.shape[-1] + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + reshaped = layers.reshape(x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) + + # permuate the dimensions into: + # [batch_size, n_head, max_sequence_len, hidden_size_per_head] + return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + + def __combine_heads(x): + """ + Transpose and then reshape the last two dimensions of inpunt tensor x + so that it becomes one dimension, which is reverse to __split_heads. + """ + if len(x.shape) == 3: return x + if len(x.shape) != 4: + raise ValueError("Input(x) should be a 4-D Tensor.") + + trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + return layers.reshape(x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], inplace=True) + + def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): + """ + Scaled Dot-Product Attention + """ + scaled_q = layers.scale(x=q, scale=d_key**-0.5) + product = layers.matmul(x=scaled_q, y=k, transpose_y=True) + if attn_bias: + product += attn_bias + weights = layers.softmax(product) + if dropout_rate: + weights = layers.dropout( + weights, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + out = layers.matmul(weights, v) + return out + + q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) + + if cache is not None: # use cache and concat time steps + # Since the inplace reshape in __split_heads changes the shape of k and + # v, which is the cache input for next time step, reshape the cache + # input from the previous time step first. + k = cache["k"] = layers.concat([layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) + v = cache["v"] = layers.concat([layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) + + q = __split_heads(q, n_head) + k = __split_heads(k, n_head) + v = __split_heads(v, n_head) + + ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate) + + out = __combine_heads(ctx_multiheads) + + # Project back to the model size. 
+    proj_out = layers.fc(
+        input=out,
+        size=d_model,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_output_fc.w_0', initializer=param_initializer),
+        bias_attr=name + '_output_fc.b_0')
+    return proj_out
+
+
+def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate, hidden_act, param_initializer=None, name='ffn'):
+    """
+    Position-wise Feed-Forward Networks.
+    This module consists of two linear transformations with a ReLU activation
+    in between, applied to each position separately and identically.
+    """
+    hidden = layers.fc(
+        input=x,
+        size=d_inner_hid,
+        num_flatten_dims=2,
+        act=hidden_act,
+        param_attr=fluid.ParamAttr(name=name + '_fc_0.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_0.b_0')
+    if dropout_rate:
+        hidden = layers.dropout(
+            hidden, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    out = layers.fc(
+        input=hidden,
+        size=d_hid,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_fc_1.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_1.b_0')
+    return out
+
+
+def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., name=''):
+    """
+    Optionally add residual connection, layer normalization and dropout to the
+    out tensor, according to the value of process_cmd.
+    This is used before and after multi-head attention and position-wise
+    feed-forward networks.
+    """
+    for cmd in process_cmd:
+        if cmd == "a":  # add residual connection
+            out = out + prev_out if prev_out else out
+        elif cmd == "n":  # add layer normalization
+            out_dtype = out.dtype
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float32")
+            out = layers.layer_norm(
+                out,
+                begin_norm_axis=len(out.shape) - 1,
+                param_attr=fluid.ParamAttr(name=name + '_layer_norm_scale', initializer=fluid.initializer.Constant(1.)),
+                bias_attr=fluid.ParamAttr(name=name + '_layer_norm_bias', initializer=fluid.initializer.Constant(0.)))
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float16")
+        elif cmd == "d":  # add dropout
+            if dropout_rate:
+                out = layers.dropout(
+                    out, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    return out
+
+
+pre_process_layer = partial(pre_post_process_layer, None)
+post_process_layer = pre_post_process_layer
+
+
+def encoder_layer(enc_input,
+                  attn_bias,
+                  n_head,
+                  d_key,
+                  d_value,
+                  d_model,
+                  d_inner_hid,
+                  prepostprocess_dropout,
+                  attention_dropout,
+                  relu_dropout,
+                  hidden_act,
+                  preprocess_cmd="n",
+                  postprocess_cmd="da",
+                  param_initializer=None,
+                  name=''):
+    """The encoder layers that can be stacked to form a deep encoder.
+    This module consists of a multi-head (self) attention sublayer followed by
+    a position-wise feed-forward network, each accompanied by a
+    post_process_layer that adds residual connection, layer normalization
+    and dropout.
+ """ + attn_output = multi_head_attention( + pre_process_layer(enc_input, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_att'), + None, + None, + attn_bias, + d_key, + d_value, + d_model, + n_head, + attention_dropout, + param_initializer=param_initializer, + name=name + '_multi_head_att') + attn_output = post_process_layer( + enc_input, attn_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_att') + ffd_output = positionwise_feed_forward( + pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_ffn'), + d_inner_hid, + d_model, + relu_dropout, + hidden_act, + param_initializer=param_initializer, + name=name + '_ffn') + return post_process_layer(attn_output, ffd_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_ffn') + + +def encoder(enc_input, + attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """ + The encoder is composed of a stack of identical layers returned by calling + encoder_layer. + """ + for i in range(n_layer): + enc_output = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + name=name + '_layer_' + str(i)) + enc_input = enc_output + enc_output = pre_process_layer(enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") + + return enc_output diff --git a/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/module.py b/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/module.py new file mode 100644 index 0000000000000000000000000000000000000000..efeeabd08fb0fac471c7ccf83c8b18ffa9080f69 --- /dev/null +++ b/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/module.py @@ -0,0 +1,74 @@ +# coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from paddlehub import TransformerModule
+from paddlehub.module.module import moduleinfo
+
+from bert_uncased_L_24_H_1024_A_16.model.bert import BertConfig, BertModel
+
+
+@moduleinfo(
+    name="bert_uncased_L-24_H-1024_A-16",
+    version="1.1.0",
+    summary="bert_uncased_L-24_H-1024_A-16, 24-layer, 1024-hidden, 16-heads, 340M parameters",
+    author="paddlepaddle",
+    author_email="paddle-dev@baidu.com",
+    type="nlp/semantic_model",
+)
+class Bert(TransformerModule):
+    def _initialize(self):
+        self.MAX_SEQ_LEN = 512
+        self.params_path = os.path.join(self.directory, "assets", "params")
+        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
+
+        bert_config_path = os.path.join(self.directory, "assets", "bert_config.json")
+        self.bert_config = BertConfig(bert_config_path)
+
+    def net(self, input_ids, position_ids, segment_ids, input_mask):
+        """
+        Create the neural network.
+
+        Args:
+            input_ids (tensor): the word ids.
+            position_ids (tensor): the position ids.
+            segment_ids (tensor): the segment ids.
+            input_mask (tensor): the padding mask.
+
+        Returns:
+            pooled_output (tensor): sentence-level output for classification tasks.
+            sequence_output (tensor): token-level output for sequence tasks.
+        """
+        bert = BertModel(
+            src_ids=input_ids,
+            position_ids=position_ids,
+            sentence_ids=segment_ids,
+            input_mask=input_mask,
+            config=self.bert_config,
+            use_fp16=False)
+        pooled_output = bert.get_pooled_output()
+        sequence_output = bert.get_sequence_output()
+        return pooled_output, sequence_output
+
+
+if __name__ == '__main__':
+    test_module = Bert()
diff --git a/hub_module/modules/text/semantic_model/chinese_bert_wwm/README.md b/modules/text/semantic_model/chinese_bert_wwm/README.md
similarity index 100%
rename from hub_module/modules/text/semantic_model/chinese_bert_wwm/README.md
rename to modules/text/semantic_model/chinese_bert_wwm/README.md
diff --git a/hub_module/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/model/__init__.py b/modules/text/semantic_model/chinese_bert_wwm/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/bert_uncased_L_24_H_1024_A_16/model/__init__.py
rename to modules/text/semantic_model/chinese_bert_wwm/__init__.py
diff --git a/hub_module/modules/text/semantic_model/chinese_bert_wwm/__init__.py b/modules/text/semantic_model/chinese_bert_wwm/model/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/chinese_bert_wwm/__init__.py
rename to modules/text/semantic_model/chinese_bert_wwm/model/__init__.py
diff --git a/modules/text/semantic_model/chinese_bert_wwm/model/bert.py b/modules/text/semantic_model/chinese_bert_wwm/model/bert.py
new file mode 100644
index 0000000000000000000000000000000000000000..19d991f373788305050a5f807fd90fdb500efc0f
--- /dev/null
+++ b/modules/text/semantic_model/chinese_bert_wwm/model/bert.py
@@ -0,0 +1,196 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""BERT model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import six
+import json
+
+import paddle.fluid as fluid
+
+from chinese_bert_wwm.model.transformer_encoder import encoder, pre_process_layer
+
+
+class BertConfig(object):
+    def __init__(self, config_path):
+        self._config_dict = self._parse(config_path)
+
+    def _parse(self, config_path):
+        try:
+            with open(config_path) as json_file:
+                config_dict = json.load(json_file)
+        except Exception:
+            raise IOError("Error in parsing bert model config file '%s'" % config_path)
+        else:
+            return config_dict
+
+    def __getitem__(self, key):
+        return self._config_dict[key]
+
+    def print_config(self):
+        for arg, value in sorted(six.iteritems(self._config_dict)):
+            print('%s: %s' % (arg, value))
+        print('------------------------------------------------')
+
+
+class BertModel(object):
+    def __init__(self, src_ids, position_ids, sentence_ids, input_mask, config, weight_sharing=True, use_fp16=False):
+
+        self._emb_size = config['hidden_size']
+        self._n_layer = config['num_hidden_layers']
+        self._n_head = config['num_attention_heads']
+        self._voc_size = config['vocab_size']
+        self._max_position_seq_len = config['max_position_embeddings']
+        self._sent_types = config['type_vocab_size']
+        self._hidden_act = config['hidden_act']
+        self._prepostprocess_dropout = config['hidden_dropout_prob']
+        self._attention_dropout = config['attention_probs_dropout_prob']
+        self._weight_sharing = weight_sharing
+
+        self._word_emb_name = "word_embedding"
+        self._pos_emb_name = "pos_embedding"
+        self._sent_emb_name = "sent_embedding"
+        self._dtype = "float16" if use_fp16 else "float32"
+
+        # Initialize all weights with a truncated normal initializer; all biases
+        # are initialized to zero by default.
+ self._param_initializer = fluid.initializer.TruncatedNormal(scale=config['initializer_range']) + + self._build_model(src_ids, position_ids, sentence_ids, input_mask) + + def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): + # padding id in vocabulary must be set to 0 + emb_out = fluid.layers.embedding( + input=src_ids, + size=[self._voc_size, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._word_emb_name, initializer=self._param_initializer), + is_sparse=False) + position_emb_out = fluid.layers.embedding( + input=position_ids, + size=[self._max_position_seq_len, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._pos_emb_name, initializer=self._param_initializer)) + + sent_emb_out = fluid.layers.embedding( + sentence_ids, + size=[self._sent_types, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._sent_emb_name, initializer=self._param_initializer)) + + emb_out = emb_out + position_emb_out + emb_out = emb_out + sent_emb_out + + emb_out = pre_process_layer(emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') + + if self._dtype == "float16": + input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) + + self_attn_mask = fluid.layers.matmul(x=input_mask, y=input_mask, transpose_y=True) + self_attn_mask = fluid.layers.scale(x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) + n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] * self._n_head, axis=1) + n_head_self_attn_mask.stop_gradient = True + + self._enc_out = encoder( + enc_input=emb_out, + attn_bias=n_head_self_attn_mask, + n_layer=self._n_layer, + n_head=self._n_head, + d_key=self._emb_size // self._n_head, + d_value=self._emb_size // self._n_head, + d_model=self._emb_size, + d_inner_hid=self._emb_size * 4, + prepostprocess_dropout=self._prepostprocess_dropout, + attention_dropout=self._attention_dropout, + relu_dropout=0, + hidden_act=self._hidden_act, + preprocess_cmd="", + postprocess_cmd="dan", + param_initializer=self._param_initializer, + name='encoder') + + def get_sequence_output(self): + return self._enc_out + + def get_pooled_output(self): + """Get the first feature of each sequence for classification""" + + next_sent_feat = fluid.layers.slice(input=self._enc_out, axes=[1], starts=[0], ends=[1]) + next_sent_feat = fluid.layers.fc( + input=next_sent_feat, + size=self._emb_size, + act="tanh", + param_attr=fluid.ParamAttr(name="pooled_fc.w_0", initializer=self._param_initializer), + bias_attr="pooled_fc.b_0") + return next_sent_feat + + def get_pretraining_output(self, mask_label, mask_pos, labels): + """Get the loss & accuracy for pretraining""" + + mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') + + # extract the first token feature in each sentence + next_sent_feat = self.get_pooled_output() + reshaped_emb_out = fluid.layers.reshape(x=self._enc_out, shape=[-1, self._emb_size]) + # extract masked tokens' feature + mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) + + # transform: fc + mask_trans_feat = fluid.layers.fc( + input=mask_feat, + size=self._emb_size, + act=self._hidden_act, + param_attr=fluid.ParamAttr(name='mask_lm_trans_fc.w_0', initializer=self._param_initializer), + bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) + # transform: layer norm + mask_trans_feat = pre_process_layer(mask_trans_feat, 'n', name='mask_lm_trans') + + mask_lm_out_bias_attr = fluid.ParamAttr( + name="mask_lm_out_fc.b_0", 
initializer=fluid.initializer.Constant(value=0.0))
+        if self._weight_sharing:
+            fc_out = fluid.layers.matmul(
+                x=mask_trans_feat,
+                y=fluid.default_main_program().global_block().var(self._word_emb_name),
+                transpose_y=True)
+            fc_out += fluid.layers.create_parameter(
+                shape=[self._voc_size], dtype=self._dtype, attr=mask_lm_out_bias_attr, is_bias=True)
+
+        else:
+            fc_out = fluid.layers.fc(
+                input=mask_trans_feat,
+                size=self._voc_size,
+                param_attr=fluid.ParamAttr(name="mask_lm_out_fc.w_0", initializer=self._param_initializer),
+                bias_attr=mask_lm_out_bias_attr)
+
+        mask_lm_loss = fluid.layers.softmax_with_cross_entropy(logits=fc_out, label=mask_label)
+        mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss)
+
+        next_sent_fc_out = fluid.layers.fc(
+            input=next_sent_feat,
+            size=2,
+            param_attr=fluid.ParamAttr(name="next_sent_fc.w_0", initializer=self._param_initializer),
+            bias_attr="next_sent_fc.b_0")
+
+        next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy(
+            logits=next_sent_fc_out, label=labels, return_softmax=True)
+
+        next_sent_acc = fluid.layers.accuracy(input=next_sent_softmax, label=labels)
+
+        mean_next_sent_loss = fluid.layers.mean(next_sent_loss)
+
+        loss = mean_next_sent_loss + mean_mask_lm_loss
+        return next_sent_acc, mean_mask_lm_loss, loss
diff --git a/modules/text/semantic_model/chinese_bert_wwm/model/transformer_encoder.py b/modules/text/semantic_model/chinese_bert_wwm/model/transformer_encoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..53051cde80308a17a30d9b92de11c712b63da406
--- /dev/null
+++ b/modules/text/semantic_model/chinese_bert_wwm/model/transformer_encoder.py
@@ -0,0 +1,288 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Transformer encoder."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from functools import partial
+
+import paddle.fluid as fluid
+import paddle.fluid.layers as layers
+
+
+def multi_head_attention(queries,
+                         keys,
+                         values,
+                         attn_bias,
+                         d_key,
+                         d_value,
+                         d_model,
+                         n_head=1,
+                         dropout_rate=0.,
+                         cache=None,
+                         param_initializer=None,
+                         name='multi_head_att'):
+    """
+    Multi-Head Attention. Note that attn_bias is added to the logits before
+    computing the softmax activation, to mask certain selected positions so
+    that they will not be considered in the attention weights.
+    """
+    keys = queries if keys is None else keys
+    values = keys if values is None else values
+
+    if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
+        raise ValueError("Inputs: queries, keys and values should all be 3-D tensors.")
+
+    def __compute_qkv(queries, keys, values, n_head, d_key, d_value):
+        """
+        Add linear projection to queries, keys, and values.
+ """ + q = layers.fc( + input=queries, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_query_fc.w_0', initializer=param_initializer), + bias_attr=name + '_query_fc.b_0') + k = layers.fc( + input=keys, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_key_fc.w_0', initializer=param_initializer), + bias_attr=name + '_key_fc.b_0') + v = layers.fc( + input=values, + size=d_value * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_value_fc.w_0', initializer=param_initializer), + bias_attr=name + '_value_fc.b_0') + return q, k, v + + def __split_heads(x, n_head): + """ + Reshape the last dimension of inpunt tensor x so that it becomes two + dimensions and then transpose. Specifically, input a tensor with shape + [bs, max_sequence_length, n_head * hidden_dim] then output a tensor + with shape [bs, n_head, max_sequence_length, hidden_dim]. + """ + hidden_size = x.shape[-1] + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + reshaped = layers.reshape(x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) + + # permuate the dimensions into: + # [batch_size, n_head, max_sequence_len, hidden_size_per_head] + return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + + def __combine_heads(x): + """ + Transpose and then reshape the last two dimensions of inpunt tensor x + so that it becomes one dimension, which is reverse to __split_heads. + """ + if len(x.shape) == 3: return x + if len(x.shape) != 4: + raise ValueError("Input(x) should be a 4-D Tensor.") + + trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + return layers.reshape(x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], inplace=True) + + def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): + """ + Scaled Dot-Product Attention + """ + scaled_q = layers.scale(x=q, scale=d_key**-0.5) + product = layers.matmul(x=scaled_q, y=k, transpose_y=True) + if attn_bias: + product += attn_bias + weights = layers.softmax(product) + if dropout_rate: + weights = layers.dropout( + weights, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + out = layers.matmul(weights, v) + return out + + q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) + + if cache is not None: # use cache and concat time steps + # Since the inplace reshape in __split_heads changes the shape of k and + # v, which is the cache input for next time step, reshape the cache + # input from the previous time step first. + k = cache["k"] = layers.concat([layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) + v = cache["v"] = layers.concat([layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) + + q = __split_heads(q, n_head) + k = __split_heads(k, n_head) + v = __split_heads(v, n_head) + + ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate) + + out = __combine_heads(ctx_multiheads) + + # Project back to the model size. 
+    proj_out = layers.fc(
+        input=out,
+        size=d_model,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_output_fc.w_0', initializer=param_initializer),
+        bias_attr=name + '_output_fc.b_0')
+    return proj_out
+
+
+def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate, hidden_act, param_initializer=None, name='ffn'):
+    """
+    Position-wise Feed-Forward Networks.
+    This module consists of two linear transformations with a ReLU activation
+    in between, applied to each position separately and identically.
+    """
+    hidden = layers.fc(
+        input=x,
+        size=d_inner_hid,
+        num_flatten_dims=2,
+        act=hidden_act,
+        param_attr=fluid.ParamAttr(name=name + '_fc_0.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_0.b_0')
+    if dropout_rate:
+        hidden = layers.dropout(
+            hidden, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    out = layers.fc(
+        input=hidden,
+        size=d_hid,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_fc_1.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_1.b_0')
+    return out
+
+
+def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., name=''):
+    """
+    Optionally add residual connection, layer normalization and dropout to the
+    out tensor, according to the value of process_cmd.
+    This is used before and after multi-head attention and position-wise
+    feed-forward networks.
+    """
+    for cmd in process_cmd:
+        if cmd == "a":  # add residual connection
+            out = out + prev_out if prev_out else out
+        elif cmd == "n":  # add layer normalization
+            out_dtype = out.dtype
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float32")
+            out = layers.layer_norm(
+                out,
+                begin_norm_axis=len(out.shape) - 1,
+                param_attr=fluid.ParamAttr(name=name + '_layer_norm_scale', initializer=fluid.initializer.Constant(1.)),
+                bias_attr=fluid.ParamAttr(name=name + '_layer_norm_bias', initializer=fluid.initializer.Constant(0.)))
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float16")
+        elif cmd == "d":  # add dropout
+            if dropout_rate:
+                out = layers.dropout(
+                    out, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    return out
+
+
+pre_process_layer = partial(pre_post_process_layer, None)
+post_process_layer = pre_post_process_layer
+
+
+def encoder_layer(enc_input,
+                  attn_bias,
+                  n_head,
+                  d_key,
+                  d_value,
+                  d_model,
+                  d_inner_hid,
+                  prepostprocess_dropout,
+                  attention_dropout,
+                  relu_dropout,
+                  hidden_act,
+                  preprocess_cmd="n",
+                  postprocess_cmd="da",
+                  param_initializer=None,
+                  name=''):
+    """The encoder layers that can be stacked to form a deep encoder.
+    This module consists of a multi-head (self) attention sublayer followed by
+    a position-wise feed-forward network, each accompanied by a
+    post_process_layer that adds residual connection, layer normalization
+    and dropout.
+ """ + attn_output = multi_head_attention( + pre_process_layer(enc_input, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_att'), + None, + None, + attn_bias, + d_key, + d_value, + d_model, + n_head, + attention_dropout, + param_initializer=param_initializer, + name=name + '_multi_head_att') + attn_output = post_process_layer( + enc_input, attn_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_att') + ffd_output = positionwise_feed_forward( + pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_ffn'), + d_inner_hid, + d_model, + relu_dropout, + hidden_act, + param_initializer=param_initializer, + name=name + '_ffn') + return post_process_layer(attn_output, ffd_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_ffn') + + +def encoder(enc_input, + attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """ + The encoder is composed of a stack of identical layers returned by calling + encoder_layer. + """ + for i in range(n_layer): + enc_output = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + name=name + '_layer_' + str(i)) + enc_input = enc_output + enc_output = pre_process_layer(enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") + + return enc_output diff --git a/modules/text/semantic_model/chinese_bert_wwm/module.py b/modules/text/semantic_model/chinese_bert_wwm/module.py new file mode 100644 index 0000000000000000000000000000000000000000..ce2b8b3d369965dedba943bff3788fb29d36d7f4 --- /dev/null +++ b/modules/text/semantic_model/chinese_bert_wwm/module.py @@ -0,0 +1,74 @@ +# coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from paddlehub import TransformerModule
+from paddlehub.module.module import moduleinfo
+
+from chinese_bert_wwm.model.bert import BertConfig, BertModel
+
+
+@moduleinfo(
+    name="chinese-bert-wwm",
+    version="1.0.0",
+    summary="chinese-bert-wwm, 12-layer, 768-hidden, 12-heads, 110M parameters",
+    author="ymcui",
+    author_email="ymcui@ir.hit.edu.cn",
+    type="nlp/semantic_model",
+)
+class BertWwm(TransformerModule):
+    def _initialize(self):
+        self.MAX_SEQ_LEN = 512
+        self.params_path = os.path.join(self.directory, "assets", "params")
+        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
+
+        bert_config_path = os.path.join(self.directory, "assets", "bert_config.json")
+        self.bert_config = BertConfig(bert_config_path)
+
+    def net(self, input_ids, position_ids, segment_ids, input_mask):
+        """
+        Create the neural network.
+
+        Args:
+            input_ids (tensor): the word ids.
+            position_ids (tensor): the position ids.
+            segment_ids (tensor): the segment ids.
+            input_mask (tensor): the padding mask.
+
+        Returns:
+            pooled_output (tensor): sentence-level output for classification tasks.
+            sequence_output (tensor): token-level output for sequence tasks.
+        """
+        bert = BertModel(
+            src_ids=input_ids,
+            position_ids=position_ids,
+            sentence_ids=segment_ids,
+            input_mask=input_mask,
+            config=self.bert_config,
+            use_fp16=False)
+        pooled_output = bert.get_pooled_output()
+        sequence_output = bert.get_sequence_output()
+        return pooled_output, sequence_output
+
+
+if __name__ == '__main__':
+    test_module = BertWwm()
diff --git a/hub_module/modules/text/semantic_model/chinese_bert_wwm_ext/README.md b/modules/text/semantic_model/chinese_bert_wwm_ext/README.md
similarity index 100%
rename from hub_module/modules/text/semantic_model/chinese_bert_wwm_ext/README.md
rename to modules/text/semantic_model/chinese_bert_wwm_ext/README.md
diff --git a/hub_module/modules/text/semantic_model/chinese_bert_wwm/model/__init__.py b/modules/text/semantic_model/chinese_bert_wwm_ext/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/chinese_bert_wwm/model/__init__.py
rename to modules/text/semantic_model/chinese_bert_wwm_ext/__init__.py
diff --git a/hub_module/modules/text/semantic_model/chinese_bert_wwm_ext/__init__.py b/modules/text/semantic_model/chinese_bert_wwm_ext/model/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/chinese_bert_wwm_ext/__init__.py
rename to modules/text/semantic_model/chinese_bert_wwm_ext/model/__init__.py
diff --git a/modules/text/semantic_model/chinese_bert_wwm_ext/model/bert.py b/modules/text/semantic_model/chinese_bert_wwm_ext/model/bert.py
new file mode 100644
index 0000000000000000000000000000000000000000..782289289387a59a75db41ed972fa8e5394c19a3
--- /dev/null
+++ b/modules/text/semantic_model/chinese_bert_wwm_ext/model/bert.py
@@ -0,0 +1,196 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""BERT model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import six
+import json
+
+import paddle.fluid as fluid
+
+from chinese_bert_wwm_ext.model.transformer_encoder import encoder, pre_process_layer
+
+
+class BertConfig(object):
+    def __init__(self, config_path):
+        self._config_dict = self._parse(config_path)
+
+    def _parse(self, config_path):
+        try:
+            with open(config_path) as json_file:
+                config_dict = json.load(json_file)
+        except Exception:
+            raise IOError("Error in parsing bert model config file '%s'" % config_path)
+        else:
+            return config_dict
+
+    def __getitem__(self, key):
+        return self._config_dict[key]
+
+    def print_config(self):
+        for arg, value in sorted(six.iteritems(self._config_dict)):
+            print('%s: %s' % (arg, value))
+        print('------------------------------------------------')
+
+
+class BertModel(object):
+    def __init__(self, src_ids, position_ids, sentence_ids, input_mask, config, weight_sharing=True, use_fp16=False):
+
+        self._emb_size = config['hidden_size']
+        self._n_layer = config['num_hidden_layers']
+        self._n_head = config['num_attention_heads']
+        self._voc_size = config['vocab_size']
+        self._max_position_seq_len = config['max_position_embeddings']
+        self._sent_types = config['type_vocab_size']
+        self._hidden_act = config['hidden_act']
+        self._prepostprocess_dropout = config['hidden_dropout_prob']
+        self._attention_dropout = config['attention_probs_dropout_prob']
+        self._weight_sharing = weight_sharing
+
+        self._word_emb_name = "word_embedding"
+        self._pos_emb_name = "pos_embedding"
+        self._sent_emb_name = "sent_embedding"
+        self._dtype = "float16" if use_fp16 else "float32"
+
+        # Initialize all weights with a truncated normal initializer; all biases
+        # are initialized to zero by default.
+ self._param_initializer = fluid.initializer.TruncatedNormal(scale=config['initializer_range']) + + self._build_model(src_ids, position_ids, sentence_ids, input_mask) + + def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): + # padding id in vocabulary must be set to 0 + emb_out = fluid.layers.embedding( + input=src_ids, + size=[self._voc_size, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._word_emb_name, initializer=self._param_initializer), + is_sparse=False) + position_emb_out = fluid.layers.embedding( + input=position_ids, + size=[self._max_position_seq_len, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._pos_emb_name, initializer=self._param_initializer)) + + sent_emb_out = fluid.layers.embedding( + sentence_ids, + size=[self._sent_types, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._sent_emb_name, initializer=self._param_initializer)) + + emb_out = emb_out + position_emb_out + emb_out = emb_out + sent_emb_out + + emb_out = pre_process_layer(emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') + + if self._dtype == "float16": + input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) + + self_attn_mask = fluid.layers.matmul(x=input_mask, y=input_mask, transpose_y=True) + self_attn_mask = fluid.layers.scale(x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) + n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] * self._n_head, axis=1) + n_head_self_attn_mask.stop_gradient = True + + self._enc_out = encoder( + enc_input=emb_out, + attn_bias=n_head_self_attn_mask, + n_layer=self._n_layer, + n_head=self._n_head, + d_key=self._emb_size // self._n_head, + d_value=self._emb_size // self._n_head, + d_model=self._emb_size, + d_inner_hid=self._emb_size * 4, + prepostprocess_dropout=self._prepostprocess_dropout, + attention_dropout=self._attention_dropout, + relu_dropout=0, + hidden_act=self._hidden_act, + preprocess_cmd="", + postprocess_cmd="dan", + param_initializer=self._param_initializer, + name='encoder') + + def get_sequence_output(self): + return self._enc_out + + def get_pooled_output(self): + """Get the first feature of each sequence for classification""" + + next_sent_feat = fluid.layers.slice(input=self._enc_out, axes=[1], starts=[0], ends=[1]) + next_sent_feat = fluid.layers.fc( + input=next_sent_feat, + size=self._emb_size, + act="tanh", + param_attr=fluid.ParamAttr(name="pooled_fc.w_0", initializer=self._param_initializer), + bias_attr="pooled_fc.b_0") + return next_sent_feat + + def get_pretraining_output(self, mask_label, mask_pos, labels): + """Get the loss & accuracy for pretraining""" + + mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') + + # extract the first token feature in each sentence + next_sent_feat = self.get_pooled_output() + reshaped_emb_out = fluid.layers.reshape(x=self._enc_out, shape=[-1, self._emb_size]) + # extract masked tokens' feature + mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) + + # transform: fc + mask_trans_feat = fluid.layers.fc( + input=mask_feat, + size=self._emb_size, + act=self._hidden_act, + param_attr=fluid.ParamAttr(name='mask_lm_trans_fc.w_0', initializer=self._param_initializer), + bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) + # transform: layer norm + mask_trans_feat = pre_process_layer(mask_trans_feat, 'n', name='mask_lm_trans') + + mask_lm_out_bias_attr = fluid.ParamAttr( + name="mask_lm_out_fc.b_0", 
initializer=fluid.initializer.Constant(value=0.0))
+        if self._weight_sharing:
+            fc_out = fluid.layers.matmul(
+                x=mask_trans_feat,
+                y=fluid.default_main_program().global_block().var(self._word_emb_name),
+                transpose_y=True)
+            fc_out += fluid.layers.create_parameter(
+                shape=[self._voc_size], dtype=self._dtype, attr=mask_lm_out_bias_attr, is_bias=True)
+
+        else:
+            fc_out = fluid.layers.fc(
+                input=mask_trans_feat,
+                size=self._voc_size,
+                param_attr=fluid.ParamAttr(name="mask_lm_out_fc.w_0", initializer=self._param_initializer),
+                bias_attr=mask_lm_out_bias_attr)
+
+        mask_lm_loss = fluid.layers.softmax_with_cross_entropy(logits=fc_out, label=mask_label)
+        mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss)
+
+        next_sent_fc_out = fluid.layers.fc(
+            input=next_sent_feat,
+            size=2,
+            param_attr=fluid.ParamAttr(name="next_sent_fc.w_0", initializer=self._param_initializer),
+            bias_attr="next_sent_fc.b_0")
+
+        next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy(
+            logits=next_sent_fc_out, label=labels, return_softmax=True)
+
+        next_sent_acc = fluid.layers.accuracy(input=next_sent_softmax, label=labels)
+
+        mean_next_sent_loss = fluid.layers.mean(next_sent_loss)
+
+        loss = mean_next_sent_loss + mean_mask_lm_loss
+        return next_sent_acc, mean_mask_lm_loss, loss
diff --git a/modules/text/semantic_model/chinese_bert_wwm_ext/model/transformer_encoder.py b/modules/text/semantic_model/chinese_bert_wwm_ext/model/transformer_encoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..53051cde80308a17a30d9b92de11c712b63da406
--- /dev/null
+++ b/modules/text/semantic_model/chinese_bert_wwm_ext/model/transformer_encoder.py
@@ -0,0 +1,288 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Transformer encoder."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from functools import partial
+
+import paddle.fluid as fluid
+import paddle.fluid.layers as layers
+
+
+def multi_head_attention(queries,
+                         keys,
+                         values,
+                         attn_bias,
+                         d_key,
+                         d_value,
+                         d_model,
+                         n_head=1,
+                         dropout_rate=0.,
+                         cache=None,
+                         param_initializer=None,
+                         name='multi_head_att'):
+    """
+    Multi-Head Attention. Note that attn_bias is added to the logits before
+    computing the softmax activation, to mask certain selected positions so
+    that they will not be considered in the attention weights.
+    """
+    keys = queries if keys is None else keys
+    values = keys if values is None else values
+
+    if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
+        raise ValueError("Inputs: queries, keys and values should all be 3-D tensors.")
+
+    def __compute_qkv(queries, keys, values, n_head, d_key, d_value):
+        """
+        Add linear projection to queries, keys, and values.
+ """ + q = layers.fc( + input=queries, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_query_fc.w_0', initializer=param_initializer), + bias_attr=name + '_query_fc.b_0') + k = layers.fc( + input=keys, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_key_fc.w_0', initializer=param_initializer), + bias_attr=name + '_key_fc.b_0') + v = layers.fc( + input=values, + size=d_value * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_value_fc.w_0', initializer=param_initializer), + bias_attr=name + '_value_fc.b_0') + return q, k, v + + def __split_heads(x, n_head): + """ + Reshape the last dimension of inpunt tensor x so that it becomes two + dimensions and then transpose. Specifically, input a tensor with shape + [bs, max_sequence_length, n_head * hidden_dim] then output a tensor + with shape [bs, n_head, max_sequence_length, hidden_dim]. + """ + hidden_size = x.shape[-1] + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + reshaped = layers.reshape(x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) + + # permuate the dimensions into: + # [batch_size, n_head, max_sequence_len, hidden_size_per_head] + return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + + def __combine_heads(x): + """ + Transpose and then reshape the last two dimensions of inpunt tensor x + so that it becomes one dimension, which is reverse to __split_heads. + """ + if len(x.shape) == 3: return x + if len(x.shape) != 4: + raise ValueError("Input(x) should be a 4-D Tensor.") + + trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + return layers.reshape(x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], inplace=True) + + def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): + """ + Scaled Dot-Product Attention + """ + scaled_q = layers.scale(x=q, scale=d_key**-0.5) + product = layers.matmul(x=scaled_q, y=k, transpose_y=True) + if attn_bias: + product += attn_bias + weights = layers.softmax(product) + if dropout_rate: + weights = layers.dropout( + weights, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + out = layers.matmul(weights, v) + return out + + q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) + + if cache is not None: # use cache and concat time steps + # Since the inplace reshape in __split_heads changes the shape of k and + # v, which is the cache input for next time step, reshape the cache + # input from the previous time step first. + k = cache["k"] = layers.concat([layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) + v = cache["v"] = layers.concat([layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) + + q = __split_heads(q, n_head) + k = __split_heads(k, n_head) + v = __split_heads(v, n_head) + + ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate) + + out = __combine_heads(ctx_multiheads) + + # Project back to the model size. 
+    proj_out = layers.fc(
+        input=out,
+        size=d_model,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_output_fc.w_0', initializer=param_initializer),
+        bias_attr=name + '_output_fc.b_0')
+    return proj_out
+
+
+def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate, hidden_act, param_initializer=None, name='ffn'):
+    """
+    Position-wise Feed-Forward Networks.
+    This module consists of two linear transformations with a ReLU activation
+    in between, applied to each position separately and identically.
+    """
+    hidden = layers.fc(
+        input=x,
+        size=d_inner_hid,
+        num_flatten_dims=2,
+        act=hidden_act,
+        param_attr=fluid.ParamAttr(name=name + '_fc_0.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_0.b_0')
+    if dropout_rate:
+        hidden = layers.dropout(
+            hidden, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    out = layers.fc(
+        input=hidden,
+        size=d_hid,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_fc_1.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_1.b_0')
+    return out
+
+
+def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., name=''):
+    """
+    Optionally add residual connection, layer normalization and dropout to the
+    out tensor, according to the value of process_cmd.
+    This is used before and after multi-head attention and position-wise
+    feed-forward networks.
+    """
+    for cmd in process_cmd:
+        if cmd == "a":  # add residual connection
+            out = out + prev_out if prev_out else out
+        elif cmd == "n":  # add layer normalization
+            out_dtype = out.dtype
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float32")
+            out = layers.layer_norm(
+                out,
+                begin_norm_axis=len(out.shape) - 1,
+                param_attr=fluid.ParamAttr(name=name + '_layer_norm_scale', initializer=fluid.initializer.Constant(1.)),
+                bias_attr=fluid.ParamAttr(name=name + '_layer_norm_bias', initializer=fluid.initializer.Constant(0.)))
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float16")
+        elif cmd == "d":  # add dropout
+            if dropout_rate:
+                out = layers.dropout(
+                    out, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    return out
+
+
+pre_process_layer = partial(pre_post_process_layer, None)
+post_process_layer = pre_post_process_layer
+
+
+def encoder_layer(enc_input,
+                  attn_bias,
+                  n_head,
+                  d_key,
+                  d_value,
+                  d_model,
+                  d_inner_hid,
+                  prepostprocess_dropout,
+                  attention_dropout,
+                  relu_dropout,
+                  hidden_act,
+                  preprocess_cmd="n",
+                  postprocess_cmd="da",
+                  param_initializer=None,
+                  name=''):
+    """The encoder layers that can be stacked to form a deep encoder.
+    This module consists of a multi-head (self) attention sublayer followed by
+    a position-wise feed-forward network, each accompanied by a
+    post_process_layer that adds residual connection, layer normalization
+    and dropout.
+ """ + attn_output = multi_head_attention( + pre_process_layer(enc_input, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_att'), + None, + None, + attn_bias, + d_key, + d_value, + d_model, + n_head, + attention_dropout, + param_initializer=param_initializer, + name=name + '_multi_head_att') + attn_output = post_process_layer( + enc_input, attn_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_att') + ffd_output = positionwise_feed_forward( + pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_ffn'), + d_inner_hid, + d_model, + relu_dropout, + hidden_act, + param_initializer=param_initializer, + name=name + '_ffn') + return post_process_layer(attn_output, ffd_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_ffn') + + +def encoder(enc_input, + attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """ + The encoder is composed of a stack of identical layers returned by calling + encoder_layer. + """ + for i in range(n_layer): + enc_output = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + name=name + '_layer_' + str(i)) + enc_input = enc_output + enc_output = pre_process_layer(enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") + + return enc_output diff --git a/modules/text/semantic_model/chinese_bert_wwm_ext/module.py b/modules/text/semantic_model/chinese_bert_wwm_ext/module.py new file mode 100644 index 0000000000000000000000000000000000000000..d11fea1c3eae159ca2821b9ad1750021a31048b4 --- /dev/null +++ b/modules/text/semantic_model/chinese_bert_wwm_ext/module.py @@ -0,0 +1,74 @@ +# coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/modules/text/semantic_model/chinese_bert_wwm_ext/module.py b/modules/text/semantic_model/chinese_bert_wwm_ext/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..d11fea1c3eae159ca2821b9ad1750021a31048b4
--- /dev/null
+++ b/modules/text/semantic_model/chinese_bert_wwm_ext/module.py
@@ -0,0 +1,71 @@
+# coding:utf-8
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from paddlehub import TransformerModule
+from paddlehub.module.module import moduleinfo
+
+from chinese_bert_wwm_ext.model.bert import BertConfig, BertModel
+
+
+@moduleinfo(
+    name="chinese-bert-wwm-ext",
+    version="1.0.0",
+    summary="chinese-bert-wwm-ext, 12-layer, 768-hidden, 12-heads, 110M parameters",
+    author="ymcui",
+    author_email="ymcui@ir.hit.edu.cn",
+    type="nlp/semantic_model",
+)
+class BertWwm(TransformerModule):
+    def _initialize(self):
+        self.MAX_SEQ_LEN = 512
+        self.params_path = os.path.join(self.directory, "assets", "params")
+        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
+
+        bert_config_path = os.path.join(self.directory, "assets", "bert_config.json")
+        self.bert_config = BertConfig(bert_config_path)
+
+    def net(self, input_ids, position_ids, segment_ids, input_mask):
+        """
+        Create the neural network.
+
+        Args:
+            input_ids (tensor): the word ids.
+            position_ids (tensor): the position ids.
+            segment_ids (tensor): the segment ids.
+            input_mask (tensor): the padding mask.
+
+        Returns:
+            pooled_output (tensor): sentence-level output for classification tasks.
+            sequence_output (tensor): token-level output for sequence tasks.
+        """
+        bert = BertModel(
+            src_ids=input_ids,
+            position_ids=position_ids,
+            sentence_ids=segment_ids,
+            input_mask=input_mask,
+            config=self.bert_config,
+            use_fp16=False)
+        pooled_output = bert.get_pooled_output()
+        sequence_output = bert.get_sequence_output()
+        return pooled_output, sequence_output
+
+
+if __name__ == '__main__':
+    test_module = BertWwm()
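A hedged usage sketch for the module above. The `hub.Module`/`context` calls follow the PaddleHub 1.x convention used by other transformer modules; treat the exact signature and output keys as assumptions, not guarantees, since they depend on the installed PaddleHub version.

```python
# Sketch: loading chinese-bert-wwm-ext and pulling out the two features
# returned by net(). Assumes PaddleHub 1.x-style transformer modules.
import paddlehub as hub

module = hub.Module(name="chinese-bert-wwm-ext")
inputs, outputs, program = module.context(trainable=True, max_seq_len=128)
pooled_output = outputs["pooled_output"]      # sentence-level feature
sequence_output = outputs["sequence_output"]  # token-level features
```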
diff --git a/hub_module/modules/text/semantic_model/chinese_electra_base/README.md b/modules/text/semantic_model/chinese_electra_base/README.md
similarity index 100%
rename from hub_module/modules/text/semantic_model/chinese_electra_base/README.md
rename to modules/text/semantic_model/chinese_electra_base/README.md
diff --git a/hub_module/modules/text/semantic_model/chinese_bert_wwm_ext/model/__init__.py b/modules/text/semantic_model/chinese_electra_base/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/chinese_bert_wwm_ext/model/__init__.py
rename to modules/text/semantic_model/chinese_electra_base/__init__.py
diff --git a/hub_module/modules/text/semantic_model/chinese_electra_base/__init__.py b/modules/text/semantic_model/chinese_electra_base/model/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/chinese_electra_base/__init__.py
rename to modules/text/semantic_model/chinese_electra_base/model/__init__.py
diff --git a/modules/text/semantic_model/chinese_electra_base/model/electra.py b/modules/text/semantic_model/chinese_electra_base/model/electra.py
new file mode 100644
index 0000000000000000000000000000000000000000..0b81e89717de211a52f8c6217fe582b4708c66a3
--- /dev/null
+++ b/modules/text/semantic_model/chinese_electra_base/model/electra.py
@@ -0,0 +1,189 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""ELECTRA model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import six
+import json
+
+import paddle.fluid as fluid
+
+from chinese_electra_base.model.transformer_encoder import encoder, pre_process_layer
+
+
+class ElectraConfig(object):
+    def __init__(self, config_path):
+        self._config_dict = self._parse(config_path)
+
+    def _parse(self, config_path):
+        try:
+            with open(config_path) as json_file:
+                config_dict = json.load(json_file)
+        except Exception:
+            raise IOError("Error in parsing electra model config file '%s'" % config_path)
+        else:
+            return config_dict
+
+    def __getitem__(self, key):
+        return self._config_dict[key]
+
+    def print_config(self):
+        for arg, value in sorted(six.iteritems(self._config_dict)):
+            print('%s: %s' % (arg, value))
+        print('------------------------------------------------')
+
+
+class ElectraModel(object):
+    def __init__(self, src_ids, position_ids, sentence_ids, input_mask, config, weight_sharing=True, use_fp16=False):
+
+        self._emb_size = config['hidden_size']
+        self._n_layer = config['num_hidden_layers']
+        self._n_head = config['num_attention_heads']
+        self._voc_size = config['vocab_size']
+        self._max_position_seq_len = config['max_position_embeddings']
+        self._sent_types = config['type_vocab_size']
+        self._hidden_act = config['hidden_act']
+        self._prepostprocess_dropout = config['hidden_dropout_prob']
+        self._attention_dropout = config['attention_probs_dropout_prob']
+        self._weight_sharing = weight_sharing
+
+        self._word_emb_name = "word_embedding"
+        self._pos_emb_name = "pos_embedding"
+        self._sent_emb_name = "sent_embedding"
+        self._dtype = "float16" if use_fp16 else "float32"
+
+        # Initialize all weights by truncated normal initializer, and all biases
+        # will be initialized by constant zero by default.
+        self._param_initializer = fluid.initializer.TruncatedNormal(scale=config['initializer_range'])
+
+        self._build_model(src_ids, position_ids, sentence_ids, input_mask)
+
+    def _build_model(self, src_ids, position_ids, sentence_ids, input_mask):
+        # padding id in vocabulary must be set to 0
+        emb_out = fluid.layers.embedding(
+            input=src_ids,
+            size=[self._voc_size, self._emb_size],
+            dtype=self._dtype,
+            param_attr=fluid.ParamAttr(name=self._word_emb_name, initializer=self._param_initializer),
+            is_sparse=False)
+        position_emb_out = fluid.layers.embedding(
+            input=position_ids,
+            size=[self._max_position_seq_len, self._emb_size],
+            dtype=self._dtype,
+            param_attr=fluid.ParamAttr(name=self._pos_emb_name, initializer=self._param_initializer))
+
+        sent_emb_out = fluid.layers.embedding(
+            sentence_ids,
+            size=[self._sent_types, self._emb_size],
+            dtype=self._dtype,
+            param_attr=fluid.ParamAttr(name=self._sent_emb_name, initializer=self._param_initializer))
+
+        emb_out = emb_out + position_emb_out
+        emb_out = emb_out + sent_emb_out
+
+        emb_out = pre_process_layer(emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder')
+
+        if self._dtype == "float16":
+            input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype)
+
+        self_attn_mask = fluid.layers.matmul(x=input_mask, y=input_mask, transpose_y=True)
+        self_attn_mask = fluid.layers.scale(x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False)
+        n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] * self._n_head, axis=1)
+        n_head_self_attn_mask.stop_gradient = True
+
+        self._enc_out = encoder(
+            enc_input=emb_out,
+            attn_bias=n_head_self_attn_mask,
+            n_layer=self._n_layer,
+            n_head=self._n_head,
+            d_key=self._emb_size // self._n_head,
+            d_value=self._emb_size // self._n_head,
+            d_model=self._emb_size,
+            d_inner_hid=self._emb_size * 4,
+            prepostprocess_dropout=self._prepostprocess_dropout,
+            attention_dropout=self._attention_dropout,
+            relu_dropout=0,
+            hidden_act=self._hidden_act,
+            preprocess_cmd="",
+            postprocess_cmd="dan",
+            param_initializer=self._param_initializer,
+            name='encoder')
+
+    def get_sequence_output(self):
+        return self._enc_out
+
+    def get_pooled_output(self):
+        """Get the first feature of each sequence for classification"""
+        next_sent_feat = fluid.layers.slice(input=self._enc_out, axes=[1], starts=[0], ends=[1])
+        return next_sent_feat
+
+    def get_pretraining_output(self, mask_label, mask_pos, labels):
+        """Get the loss & accuracy for pretraining"""
+
+        mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32')
+
+        # extract the first token feature in each sentence
+        next_sent_feat = self.get_pooled_output()
+        reshaped_emb_out = fluid.layers.reshape(x=self._enc_out, shape=[-1, self._emb_size])
+        # extract masked tokens' feature
+        mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos)
+
+        # transform: fc
+        mask_trans_feat = fluid.layers.fc(
+            input=mask_feat,
+            size=self._emb_size,
+            act=self._hidden_act,
+            param_attr=fluid.ParamAttr(name='mask_lm_trans_fc.w_0', initializer=self._param_initializer),
+            bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0'))
+        # transform: layer norm
+        mask_trans_feat = pre_process_layer(mask_trans_feat, 'n', name='mask_lm_trans')
+
+        mask_lm_out_bias_attr = fluid.ParamAttr(
+            name="mask_lm_out_fc.b_0", initializer=fluid.initializer.Constant(value=0.0))
+        if self._weight_sharing:
+            fc_out = fluid.layers.matmul(
+                x=mask_trans_feat,
+                y=fluid.default_main_program().global_block().var(self._word_emb_name),
+                transpose_y=True)
+            fc_out += fluid.layers.create_parameter(
+                shape=[self._voc_size], dtype=self._dtype, attr=mask_lm_out_bias_attr, is_bias=True)
+
+        else:
+            fc_out = fluid.layers.fc(
+                input=mask_trans_feat,
+                size=self._voc_size,
+                param_attr=fluid.ParamAttr(name="mask_lm_out_fc.w_0", initializer=self._param_initializer),
+                bias_attr=mask_lm_out_bias_attr)
+
+        mask_lm_loss = fluid.layers.softmax_with_cross_entropy(logits=fc_out, label=mask_label)
+        mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss)
+
+        next_sent_fc_out = fluid.layers.fc(
+            input=next_sent_feat,
+            size=2,
+            param_attr=fluid.ParamAttr(name="next_sent_fc.w_0", initializer=self._param_initializer),
+            bias_attr="next_sent_fc.b_0")
+
+        next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy(
+            logits=next_sent_fc_out, label=labels, return_softmax=True)
+
+        next_sent_acc = fluid.layers.accuracy(input=next_sent_softmax, label=labels)
+
+        mean_next_sent_loss = fluid.layers.mean(next_sent_loss)
+
+        loss = mean_next_sent_loss + mean_mask_lm_loss
+        return next_sent_acc, mean_mask_lm_loss, loss
diff --git a/modules/text/semantic_model/chinese_electra_base/model/transformer_encoder.py b/modules/text/semantic_model/chinese_electra_base/model/transformer_encoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..53051cde80308a17a30d9b92de11c712b63da406
--- /dev/null
+++ b/modules/text/semantic_model/chinese_electra_base/model/transformer_encoder.py
@@ -0,0 +1,288 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Transformer encoder."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from functools import partial
+
+import paddle.fluid as fluid
+import paddle.fluid.layers as layers
+
+
+def multi_head_attention(queries,
+                         keys,
+                         values,
+                         attn_bias,
+                         d_key,
+                         d_value,
+                         d_model,
+                         n_head=1,
+                         dropout_rate=0.,
+                         cache=None,
+                         param_initializer=None,
+                         name='multi_head_att'):
+    """
+    Multi-Head Attention. Note that attn_bias is added to the logits before
+    computing the softmax activation to mask certain selected positions so
+    that they will not be considered in attention weights.
+    """
+    keys = queries if keys is None else keys
+    values = keys if values is None else values
+
+    if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
+        raise ValueError("Inputs: queries, keys and values should all be 3-D tensors.")
+
+    def __compute_qkv(queries, keys, values, n_head, d_key, d_value):
+        """
+        Add linear projection to queries, keys, and values.
+ """ + q = layers.fc( + input=queries, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_query_fc.w_0', initializer=param_initializer), + bias_attr=name + '_query_fc.b_0') + k = layers.fc( + input=keys, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_key_fc.w_0', initializer=param_initializer), + bias_attr=name + '_key_fc.b_0') + v = layers.fc( + input=values, + size=d_value * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_value_fc.w_0', initializer=param_initializer), + bias_attr=name + '_value_fc.b_0') + return q, k, v + + def __split_heads(x, n_head): + """ + Reshape the last dimension of inpunt tensor x so that it becomes two + dimensions and then transpose. Specifically, input a tensor with shape + [bs, max_sequence_length, n_head * hidden_dim] then output a tensor + with shape [bs, n_head, max_sequence_length, hidden_dim]. + """ + hidden_size = x.shape[-1] + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + reshaped = layers.reshape(x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) + + # permuate the dimensions into: + # [batch_size, n_head, max_sequence_len, hidden_size_per_head] + return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + + def __combine_heads(x): + """ + Transpose and then reshape the last two dimensions of inpunt tensor x + so that it becomes one dimension, which is reverse to __split_heads. + """ + if len(x.shape) == 3: return x + if len(x.shape) != 4: + raise ValueError("Input(x) should be a 4-D Tensor.") + + trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + return layers.reshape(x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], inplace=True) + + def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): + """ + Scaled Dot-Product Attention + """ + scaled_q = layers.scale(x=q, scale=d_key**-0.5) + product = layers.matmul(x=scaled_q, y=k, transpose_y=True) + if attn_bias: + product += attn_bias + weights = layers.softmax(product) + if dropout_rate: + weights = layers.dropout( + weights, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + out = layers.matmul(weights, v) + return out + + q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) + + if cache is not None: # use cache and concat time steps + # Since the inplace reshape in __split_heads changes the shape of k and + # v, which is the cache input for next time step, reshape the cache + # input from the previous time step first. + k = cache["k"] = layers.concat([layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) + v = cache["v"] = layers.concat([layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) + + q = __split_heads(q, n_head) + k = __split_heads(k, n_head) + v = __split_heads(v, n_head) + + ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate) + + out = __combine_heads(ctx_multiheads) + + # Project back to the model size. 
+    proj_out = layers.fc(
+        input=out,
+        size=d_model,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_output_fc.w_0', initializer=param_initializer),
+        bias_attr=name + '_output_fc.b_0')
+    return proj_out
+
+
+def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate, hidden_act, param_initializer=None, name='ffn'):
+    """
+    Position-wise Feed-Forward Networks.
+    This module consists of two linear transformations with a ReLU activation
+    in between, which is applied to each position separately and identically.
+    """
+    hidden = layers.fc(
+        input=x,
+        size=d_inner_hid,
+        num_flatten_dims=2,
+        act=hidden_act,
+        param_attr=fluid.ParamAttr(name=name + '_fc_0.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_0.b_0')
+    if dropout_rate:
+        hidden = layers.dropout(
+            hidden, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    out = layers.fc(
+        input=hidden,
+        size=d_hid,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_fc_1.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_1.b_0')
+    return out
+
+
+def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., name=''):
+    """
+    Add residual connection, layer normalization and dropout to the out tensor
+    optionally according to the value of process_cmd.
+    This will be used before or after multi-head attention and position-wise
+    feed-forward networks.
+    """
+    for cmd in process_cmd:
+        if cmd == "a":  # add residual connection
+            out = out + prev_out if prev_out else out
+        elif cmd == "n":  # add layer normalization
+            out_dtype = out.dtype
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float32")
+            out = layers.layer_norm(
+                out,
+                begin_norm_axis=len(out.shape) - 1,
+                param_attr=fluid.ParamAttr(name=name + '_layer_norm_scale', initializer=fluid.initializer.Constant(1.)),
+                bias_attr=fluid.ParamAttr(name=name + '_layer_norm_bias', initializer=fluid.initializer.Constant(0.)))
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float16")
+        elif cmd == "d":  # add dropout
+            if dropout_rate:
+                out = layers.dropout(
+                    out, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    return out
+
+
+pre_process_layer = partial(pre_post_process_layer, None)
+post_process_layer = pre_post_process_layer
+
+
+def encoder_layer(enc_input,
+                  attn_bias,
+                  n_head,
+                  d_key,
+                  d_value,
+                  d_model,
+                  d_inner_hid,
+                  prepostprocess_dropout,
+                  attention_dropout,
+                  relu_dropout,
+                  hidden_act,
+                  preprocess_cmd="n",
+                  postprocess_cmd="da",
+                  param_initializer=None,
+                  name=''):
+    """The encoder layers that can be stacked to form a deep encoder.
+    This module consists of a multi-head (self) attention followed by
+    position-wise feed-forward networks, and both components are accompanied
+    by post_process_layer to add residual connection, layer normalization
+    and dropout.
+ """ + attn_output = multi_head_attention( + pre_process_layer(enc_input, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_att'), + None, + None, + attn_bias, + d_key, + d_value, + d_model, + n_head, + attention_dropout, + param_initializer=param_initializer, + name=name + '_multi_head_att') + attn_output = post_process_layer( + enc_input, attn_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_att') + ffd_output = positionwise_feed_forward( + pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_ffn'), + d_inner_hid, + d_model, + relu_dropout, + hidden_act, + param_initializer=param_initializer, + name=name + '_ffn') + return post_process_layer(attn_output, ffd_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_ffn') + + +def encoder(enc_input, + attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """ + The encoder is composed of a stack of identical layers returned by calling + encoder_layer. + """ + for i in range(n_layer): + enc_output = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + name=name + '_layer_' + str(i)) + enc_input = enc_output + enc_output = pre_process_layer(enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") + + return enc_output diff --git a/modules/text/semantic_model/chinese_electra_base/module.py b/modules/text/semantic_model/chinese_electra_base/module.py new file mode 100644 index 0000000000000000000000000000000000000000..b96b87d879752e4eb62c5d5c2db8fae857e58b52 --- /dev/null +++ b/modules/text/semantic_model/chinese_electra_base/module.py @@ -0,0 +1,74 @@ +# coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/modules/text/semantic_model/chinese_electra_base/module.py b/modules/text/semantic_model/chinese_electra_base/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..b96b87d879752e4eb62c5d5c2db8fae857e58b52
--- /dev/null
+++ b/modules/text/semantic_model/chinese_electra_base/module.py
@@ -0,0 +1,71 @@
+# coding:utf-8
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from paddlehub import TransformerModule
+from paddlehub.module.module import moduleinfo
+
+from chinese_electra_base.model.electra import ElectraConfig, ElectraModel
+
+
+@moduleinfo(
+    name="chinese-electra-base",
+    version="1.0.0",
+    summary="chinese-electra-base, 12-layer, 768-hidden, 12-heads, 102M parameters",
+    author="ymcui",
+    author_email="ymcui@ir.hit.edu.cn",
+    type="nlp/semantic_model",
+)
+class Electra(TransformerModule):
+    def _initialize(self):
+        self.MAX_SEQ_LEN = 512
+        self.params_path = os.path.join(self.directory, "assets", "params")
+        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
+
+        electra_config_path = os.path.join(self.directory, "assets", "config.json")
+        self.electra_config = ElectraConfig(electra_config_path)
+
+    def net(self, input_ids, position_ids, segment_ids, input_mask):
+        """
+        Create the neural network.
+
+        Args:
+            input_ids (tensor): the word ids.
+            position_ids (tensor): the position ids.
+            segment_ids (tensor): the segment ids.
+            input_mask (tensor): the padding mask.
+
+        Returns:
+            pooled_output (tensor): sentence-level output for classification tasks.
+            sequence_output (tensor): token-level output for sequence tasks.
+        """
+        electra = ElectraModel(
+            src_ids=input_ids,
+            position_ids=position_ids,
+            sentence_ids=segment_ids,
+            input_mask=input_mask,
+            config=self.electra_config,
+            use_fp16=False)
+        pooled_output = electra.get_pooled_output()
+        sequence_output = electra.get_sequence_output()
+        return pooled_output, sequence_output
+
+
+if __name__ == '__main__':
+    test_module = Electra()
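A hedged usage sketch, parallel to the BERT module earlier in this diff; the `hub.Module`/`context` signature is the PaddleHub 1.x convention and should be treated as an assumption. One behavioral difference is visible in `electra.py` above: `ElectraModel.get_pooled_output` returns the raw first-token slice, with no tanh pooler layer.

```python
# Sketch: chinese-electra-base exposes the same interface as the BERT module;
# its pooled_output is the first-token hidden state, unpooled.
import paddlehub as hub

module = hub.Module(name="chinese-electra-base")
inputs, outputs, program = module.context(trainable=True, max_seq_len=128)
pooled_output = outputs["pooled_output"]      # raw [CLS]-position feature
sequence_output = outputs["sequence_output"]  # token-level features
```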
diff --git a/hub_module/modules/text/semantic_model/chinese_electra_small/README.md b/modules/text/semantic_model/chinese_electra_small/README.md
similarity index 100%
rename from hub_module/modules/text/semantic_model/chinese_electra_small/README.md
rename to modules/text/semantic_model/chinese_electra_small/README.md
diff --git a/hub_module/modules/text/semantic_model/chinese_electra_base/model/__init__.py b/modules/text/semantic_model/chinese_electra_small/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/chinese_electra_base/model/__init__.py
rename to modules/text/semantic_model/chinese_electra_small/__init__.py
diff --git a/hub_module/modules/text/semantic_model/chinese_electra_small/__init__.py b/modules/text/semantic_model/chinese_electra_small/model/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/chinese_electra_small/__init__.py
rename to modules/text/semantic_model/chinese_electra_small/model/__init__.py
diff --git a/modules/text/semantic_model/chinese_electra_small/model/electra.py b/modules/text/semantic_model/chinese_electra_small/model/electra.py
new file mode 100644
index 0000000000000000000000000000000000000000..083da60e6f842200f02a88dbc5c68b8b278c3d77
--- /dev/null
+++ b/modules/text/semantic_model/chinese_electra_small/model/electra.py
@@ -0,0 +1,199 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""ELECTRA model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import six
+import json
+
+import paddle.fluid as fluid
+
+from chinese_electra_small.model.transformer_encoder import encoder, pre_process_layer
+
+
+class ElectraConfig(object):
+    def __init__(self, config_path):
+        self._config_dict = self._parse(config_path)
+
+    def _parse(self, config_path):
+        try:
+            with open(config_path) as json_file:
+                config_dict = json.load(json_file)
+        except Exception:
+            raise IOError("Error in parsing electra model config file '%s'" % config_path)
+        else:
+            return config_dict
+
+    def __getitem__(self, key):
+        return self._config_dict[key]
+
+    def print_config(self):
+        for arg, value in sorted(six.iteritems(self._config_dict)):
+            print('%s: %s' % (arg, value))
+        print('------------------------------------------------')
+
+
+class ElectraModel(object):
+    def __init__(self, src_ids, position_ids, sentence_ids, input_mask, config, weight_sharing=True, use_fp16=False):
+
+        self._emb_size = 128
+        self._hidden_size = config['hidden_size']
+        self._n_layer = config['num_hidden_layers']
+        self._n_head = config['num_attention_heads']
+        self._voc_size = config['vocab_size']
+        self._max_position_seq_len = config['max_position_embeddings']
+        self._sent_types = config['type_vocab_size']
+        self._hidden_act = config['hidden_act']
+        self._prepostprocess_dropout = config['hidden_dropout_prob']
+        self._attention_dropout = config['attention_probs_dropout_prob']
+        self._weight_sharing = weight_sharing
+
+        self._word_emb_name = "word_embedding"
+        self._pos_emb_name = "pos_embedding"
+        self._sent_emb_name = "sent_embedding"
+        self._dtype = "float16" if use_fp16 else "float32"
+
+        # Initialize all weights by truncated normal initializer, and all biases
+        # will be initialized by constant zero by default.
+        self._param_initializer = fluid.initializer.TruncatedNormal(scale=config['initializer_range'])
+
+        self._build_model(src_ids, position_ids, sentence_ids, input_mask)
+
+    def _build_model(self, src_ids, position_ids, sentence_ids, input_mask):
+        # padding id in vocabulary must be set to 0
+        emb_out = fluid.layers.embedding(
+            input=src_ids,
+            size=[self._voc_size, self._emb_size],
+            dtype=self._dtype,
+            param_attr=fluid.ParamAttr(name=self._word_emb_name, initializer=self._param_initializer),
+            is_sparse=False)
+        position_emb_out = fluid.layers.embedding(
+            input=position_ids,
+            size=[self._max_position_seq_len, self._emb_size],
+            dtype=self._dtype,
+            param_attr=fluid.ParamAttr(name=self._pos_emb_name, initializer=self._param_initializer))
+
+        sent_emb_out = fluid.layers.embedding(
+            sentence_ids,
+            size=[self._sent_types, self._emb_size],
+            dtype=self._dtype,
+            param_attr=fluid.ParamAttr(name=self._sent_emb_name, initializer=self._param_initializer))
+
+        emb_out = emb_out + position_emb_out
+        emb_out = emb_out + sent_emb_out
+
+        emb_out = pre_process_layer(emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder')
+
+        if self._emb_size != self._hidden_size:
+            emb_out = fluid.layers.fc(
+                input=emb_out,
+                size=self._hidden_size,
+                act=None,
+                param_attr=fluid.ParamAttr(name="embeddings_project.w_0", initializer=self._param_initializer),
+                num_flatten_dims=2,
+                bias_attr="embeddings_project.b_0")
+
+        if self._dtype == "float16":
+            input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype)
+
+        self_attn_mask = fluid.layers.matmul(x=input_mask, y=input_mask, transpose_y=True)
+        self_attn_mask = fluid.layers.scale(x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False)
+        n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] * self._n_head, axis=1)
+        n_head_self_attn_mask.stop_gradient = True
+
+        self._enc_out = encoder(
+            enc_input=emb_out,
+            attn_bias=n_head_self_attn_mask,
+            n_layer=self._n_layer,
+            n_head=self._n_head,
+            d_key=self._hidden_size // self._n_head,
+            d_value=self._hidden_size // self._n_head,
+            d_model=self._hidden_size,
+            d_inner_hid=self._hidden_size * 4,
+            prepostprocess_dropout=self._prepostprocess_dropout,
+            attention_dropout=self._attention_dropout,
+            relu_dropout=0,
+            hidden_act=self._hidden_act,
+            preprocess_cmd="",
+            postprocess_cmd="dan",
+            param_initializer=self._param_initializer,
+            name='encoder')
+
+    def get_sequence_output(self):
+        return self._enc_out
+
+    def get_pooled_output(self):
+        """Get the first feature of each sequence for classification"""
+        next_sent_feat = fluid.layers.slice(input=self._enc_out, axes=[1], starts=[0], ends=[1])
+        return next_sent_feat
+
+    def get_pretraining_output(self, mask_label, mask_pos, labels):
+        """Get the loss & accuracy for pretraining"""
+
+        mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32')
+
+        # extract the first token feature in each sentence
+        next_sent_feat = self.get_pooled_output()
+        reshaped_emb_out = fluid.layers.reshape(x=self._enc_out, shape=[-1, self._hidden_size])
+        # extract masked tokens' feature
+        mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos)
+
+        # transform: fc
+        mask_trans_feat = fluid.layers.fc(
+            input=mask_feat,
+            size=self._hidden_size,
+            act=self._hidden_act,
+            param_attr=fluid.ParamAttr(name='mask_lm_trans_fc.w_0', initializer=self._param_initializer),
+            bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0'))
+        # transform: layer norm
+        mask_trans_feat = pre_process_layer(mask_trans_feat, 'n', name='mask_lm_trans')
+
+        mask_lm_out_bias_attr = fluid.ParamAttr(
+            name="mask_lm_out_fc.b_0", initializer=fluid.initializer.Constant(value=0.0))
+        if self._weight_sharing:
+            fc_out = fluid.layers.matmul(
+                x=mask_trans_feat,
+                y=fluid.default_main_program().global_block().var(self._word_emb_name),
+                transpose_y=True)
+            fc_out += fluid.layers.create_parameter(
+                shape=[self._voc_size], dtype=self._dtype, attr=mask_lm_out_bias_attr, is_bias=True)
+
+        else:
+            fc_out = fluid.layers.fc(
+                input=mask_trans_feat,
+                size=self._voc_size,
+                param_attr=fluid.ParamAttr(name="mask_lm_out_fc.w_0", initializer=self._param_initializer),
+                bias_attr=mask_lm_out_bias_attr)
+
+        mask_lm_loss = fluid.layers.softmax_with_cross_entropy(logits=fc_out, label=mask_label)
+        mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss)
+
+        next_sent_fc_out = fluid.layers.fc(
+            input=next_sent_feat,
+            size=2,
+            param_attr=fluid.ParamAttr(name="next_sent_fc.w_0", initializer=self._param_initializer),
+            bias_attr="next_sent_fc.b_0")
+
+        next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy(
+            logits=next_sent_fc_out, label=labels, return_softmax=True)
+
+        next_sent_acc = fluid.layers.accuracy(input=next_sent_softmax, label=labels)
+
+        mean_next_sent_loss = fluid.layers.mean(next_sent_loss)
+
+        loss = mean_next_sent_loss + mean_mask_lm_loss
+        return next_sent_acc, mean_mask_lm_loss, loss
diff --git a/modules/text/semantic_model/chinese_electra_small/model/transformer_encoder.py b/modules/text/semantic_model/chinese_electra_small/model/transformer_encoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..53051cde80308a17a30d9b92de11c712b63da406
--- /dev/null
+++ b/modules/text/semantic_model/chinese_electra_small/model/transformer_encoder.py
@@ -0,0 +1,288 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Transformer encoder."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from functools import partial
+
+import paddle.fluid as fluid
+import paddle.fluid.layers as layers
+
+
+def multi_head_attention(queries,
+                         keys,
+                         values,
+                         attn_bias,
+                         d_key,
+                         d_value,
+                         d_model,
+                         n_head=1,
+                         dropout_rate=0.,
+                         cache=None,
+                         param_initializer=None,
+                         name='multi_head_att'):
+    """
+    Multi-Head Attention. Note that attn_bias is added to the logits before
+    computing the softmax activation to mask certain selected positions so
+    that they will not be considered in attention weights.
+    """
+    keys = queries if keys is None else keys
+    values = keys if values is None else values
+
+    if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
+        raise ValueError("Inputs: queries, keys and values should all be 3-D tensors.")
+
+    def __compute_qkv(queries, keys, values, n_head, d_key, d_value):
+        """
+        Add linear projection to queries, keys, and values.
+ """ + q = layers.fc( + input=queries, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_query_fc.w_0', initializer=param_initializer), + bias_attr=name + '_query_fc.b_0') + k = layers.fc( + input=keys, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_key_fc.w_0', initializer=param_initializer), + bias_attr=name + '_key_fc.b_0') + v = layers.fc( + input=values, + size=d_value * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_value_fc.w_0', initializer=param_initializer), + bias_attr=name + '_value_fc.b_0') + return q, k, v + + def __split_heads(x, n_head): + """ + Reshape the last dimension of inpunt tensor x so that it becomes two + dimensions and then transpose. Specifically, input a tensor with shape + [bs, max_sequence_length, n_head * hidden_dim] then output a tensor + with shape [bs, n_head, max_sequence_length, hidden_dim]. + """ + hidden_size = x.shape[-1] + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + reshaped = layers.reshape(x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) + + # permuate the dimensions into: + # [batch_size, n_head, max_sequence_len, hidden_size_per_head] + return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + + def __combine_heads(x): + """ + Transpose and then reshape the last two dimensions of inpunt tensor x + so that it becomes one dimension, which is reverse to __split_heads. + """ + if len(x.shape) == 3: return x + if len(x.shape) != 4: + raise ValueError("Input(x) should be a 4-D Tensor.") + + trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + return layers.reshape(x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], inplace=True) + + def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): + """ + Scaled Dot-Product Attention + """ + scaled_q = layers.scale(x=q, scale=d_key**-0.5) + product = layers.matmul(x=scaled_q, y=k, transpose_y=True) + if attn_bias: + product += attn_bias + weights = layers.softmax(product) + if dropout_rate: + weights = layers.dropout( + weights, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + out = layers.matmul(weights, v) + return out + + q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) + + if cache is not None: # use cache and concat time steps + # Since the inplace reshape in __split_heads changes the shape of k and + # v, which is the cache input for next time step, reshape the cache + # input from the previous time step first. + k = cache["k"] = layers.concat([layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) + v = cache["v"] = layers.concat([layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) + + q = __split_heads(q, n_head) + k = __split_heads(k, n_head) + v = __split_heads(v, n_head) + + ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate) + + out = __combine_heads(ctx_multiheads) + + # Project back to the model size. 
+    proj_out = layers.fc(
+        input=out,
+        size=d_model,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_output_fc.w_0', initializer=param_initializer),
+        bias_attr=name + '_output_fc.b_0')
+    return proj_out
+
+
+def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate, hidden_act, param_initializer=None, name='ffn'):
+    """
+    Position-wise Feed-Forward Networks.
+    This module consists of two linear transformations with a ReLU activation
+    in between, which is applied to each position separately and identically.
+    """
+    hidden = layers.fc(
+        input=x,
+        size=d_inner_hid,
+        num_flatten_dims=2,
+        act=hidden_act,
+        param_attr=fluid.ParamAttr(name=name + '_fc_0.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_0.b_0')
+    if dropout_rate:
+        hidden = layers.dropout(
+            hidden, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    out = layers.fc(
+        input=hidden,
+        size=d_hid,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_fc_1.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_1.b_0')
+    return out
+
+
+def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., name=''):
+    """
+    Add residual connection, layer normalization and dropout to the out tensor
+    optionally according to the value of process_cmd.
+    This will be used before or after multi-head attention and position-wise
+    feed-forward networks.
+    """
+    for cmd in process_cmd:
+        if cmd == "a":  # add residual connection
+            out = out + prev_out if prev_out else out
+        elif cmd == "n":  # add layer normalization
+            out_dtype = out.dtype
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float32")
+            out = layers.layer_norm(
+                out,
+                begin_norm_axis=len(out.shape) - 1,
+                param_attr=fluid.ParamAttr(name=name + '_layer_norm_scale', initializer=fluid.initializer.Constant(1.)),
+                bias_attr=fluid.ParamAttr(name=name + '_layer_norm_bias', initializer=fluid.initializer.Constant(0.)))
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float16")
+        elif cmd == "d":  # add dropout
+            if dropout_rate:
+                out = layers.dropout(
+                    out, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    return out
+
+
+pre_process_layer = partial(pre_post_process_layer, None)
+post_process_layer = pre_post_process_layer
+
+
+def encoder_layer(enc_input,
+                  attn_bias,
+                  n_head,
+                  d_key,
+                  d_value,
+                  d_model,
+                  d_inner_hid,
+                  prepostprocess_dropout,
+                  attention_dropout,
+                  relu_dropout,
+                  hidden_act,
+                  preprocess_cmd="n",
+                  postprocess_cmd="da",
+                  param_initializer=None,
+                  name=''):
+    """The encoder layers that can be stacked to form a deep encoder.
+    This module consists of a multi-head (self) attention followed by
+    position-wise feed-forward networks, and both components are accompanied
+    by post_process_layer to add residual connection, layer normalization
+    and dropout.
+ """ + attn_output = multi_head_attention( + pre_process_layer(enc_input, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_att'), + None, + None, + attn_bias, + d_key, + d_value, + d_model, + n_head, + attention_dropout, + param_initializer=param_initializer, + name=name + '_multi_head_att') + attn_output = post_process_layer( + enc_input, attn_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_att') + ffd_output = positionwise_feed_forward( + pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_ffn'), + d_inner_hid, + d_model, + relu_dropout, + hidden_act, + param_initializer=param_initializer, + name=name + '_ffn') + return post_process_layer(attn_output, ffd_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_ffn') + + +def encoder(enc_input, + attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """ + The encoder is composed of a stack of identical layers returned by calling + encoder_layer. + """ + for i in range(n_layer): + enc_output = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + name=name + '_layer_' + str(i)) + enc_input = enc_output + enc_output = pre_process_layer(enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") + + return enc_output diff --git a/modules/text/semantic_model/chinese_electra_small/module.py b/modules/text/semantic_model/chinese_electra_small/module.py new file mode 100644 index 0000000000000000000000000000000000000000..55850d5b2396979848485886f916a1da9c982d2f --- /dev/null +++ b/modules/text/semantic_model/chinese_electra_small/module.py @@ -0,0 +1,74 @@ +# coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/modules/text/semantic_model/chinese_electra_small/module.py b/modules/text/semantic_model/chinese_electra_small/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..55850d5b2396979848485886f916a1da9c982d2f
--- /dev/null
+++ b/modules/text/semantic_model/chinese_electra_small/module.py
@@ -0,0 +1,71 @@
+# coding:utf-8
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from paddlehub import TransformerModule
+from paddlehub.module.module import moduleinfo
+
+from chinese_electra_small.model.electra import ElectraConfig, ElectraModel
+
+
+@moduleinfo(
+    name="chinese-electra-small",
+    version="1.0.0",
+    summary="chinese-electra-small, 12-layer, 256-hidden, 4-heads, 12M parameters",
+    author="ymcui",
+    author_email="ymcui@ir.hit.edu.cn",
+    type="nlp/semantic_model",
+)
+class Electra(TransformerModule):
+    def _initialize(self):
+        self.MAX_SEQ_LEN = 512
+        self.params_path = os.path.join(self.directory, "assets", "params")
+        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
+
+        electra_config_path = os.path.join(self.directory, "assets", "config.json")
+        self.electra_config = ElectraConfig(electra_config_path)
+
+    def net(self, input_ids, position_ids, segment_ids, input_mask):
+        """
+        Create the neural network.
+
+        Args:
+            input_ids (tensor): the word ids.
+            position_ids (tensor): the position ids.
+            segment_ids (tensor): the segment ids.
+            input_mask (tensor): the padding mask.
+
+        Returns:
+            pooled_output (tensor): sentence-level output for classification tasks.
+            sequence_output (tensor): token-level output for sequence tasks.
+        """
+        electra = ElectraModel(
+            src_ids=input_ids,
+            position_ids=position_ids,
+            sentence_ids=segment_ids,
+            input_mask=input_mask,
+            config=self.electra_config,
+            use_fp16=False)
+        pooled_output = electra.get_pooled_output()
+        sequence_output = electra.get_sequence_output()
+        return pooled_output, sequence_output
+
+
+if __name__ == '__main__':
+    test_module = Electra()
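Usage matches the base variant; per the module summary, only the width changes (256-hidden, 4 heads), and `electra.py` above additionally projects its 128-d token embeddings up to the hidden size via `embeddings_project`. Again a sketch under the PaddleHub 1.x convention:

```python
# Sketch: the small variant trades capacity for speed; same calling pattern.
import paddlehub as hub

module = hub.Module(name="chinese-electra-small")
inputs, outputs, program = module.context(trainable=True, max_seq_len=128)
sequence_output = outputs["sequence_output"]  # token-level, 256-wide features
```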
diff --git a/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext/README.md b/modules/text/semantic_model/chinese_roberta_wwm_ext/README.md
similarity index 100%
rename from hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext/README.md
rename to modules/text/semantic_model/chinese_roberta_wwm_ext/README.md
diff --git a/hub_module/modules/text/semantic_model/chinese_electra_small/model/__init__.py b/modules/text/semantic_model/chinese_roberta_wwm_ext/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/chinese_electra_small/model/__init__.py
rename to modules/text/semantic_model/chinese_roberta_wwm_ext/__init__.py
diff --git a/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext/__init__.py b/modules/text/semantic_model/chinese_roberta_wwm_ext/model/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext/__init__.py
rename to modules/text/semantic_model/chinese_roberta_wwm_ext/model/__init__.py
diff --git a/modules/text/semantic_model/chinese_roberta_wwm_ext/model/bert.py b/modules/text/semantic_model/chinese_roberta_wwm_ext/model/bert.py
new file mode 100644
index 0000000000000000000000000000000000000000..29372adaf16a9717939cc07c4c135d27dba1dd61
--- /dev/null
+++ b/modules/text/semantic_model/chinese_roberta_wwm_ext/model/bert.py
@@ -0,0 +1,196 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""BERT model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import six
+import json
+
+import paddle.fluid as fluid
+
+from chinese_roberta_wwm_ext.model.transformer_encoder import encoder, pre_process_layer
+
+
+class BertConfig(object):
+    def __init__(self, config_path):
+        self._config_dict = self._parse(config_path)
+
+    def _parse(self, config_path):
+        try:
+            with open(config_path) as json_file:
+                config_dict = json.load(json_file)
+        except Exception:
+            raise IOError("Error in parsing bert model config file '%s'" % config_path)
+        else:
+            return config_dict
+
+    def __getitem__(self, key):
+        return self._config_dict[key]
+
+    def print_config(self):
+        for arg, value in sorted(six.iteritems(self._config_dict)):
+            print('%s: %s' % (arg, value))
+        print('------------------------------------------------')
+
+
+class BertModel(object):
+    def __init__(self, src_ids, position_ids, sentence_ids, input_mask, config, weight_sharing=True, use_fp16=False):
+
+        self._emb_size = config['hidden_size']
+        self._n_layer = config['num_hidden_layers']
+        self._n_head = config['num_attention_heads']
+        self._voc_size = config['vocab_size']
+        self._max_position_seq_len = config['max_position_embeddings']
+        self._sent_types = config['type_vocab_size']
+        self._hidden_act = config['hidden_act']
+        self._prepostprocess_dropout = config['hidden_dropout_prob']
+        self._attention_dropout = config['attention_probs_dropout_prob']
+        self._weight_sharing = weight_sharing
+
+        self._word_emb_name = "word_embedding"
+        self._pos_emb_name = "pos_embedding"
+        self._sent_emb_name = "sent_embedding"
+        self._dtype = "float16" if use_fp16 else "float32"
+
+        # Initialize all weights by truncated normal initializer, and all biases
+        # will be initialized by constant zero by default.
+        self._param_initializer = fluid.initializer.TruncatedNormal(scale=config['initializer_range'])
+
+        self._build_model(src_ids, position_ids, sentence_ids, input_mask)
+
+    def _build_model(self, src_ids, position_ids, sentence_ids, input_mask):
+        # padding id in vocabulary must be set to 0
+        emb_out = fluid.layers.embedding(
+            input=src_ids,
+            size=[self._voc_size, self._emb_size],
+            dtype=self._dtype,
+            param_attr=fluid.ParamAttr(name=self._word_emb_name, initializer=self._param_initializer),
+            is_sparse=False)
+        position_emb_out = fluid.layers.embedding(
+            input=position_ids,
+            size=[self._max_position_seq_len, self._emb_size],
+            dtype=self._dtype,
+            param_attr=fluid.ParamAttr(name=self._pos_emb_name, initializer=self._param_initializer))
+
+        sent_emb_out = fluid.layers.embedding(
+            sentence_ids,
+            size=[self._sent_types, self._emb_size],
+            dtype=self._dtype,
+            param_attr=fluid.ParamAttr(name=self._sent_emb_name, initializer=self._param_initializer))
+
+        emb_out = emb_out + position_emb_out
+        emb_out = emb_out + sent_emb_out
+
+        emb_out = pre_process_layer(emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder')
+
+        if self._dtype == "float16":
+            input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype)
+
+        self_attn_mask = fluid.layers.matmul(x=input_mask, y=input_mask, transpose_y=True)
+        self_attn_mask = fluid.layers.scale(x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False)
+        n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] * self._n_head, axis=1)
+        n_head_self_attn_mask.stop_gradient = True
+
+        self._enc_out = encoder(
+            enc_input=emb_out,
+            attn_bias=n_head_self_attn_mask,
+            n_layer=self._n_layer,
+            n_head=self._n_head,
+            d_key=self._emb_size // self._n_head,
+            d_value=self._emb_size // self._n_head,
+            d_model=self._emb_size,
+            d_inner_hid=self._emb_size * 4,
+            prepostprocess_dropout=self._prepostprocess_dropout,
+            attention_dropout=self._attention_dropout,
+            relu_dropout=0,
+            hidden_act=self._hidden_act,
+            preprocess_cmd="",
+            postprocess_cmd="dan",
+            param_initializer=self._param_initializer,
+            name='encoder')
+
+    def get_sequence_output(self):
+        return self._enc_out
+
+    def get_pooled_output(self):
+        """Get the first feature of each sequence for classification"""
+
+        next_sent_feat = fluid.layers.slice(input=self._enc_out, axes=[1], starts=[0], ends=[1])
+        next_sent_feat = fluid.layers.fc(
+            input=next_sent_feat,
+            size=self._emb_size,
+            act="tanh",
+            param_attr=fluid.ParamAttr(name="pooled_fc.w_0", initializer=self._param_initializer),
+            bias_attr="pooled_fc.b_0")
+        return next_sent_feat
+
+    def get_pretraining_output(self, mask_label, mask_pos, labels):
+        """Get the loss & accuracy for pretraining"""
+
+        mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32')
+
+        # extract the first token feature in each sentence
+        next_sent_feat = self.get_pooled_output()
+        reshaped_emb_out = fluid.layers.reshape(x=self._enc_out, shape=[-1, self._emb_size])
+        # extract masked tokens' feature
+        mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos)
+
+        # transform: fc
+        mask_trans_feat = fluid.layers.fc(
+            input=mask_feat,
+            size=self._emb_size,
+            act=self._hidden_act,
+            param_attr=fluid.ParamAttr(name='mask_lm_trans_fc.w_0', initializer=self._param_initializer),
+            bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0'))
+        # transform: layer norm
+        mask_trans_feat = pre_process_layer(mask_trans_feat, 'n', name='mask_lm_trans')
+
+        mask_lm_out_bias_attr = fluid.ParamAttr(
+            name="mask_lm_out_fc.b_0", initializer=fluid.initializer.Constant(value=0.0))
+        if self._weight_sharing:
+            fc_out = fluid.layers.matmul(
+                x=mask_trans_feat,
+                y=fluid.default_main_program().global_block().var(self._word_emb_name),
+                transpose_y=True)
+            fc_out += fluid.layers.create_parameter(
+                shape=[self._voc_size], dtype=self._dtype, attr=mask_lm_out_bias_attr, is_bias=True)
+
+        else:
+            fc_out = fluid.layers.fc(
+                input=mask_trans_feat,
+                size=self._voc_size,
+                param_attr=fluid.ParamAttr(name="mask_lm_out_fc.w_0", initializer=self._param_initializer),
+                bias_attr=mask_lm_out_bias_attr)
+
+        mask_lm_loss = fluid.layers.softmax_with_cross_entropy(logits=fc_out, label=mask_label)
+        mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss)
+
+        next_sent_fc_out = fluid.layers.fc(
+            input=next_sent_feat,
+            size=2,
+            param_attr=fluid.ParamAttr(name="next_sent_fc.w_0", initializer=self._param_initializer),
+            bias_attr="next_sent_fc.b_0")
+
+        next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy(
+            logits=next_sent_fc_out, label=labels, return_softmax=True)
+
+        next_sent_acc = fluid.layers.accuracy(input=next_sent_softmax, label=labels)
+
+        mean_next_sent_loss = fluid.layers.mean(next_sent_loss)
+
+        loss = mean_next_sent_loss + mean_mask_lm_loss
+        return next_sent_acc, mean_mask_lm_loss, loss
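`BertModel.get_pooled_output` above differs from the ELECTRA models earlier in this diff: the BERT/RoBERTa model passes the first-token feature through a tanh-activated fc (`pooled_fc`), while the ELECTRA models return the slice unchanged. A side-by-side sketch, with parameter names taken from the code above and a hypothetical selector flag:

```python
# Sketch contrasting the two pooling strategies that appear in this diff.
import paddle.fluid as fluid

def pooled_output(enc_out, emb_size, use_tanh_pooler):
    # First-token ([CLS]-position) feature: [batch, 1, emb_size].
    first_token = fluid.layers.slice(input=enc_out, axes=[1], starts=[0], ends=[1])
    if not use_tanh_pooler:
        return first_token  # ELECTRA-style: raw first-token hidden state
    return fluid.layers.fc(  # BERT/RoBERTa-style tanh pooler
        input=first_token,
        size=emb_size,
        act="tanh",
        param_attr=fluid.ParamAttr(name="pooled_fc.w_0"),
        bias_attr="pooled_fc.b_0")
```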
+ """ + q = layers.fc( + input=queries, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_query_fc.w_0', initializer=param_initializer), + bias_attr=name + '_query_fc.b_0') + k = layers.fc( + input=keys, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_key_fc.w_0', initializer=param_initializer), + bias_attr=name + '_key_fc.b_0') + v = layers.fc( + input=values, + size=d_value * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_value_fc.w_0', initializer=param_initializer), + bias_attr=name + '_value_fc.b_0') + return q, k, v + + def __split_heads(x, n_head): + """ + Reshape the last dimension of inpunt tensor x so that it becomes two + dimensions and then transpose. Specifically, input a tensor with shape + [bs, max_sequence_length, n_head * hidden_dim] then output a tensor + with shape [bs, n_head, max_sequence_length, hidden_dim]. + """ + hidden_size = x.shape[-1] + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + reshaped = layers.reshape(x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) + + # permuate the dimensions into: + # [batch_size, n_head, max_sequence_len, hidden_size_per_head] + return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + + def __combine_heads(x): + """ + Transpose and then reshape the last two dimensions of inpunt tensor x + so that it becomes one dimension, which is reverse to __split_heads. + """ + if len(x.shape) == 3: return x + if len(x.shape) != 4: + raise ValueError("Input(x) should be a 4-D Tensor.") + + trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + return layers.reshape(x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], inplace=True) + + def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): + """ + Scaled Dot-Product Attention + """ + scaled_q = layers.scale(x=q, scale=d_key**-0.5) + product = layers.matmul(x=scaled_q, y=k, transpose_y=True) + if attn_bias: + product += attn_bias + weights = layers.softmax(product) + if dropout_rate: + weights = layers.dropout( + weights, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + out = layers.matmul(weights, v) + return out + + q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) + + if cache is not None: # use cache and concat time steps + # Since the inplace reshape in __split_heads changes the shape of k and + # v, which is the cache input for next time step, reshape the cache + # input from the previous time step first. + k = cache["k"] = layers.concat([layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) + v = cache["v"] = layers.concat([layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) + + q = __split_heads(q, n_head) + k = __split_heads(k, n_head) + v = __split_heads(v, n_head) + + ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate) + + out = __combine_heads(ctx_multiheads) + + # Project back to the model size. 
+    proj_out = layers.fc(
+        input=out,
+        size=d_model,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_output_fc.w_0', initializer=param_initializer),
+        bias_attr=name + '_output_fc.b_0')
+    return proj_out
+
+
+def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate, hidden_act, param_initializer=None, name='ffn'):
+    """
+    Position-wise Feed-Forward Networks.
+    This module consists of two linear transformations with an activation
+    in between, applied to each position separately and identically.
+    """
+    hidden = layers.fc(
+        input=x,
+        size=d_inner_hid,
+        num_flatten_dims=2,
+        act=hidden_act,
+        param_attr=fluid.ParamAttr(name=name + '_fc_0.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_0.b_0')
+    if dropout_rate:
+        hidden = layers.dropout(
+            hidden, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    out = layers.fc(
+        input=hidden,
+        size=d_hid,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_fc_1.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_1.b_0')
+    return out
+
+
+def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., name=''):
+    """
+    Optionally add residual connection, layer normalization and dropout to the
+    out tensor, according to the value of process_cmd.
+    This will be used before or after multi-head attention and position-wise
+    feed-forward networks.
+    """
+    for cmd in process_cmd:
+        if cmd == "a":  # add residual connection
+            out = out + prev_out if prev_out else out
+        elif cmd == "n":  # add layer normalization
+            out_dtype = out.dtype
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float32")
+            out = layers.layer_norm(
+                out,
+                begin_norm_axis=len(out.shape) - 1,
+                param_attr=fluid.ParamAttr(name=name + '_layer_norm_scale', initializer=fluid.initializer.Constant(1.)),
+                bias_attr=fluid.ParamAttr(name=name + '_layer_norm_bias', initializer=fluid.initializer.Constant(0.)))
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float16")
+        elif cmd == "d":  # add dropout
+            if dropout_rate:
+                out = layers.dropout(
+                    out, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    return out
+
+
+pre_process_layer = partial(pre_post_process_layer, None)
+post_process_layer = pre_post_process_layer
+
+
+def encoder_layer(enc_input,
+                  attn_bias,
+                  n_head,
+                  d_key,
+                  d_value,
+                  d_model,
+                  d_inner_hid,
+                  prepostprocess_dropout,
+                  attention_dropout,
+                  relu_dropout,
+                  hidden_act,
+                  preprocess_cmd="n",
+                  postprocess_cmd="da",
+                  param_initializer=None,
+                  name=''):
+    """The encoder layers that can be stacked to form a deep encoder.
+    This module consists of a multi-head (self) attention followed by
+    position-wise feed-forward networks, with both components accompanied
+    by post_process_layer to add residual connection, layer normalization
+    and dropout.
+ """ + attn_output = multi_head_attention( + pre_process_layer(enc_input, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_att'), + None, + None, + attn_bias, + d_key, + d_value, + d_model, + n_head, + attention_dropout, + param_initializer=param_initializer, + name=name + '_multi_head_att') + attn_output = post_process_layer( + enc_input, attn_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_att') + ffd_output = positionwise_feed_forward( + pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_ffn'), + d_inner_hid, + d_model, + relu_dropout, + hidden_act, + param_initializer=param_initializer, + name=name + '_ffn') + return post_process_layer(attn_output, ffd_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_ffn') + + +def encoder(enc_input, + attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """ + The encoder is composed of a stack of identical layers returned by calling + encoder_layer. + """ + for i in range(n_layer): + enc_output = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + name=name + '_layer_' + str(i)) + enc_input = enc_output + enc_output = pre_process_layer(enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") + + return enc_output diff --git a/modules/text/semantic_model/chinese_roberta_wwm_ext/module.py b/modules/text/semantic_model/chinese_roberta_wwm_ext/module.py new file mode 100644 index 0000000000000000000000000000000000000000..d45f232b829972274a5a3077bd9e7c22655c6dcb --- /dev/null +++ b/modules/text/semantic_model/chinese_roberta_wwm_ext/module.py @@ -0,0 +1,74 @@ +# coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from paddlehub import TransformerModule
+from paddlehub.module.module import moduleinfo
+
+from chinese_roberta_wwm_ext.model.bert import BertConfig, BertModel
+
+
+@moduleinfo(
+    name="chinese-roberta-wwm-ext",
+    version="1.0.0",
+    summary="chinese-roberta-wwm-ext, 12-layer, 768-hidden, 12-heads, 110M parameters",
+    author="ymcui",
+    author_email="ymcui@ir.hit.edu.cn",
+    type="nlp/semantic_model",
+)
+class BertWwm(TransformerModule):
+    def _initialize(self):
+        self.MAX_SEQ_LEN = 512
+        self.params_path = os.path.join(self.directory, "assets", "params")
+        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
+
+        bert_config_path = os.path.join(self.directory, "assets", "bert_config.json")
+        self.bert_config = BertConfig(bert_config_path)
+
+    def net(self, input_ids, position_ids, segment_ids, input_mask):
+        """
+        Create the neural network.
+
+        Args:
+            input_ids (tensor): the word ids.
+            position_ids (tensor): the position ids.
+            segment_ids (tensor): the segment ids.
+            input_mask (tensor): the padding mask.
+
+        Returns:
+            pooled_output (tensor): sentence-level output for classification tasks.
+            sequence_output (tensor): token-level output for sequence tasks.
+        """
+        bert = BertModel(
+            src_ids=input_ids,
+            position_ids=position_ids,
+            sentence_ids=segment_ids,
+            input_mask=input_mask,
+            config=self.bert_config,
+            use_fp16=False)
+        pooled_output = bert.get_pooled_output()
+        sequence_output = bert.get_sequence_output()
+        return pooled_output, sequence_output
+
+
+if __name__ == '__main__':
+    test_module = BertWwm()
diff --git a/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext_large/README.md b/modules/text/semantic_model/chinese_roberta_wwm_ext_large/README.md
similarity index 100%
rename from hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext_large/README.md
rename to modules/text/semantic_model/chinese_roberta_wwm_ext_large/README.md
diff --git a/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext/model/__init__.py b/modules/text/semantic_model/chinese_roberta_wwm_ext_large/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext/model/__init__.py
rename to modules/text/semantic_model/chinese_roberta_wwm_ext_large/__init__.py
diff --git a/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext_large/__init__.py b/modules/text/semantic_model/chinese_roberta_wwm_ext_large/model/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext_large/__init__.py
rename to modules/text/semantic_model/chinese_roberta_wwm_ext_large/model/__init__.py
diff --git a/modules/text/semantic_model/chinese_roberta_wwm_ext_large/model/bert.py b/modules/text/semantic_model/chinese_roberta_wwm_ext_large/model/bert.py
new file mode 100644
index 0000000000000000000000000000000000000000..782cfacbe7ecf5a44b205d77a48fed6190c498bd
--- /dev/null
+++ b/modules/text/semantic_model/chinese_roberta_wwm_ext_large/model/bert.py
@@ -0,0 +1,196 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""BERT model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import six +import json + +import paddle.fluid as fluid + +from chinese_roberta_wwm_ext_large.model.transformer_encoder import encoder, pre_process_layer + + +class BertConfig(object): + def __init__(self, config_path): + self._config_dict = self._parse(config_path) + + def _parse(self, config_path): + try: + with open(config_path) as json_file: + config_dict = json.load(json_file) + except Exception: + raise IOError("Error in parsing bert model config file '%s'" % config_path) + else: + return config_dict + + def __getitem__(self, key): + return self._config_dict[key] + + def print_config(self): + for arg, value in sorted(six.iteritems(self._config_dict)): + print('%s: %s' % (arg, value)) + print('------------------------------------------------') + + +class BertModel(object): + def __init__(self, src_ids, position_ids, sentence_ids, input_mask, config, weight_sharing=True, use_fp16=False): + + self._emb_size = config['hidden_size'] + self._n_layer = config['num_hidden_layers'] + self._n_head = config['num_attention_heads'] + self._voc_size = config['vocab_size'] + self._max_position_seq_len = config['max_position_embeddings'] + self._sent_types = config['type_vocab_size'] + self._hidden_act = config['hidden_act'] + self._prepostprocess_dropout = config['hidden_dropout_prob'] + self._attention_dropout = config['attention_probs_dropout_prob'] + self._weight_sharing = weight_sharing + + self._word_emb_name = "word_embedding" + self._pos_emb_name = "pos_embedding" + self._sent_emb_name = "sent_embedding" + self._dtype = "float16" if use_fp16 else "float32" + + # Initialize all weigths by truncated normal initializer, and all biases + # will be initialized by constant zero by default. 
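+        # ("initializer_range" comes from bert_config.json; the released
+        # BERT/RoBERTa configs typically set it to 0.02, so weights start
+        # close to zero.)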
+ self._param_initializer = fluid.initializer.TruncatedNormal(scale=config['initializer_range']) + + self._build_model(src_ids, position_ids, sentence_ids, input_mask) + + def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): + # padding id in vocabulary must be set to 0 + emb_out = fluid.layers.embedding( + input=src_ids, + size=[self._voc_size, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._word_emb_name, initializer=self._param_initializer), + is_sparse=False) + position_emb_out = fluid.layers.embedding( + input=position_ids, + size=[self._max_position_seq_len, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._pos_emb_name, initializer=self._param_initializer)) + + sent_emb_out = fluid.layers.embedding( + sentence_ids, + size=[self._sent_types, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._sent_emb_name, initializer=self._param_initializer)) + + emb_out = emb_out + position_emb_out + emb_out = emb_out + sent_emb_out + + emb_out = pre_process_layer(emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') + + if self._dtype == "float16": + input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) + + self_attn_mask = fluid.layers.matmul(x=input_mask, y=input_mask, transpose_y=True) + self_attn_mask = fluid.layers.scale(x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) + n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] * self._n_head, axis=1) + n_head_self_attn_mask.stop_gradient = True + + self._enc_out = encoder( + enc_input=emb_out, + attn_bias=n_head_self_attn_mask, + n_layer=self._n_layer, + n_head=self._n_head, + d_key=self._emb_size // self._n_head, + d_value=self._emb_size // self._n_head, + d_model=self._emb_size, + d_inner_hid=self._emb_size * 4, + prepostprocess_dropout=self._prepostprocess_dropout, + attention_dropout=self._attention_dropout, + relu_dropout=0, + hidden_act=self._hidden_act, + preprocess_cmd="", + postprocess_cmd="dan", + param_initializer=self._param_initializer, + name='encoder') + + def get_sequence_output(self): + return self._enc_out + + def get_pooled_output(self): + """Get the first feature of each sequence for classification""" + + next_sent_feat = fluid.layers.slice(input=self._enc_out, axes=[1], starts=[0], ends=[1]) + next_sent_feat = fluid.layers.fc( + input=next_sent_feat, + size=self._emb_size, + act="tanh", + param_attr=fluid.ParamAttr(name="pooled_fc.w_0", initializer=self._param_initializer), + bias_attr="pooled_fc.b_0") + return next_sent_feat + + def get_pretraining_output(self, mask_label, mask_pos, labels): + """Get the loss & accuracy for pretraining""" + + mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') + + # extract the first token feature in each sentence + next_sent_feat = self.get_pooled_output() + reshaped_emb_out = fluid.layers.reshape(x=self._enc_out, shape=[-1, self._emb_size]) + # extract masked tokens' feature + mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) + + # transform: fc + mask_trans_feat = fluid.layers.fc( + input=mask_feat, + size=self._emb_size, + act=self._hidden_act, + param_attr=fluid.ParamAttr(name='mask_lm_trans_fc.w_0', initializer=self._param_initializer), + bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) + # transform: layer norm + mask_trans_feat = pre_process_layer(mask_trans_feat, 'n', name='mask_lm_trans') + + mask_lm_out_bias_attr = fluid.ParamAttr( + name="mask_lm_out_fc.b_0", 
initializer=fluid.initializer.Constant(value=0.0)) + if self._weight_sharing: + fc_out = fluid.layers.matmul( + x=mask_trans_feat, + y=fluid.default_main_program().global_block().var(self._word_emb_name), + transpose_y=True) + fc_out += fluid.layers.create_parameter( + shape=[self._voc_size], dtype=self._dtype, attr=mask_lm_out_bias_attr, is_bias=True) + + else: + fc_out = fluid.layers.fc( + input=mask_trans_feat, + size=self._voc_size, + param_attr=fluid.ParamAttr(name="mask_lm_out_fc.w_0", initializer=self._param_initializer), + bias_attr=mask_lm_out_bias_attr) + + mask_lm_loss = fluid.layers.softmax_with_cross_entropy(logits=fc_out, label=mask_label) + mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) + + next_sent_fc_out = fluid.layers.fc( + input=next_sent_feat, + size=2, + param_attr=fluid.ParamAttr(name="next_sent_fc.w_0", initializer=self._param_initializer), + bias_attr="next_sent_fc.b_0") + + next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy( + logits=next_sent_fc_out, label=labels, return_softmax=True) + + next_sent_acc = fluid.layers.accuracy(input=next_sent_softmax, label=labels) + + mean_next_sent_loss = fluid.layers.mean(next_sent_loss) + + loss = mean_next_sent_loss + mean_mask_lm_loss + return next_sent_acc, mean_mask_lm_loss, loss diff --git a/modules/text/semantic_model/chinese_roberta_wwm_ext_large/model/transformer_encoder.py b/modules/text/semantic_model/chinese_roberta_wwm_ext_large/model/transformer_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..53051cde80308a17a30d9b92de11c712b63da406 --- /dev/null +++ b/modules/text/semantic_model/chinese_roberta_wwm_ext_large/model/transformer_encoder.py @@ -0,0 +1,288 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Transformer encoder.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from functools import partial + +import paddle.fluid as fluid +import paddle.fluid.layers as layers + + +def multi_head_attention(queries, + keys, + values, + attn_bias, + d_key, + d_value, + d_model, + n_head=1, + dropout_rate=0., + cache=None, + param_initializer=None, + name='multi_head_att'): + """ + Multi-Head Attention. Note that attn_bias is added to the logit before + computing softmax activiation to mask certain selected positions so that + they will not considered in attention weights. + """ + keys = queries if keys is None else keys + values = keys if values is None else values + + if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): + raise ValueError("Inputs: quries, keys and values should all be 3-D tensors.") + + def __compute_qkv(queries, keys, values, n_head, d_key, d_value): + """ + Add linear projection to queries, keys, and values. 
+ """ + q = layers.fc( + input=queries, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_query_fc.w_0', initializer=param_initializer), + bias_attr=name + '_query_fc.b_0') + k = layers.fc( + input=keys, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_key_fc.w_0', initializer=param_initializer), + bias_attr=name + '_key_fc.b_0') + v = layers.fc( + input=values, + size=d_value * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_value_fc.w_0', initializer=param_initializer), + bias_attr=name + '_value_fc.b_0') + return q, k, v + + def __split_heads(x, n_head): + """ + Reshape the last dimension of inpunt tensor x so that it becomes two + dimensions and then transpose. Specifically, input a tensor with shape + [bs, max_sequence_length, n_head * hidden_dim] then output a tensor + with shape [bs, n_head, max_sequence_length, hidden_dim]. + """ + hidden_size = x.shape[-1] + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + reshaped = layers.reshape(x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) + + # permuate the dimensions into: + # [batch_size, n_head, max_sequence_len, hidden_size_per_head] + return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + + def __combine_heads(x): + """ + Transpose and then reshape the last two dimensions of inpunt tensor x + so that it becomes one dimension, which is reverse to __split_heads. + """ + if len(x.shape) == 3: return x + if len(x.shape) != 4: + raise ValueError("Input(x) should be a 4-D Tensor.") + + trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + return layers.reshape(x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], inplace=True) + + def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): + """ + Scaled Dot-Product Attention + """ + scaled_q = layers.scale(x=q, scale=d_key**-0.5) + product = layers.matmul(x=scaled_q, y=k, transpose_y=True) + if attn_bias: + product += attn_bias + weights = layers.softmax(product) + if dropout_rate: + weights = layers.dropout( + weights, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + out = layers.matmul(weights, v) + return out + + q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) + + if cache is not None: # use cache and concat time steps + # Since the inplace reshape in __split_heads changes the shape of k and + # v, which is the cache input for next time step, reshape the cache + # input from the previous time step first. + k = cache["k"] = layers.concat([layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) + v = cache["v"] = layers.concat([layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) + + q = __split_heads(q, n_head) + k = __split_heads(k, n_head) + v = __split_heads(v, n_head) + + ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate) + + out = __combine_heads(ctx_multiheads) + + # Project back to the model size. 
+ proj_out = layers.fc( + input=out, + size=d_model, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_output_fc.w_0', initializer=param_initializer), + bias_attr=name + '_output_fc.b_0') + return proj_out + + +def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate, hidden_act, param_initializer=None, name='ffn'): + """ + Position-wise Feed-Forward Networks. + This module consists of two linear transformations with a ReLU activation + in between, which is applied to each position separately and identically. + """ + hidden = layers.fc( + input=x, + size=d_inner_hid, + num_flatten_dims=2, + act=hidden_act, + param_attr=fluid.ParamAttr(name=name + '_fc_0.w_0', initializer=param_initializer), + bias_attr=name + '_fc_0.b_0') + if dropout_rate: + hidden = layers.dropout( + hidden, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + out = layers.fc( + input=hidden, + size=d_hid, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_fc_1.w_0', initializer=param_initializer), + bias_attr=name + '_fc_1.b_0') + return out + + +def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., name=''): + """ + Add residual connection, layer normalization and droput to the out tensor + optionally according to the value of process_cmd. + This will be used before or after multi-head attention and position-wise + feed-forward networks. + """ + for cmd in process_cmd: + if cmd == "a": # add residual connection + out = out + prev_out if prev_out else out + elif cmd == "n": # add layer normalization + out_dtype = out.dtype + if out_dtype == fluid.core.VarDesc.VarType.FP16: + out = layers.cast(x=out, dtype="float32") + out = layers.layer_norm( + out, + begin_norm_axis=len(out.shape) - 1, + param_attr=fluid.ParamAttr(name=name + '_layer_norm_scale', initializer=fluid.initializer.Constant(1.)), + bias_attr=fluid.ParamAttr(name=name + '_layer_norm_bias', initializer=fluid.initializer.Constant(0.))) + if out_dtype == fluid.core.VarDesc.VarType.FP16: + out = layers.cast(x=out, dtype="float16") + elif cmd == "d": # add dropout + if dropout_rate: + out = layers.dropout( + out, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + return out + + +pre_process_layer = partial(pre_post_process_layer, None) +post_process_layer = pre_post_process_layer + + +def encoder_layer(enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """The encoder layers that can be stacked to form a deep encoder. + This module consits of a multi-head (self) attention followed by + position-wise feed-forward networks and both the two components companied + with the post_process_layer to add residual connection, layer normalization + and droput. 
+ """ + attn_output = multi_head_attention( + pre_process_layer(enc_input, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_att'), + None, + None, + attn_bias, + d_key, + d_value, + d_model, + n_head, + attention_dropout, + param_initializer=param_initializer, + name=name + '_multi_head_att') + attn_output = post_process_layer( + enc_input, attn_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_att') + ffd_output = positionwise_feed_forward( + pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_ffn'), + d_inner_hid, + d_model, + relu_dropout, + hidden_act, + param_initializer=param_initializer, + name=name + '_ffn') + return post_process_layer(attn_output, ffd_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_ffn') + + +def encoder(enc_input, + attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """ + The encoder is composed of a stack of identical layers returned by calling + encoder_layer. + """ + for i in range(n_layer): + enc_output = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + name=name + '_layer_' + str(i)) + enc_input = enc_output + enc_output = pre_process_layer(enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") + + return enc_output diff --git a/modules/text/semantic_model/chinese_roberta_wwm_ext_large/module.py b/modules/text/semantic_model/chinese_roberta_wwm_ext_large/module.py new file mode 100644 index 0000000000000000000000000000000000000000..ec67f8739c5abe1c42b1e165d9b17c66a6ae9a50 --- /dev/null +++ b/modules/text/semantic_model/chinese_roberta_wwm_ext_large/module.py @@ -0,0 +1,74 @@ +# coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from paddlehub import TransformerModule
+from paddlehub.module.module import moduleinfo
+
+from chinese_roberta_wwm_ext_large.model.bert import BertConfig, BertModel
+
+
+@moduleinfo(
+    name="chinese-roberta-wwm-ext-large",
+    version="1.0.0",
+    summary="chinese-roberta-wwm-ext-large, 24-layer, 1024-hidden, 16-heads, 340M parameters",
+    author="ymcui",
+    author_email="ymcui@ir.hit.edu.cn",
+    type="nlp/semantic_model",
+)
+class BertWwm(TransformerModule):
+    def _initialize(self):
+        self.MAX_SEQ_LEN = 512
+        self.params_path = os.path.join(self.directory, "assets", "params")
+        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
+
+        bert_config_path = os.path.join(self.directory, "assets", "bert_config.json")
+        self.bert_config = BertConfig(bert_config_path)
+
+    def net(self, input_ids, position_ids, segment_ids, input_mask):
+        """
+        Create the neural network.
+
+        Args:
+            input_ids (tensor): the word ids.
+            position_ids (tensor): the position ids.
+            segment_ids (tensor): the segment ids.
+            input_mask (tensor): the padding mask.
+
+        Returns:
+            pooled_output (tensor): sentence-level output for classification tasks.
+            sequence_output (tensor): token-level output for sequence tasks.
+        """
+        bert = BertModel(
+            src_ids=input_ids,
+            position_ids=position_ids,
+            sentence_ids=segment_ids,
+            input_mask=input_mask,
+            config=self.bert_config,
+            use_fp16=False)
+        pooled_output = bert.get_pooled_output()
+        sequence_output = bert.get_sequence_output()
+        return pooled_output, sequence_output
+
+
+if __name__ == '__main__':
+    test_module = BertWwm()
diff --git a/hub_module/modules/text/semantic_model/ernie/README.md b/modules/text/semantic_model/ernie/README.md
similarity index 100%
rename from hub_module/modules/text/semantic_model/ernie/README.md
rename to modules/text/semantic_model/ernie/README.md
diff --git a/hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext_large/model/__init__.py b/modules/text/semantic_model/ernie/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/chinese_roberta_wwm_ext_large/model/__init__.py
rename to modules/text/semantic_model/ernie/__init__.py
diff --git a/hub_module/modules/text/semantic_model/ernie/model/__init__.py b/modules/text/semantic_model/ernie/model/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/ernie/model/__init__.py
rename to modules/text/semantic_model/ernie/model/__init__.py
diff --git a/modules/text/semantic_model/ernie/model/ernie.py b/modules/text/semantic_model/ernie/model/ernie.py
new file mode 100644
index 0000000000000000000000000000000000000000..88af7b9e64e4b92d796f293215c5481b5e359bb1
--- /dev/null
+++ b/modules/text/semantic_model/ernie/model/ernie.py
@@ -0,0 +1,199 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Ernie model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import absolute_import + +import json + +import six +import paddle.fluid as fluid +from io import open +from paddlehub.common.logger import logger + +from ernie.model.transformer_encoder import encoder, pre_process_layer + + +class ErnieConfig(object): + def __init__(self, config_path): + self._config_dict = self._parse(config_path) + + def _parse(self, config_path): + try: + with open(config_path, 'r', encoding='utf8') as json_file: + config_dict = json.load(json_file) + except Exception: + raise IOError("Error in parsing Ernie model config file '%s'" % config_path) + else: + return config_dict + + def __getitem__(self, key): + return self._config_dict.get(key, None) + + def print_config(self): + for arg, value in sorted(six.iteritems(self._config_dict)): + logger.info('%s: %s' % (arg, value)) + logger.info('------------------------------------------------') + + +class ErnieModel(object): + def __init__(self, src_ids, position_ids, sentence_ids, input_mask, config, weight_sharing=True, use_fp16=False): + + self._emb_size = config['hidden_size'] + self._n_layer = config['num_hidden_layers'] + self._n_head = config['num_attention_heads'] + self._voc_size = config['vocab_size'] + self._max_position_seq_len = config['max_position_embeddings'] + self._sent_types = config['type_vocab_size'] + self._hidden_act = config['hidden_act'] + self._prepostprocess_dropout = config['hidden_dropout_prob'] + self._attention_dropout = config['attention_probs_dropout_prob'] + self._weight_sharing = weight_sharing + + self._word_emb_name = "word_embedding" + self._pos_emb_name = "pos_embedding" + self._sent_emb_name = "sent_embedding" + self._dtype = "float16" if use_fp16 else "float32" + + # Initialize all weigths by truncated normal initializer, and all biases + # will be initialized by constant zero by default. 
+ self._param_initializer = fluid.initializer.TruncatedNormal(scale=config['initializer_range']) + + self._build_model(src_ids, position_ids, sentence_ids, input_mask) + + def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): + # padding id in vocabulary must be set to 0 + emb_out = fluid.layers.embedding( + input=src_ids, + size=[self._voc_size, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._word_emb_name, initializer=self._param_initializer), + is_sparse=False) + position_emb_out = fluid.layers.embedding( + input=position_ids, + size=[self._max_position_seq_len, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._pos_emb_name, initializer=self._param_initializer)) + + sent_emb_out = fluid.layers.embedding( + sentence_ids, + size=[self._sent_types, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._sent_emb_name, initializer=self._param_initializer)) + + emb_out = emb_out + position_emb_out + emb_out = emb_out + sent_emb_out + + emb_out = pre_process_layer(emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') + + if self._dtype == "float16": + input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) + self_attn_mask = fluid.layers.matmul(x=input_mask, y=input_mask, transpose_y=True) + + self_attn_mask = fluid.layers.scale(x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) + n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] * self._n_head, axis=1) + n_head_self_attn_mask.stop_gradient = True + + self._enc_out = encoder( + enc_input=emb_out, + attn_bias=n_head_self_attn_mask, + n_layer=self._n_layer, + n_head=self._n_head, + d_key=self._emb_size // self._n_head, + d_value=self._emb_size // self._n_head, + d_model=self._emb_size, + d_inner_hid=self._emb_size * 4, + prepostprocess_dropout=self._prepostprocess_dropout, + attention_dropout=self._attention_dropout, + relu_dropout=0, + hidden_act=self._hidden_act, + preprocess_cmd="", + postprocess_cmd="dan", + param_initializer=self._param_initializer, + name='encoder') + + def get_sequence_output(self): + return self._enc_out + + def get_pooled_output(self): + """Get the first feature of each sequence for classification""" + next_sent_feat = fluid.layers.slice(input=self._enc_out, axes=[1], starts=[0], ends=[1]) + next_sent_feat = fluid.layers.fc( + input=next_sent_feat, + size=self._emb_size, + act="tanh", + param_attr=fluid.ParamAttr(name="pooled_fc.w_0", initializer=self._param_initializer), + bias_attr="pooled_fc.b_0") + return next_sent_feat + + def get_pretraining_output(self, mask_label, mask_pos, labels): + """Get the loss & accuracy for pretraining""" + + mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') + + # extract the first token feature in each sentence + next_sent_feat = self.get_pooled_output() + reshaped_emb_out = fluid.layers.reshape(x=self._enc_out, shape=[-1, self._emb_size]) + # extract masked tokens' feature + mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) + + # transform: fc + mask_trans_feat = fluid.layers.fc( + input=mask_feat, + size=self._emb_size, + act=self._hidden_act, + param_attr=fluid.ParamAttr(name='mask_lm_trans_fc.w_0', initializer=self._param_initializer), + bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) + # transform: layer norm + mask_trans_feat = pre_process_layer(mask_trans_feat, 'n', name='mask_lm_trans') + + mask_lm_out_bias_attr = fluid.ParamAttr( + name="mask_lm_out_fc.b_0", 
initializer=fluid.initializer.Constant(value=0.0)) + if self._weight_sharing: + fc_out = fluid.layers.matmul( + x=mask_trans_feat, + y=fluid.default_main_program().global_block().var(self._word_emb_name), + transpose_y=True) + fc_out += fluid.layers.create_parameter( + shape=[self._voc_size], dtype=self._dtype, attr=mask_lm_out_bias_attr, is_bias=True) + + else: + fc_out = fluid.layers.fc( + input=mask_trans_feat, + size=self._voc_size, + param_attr=fluid.ParamAttr(name="mask_lm_out_fc.w_0", initializer=self._param_initializer), + bias_attr=mask_lm_out_bias_attr) + + mask_lm_loss = fluid.layers.softmax_with_cross_entropy(logits=fc_out, label=mask_label) + mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) + + next_sent_fc_out = fluid.layers.fc( + input=next_sent_feat, + size=2, + param_attr=fluid.ParamAttr(name="next_sent_fc.w_0", initializer=self._param_initializer), + bias_attr="next_sent_fc.b_0") + + next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy( + logits=next_sent_fc_out, label=labels, return_softmax=True) + + next_sent_acc = fluid.layers.accuracy(input=next_sent_softmax, label=labels) + + mean_next_sent_loss = fluid.layers.mean(next_sent_loss) + + loss = mean_next_sent_loss + mean_mask_lm_loss + return next_sent_acc, mean_mask_lm_loss, loss diff --git a/modules/text/semantic_model/ernie/model/transformer_encoder.py b/modules/text/semantic_model/ernie/model/transformer_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..53051cde80308a17a30d9b92de11c712b63da406 --- /dev/null +++ b/modules/text/semantic_model/ernie/model/transformer_encoder.py @@ -0,0 +1,288 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Transformer encoder.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from functools import partial + +import paddle.fluid as fluid +import paddle.fluid.layers as layers + + +def multi_head_attention(queries, + keys, + values, + attn_bias, + d_key, + d_value, + d_model, + n_head=1, + dropout_rate=0., + cache=None, + param_initializer=None, + name='multi_head_att'): + """ + Multi-Head Attention. Note that attn_bias is added to the logit before + computing softmax activiation to mask certain selected positions so that + they will not considered in attention weights. + """ + keys = queries if keys is None else keys + values = keys if values is None else values + + if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): + raise ValueError("Inputs: quries, keys and values should all be 3-D tensors.") + + def __compute_qkv(queries, keys, values, n_head, d_key, d_value): + """ + Add linear projection to queries, keys, and values. 
+ """ + q = layers.fc( + input=queries, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_query_fc.w_0', initializer=param_initializer), + bias_attr=name + '_query_fc.b_0') + k = layers.fc( + input=keys, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_key_fc.w_0', initializer=param_initializer), + bias_attr=name + '_key_fc.b_0') + v = layers.fc( + input=values, + size=d_value * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_value_fc.w_0', initializer=param_initializer), + bias_attr=name + '_value_fc.b_0') + return q, k, v + + def __split_heads(x, n_head): + """ + Reshape the last dimension of inpunt tensor x so that it becomes two + dimensions and then transpose. Specifically, input a tensor with shape + [bs, max_sequence_length, n_head * hidden_dim] then output a tensor + with shape [bs, n_head, max_sequence_length, hidden_dim]. + """ + hidden_size = x.shape[-1] + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + reshaped = layers.reshape(x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) + + # permuate the dimensions into: + # [batch_size, n_head, max_sequence_len, hidden_size_per_head] + return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + + def __combine_heads(x): + """ + Transpose and then reshape the last two dimensions of inpunt tensor x + so that it becomes one dimension, which is reverse to __split_heads. + """ + if len(x.shape) == 3: return x + if len(x.shape) != 4: + raise ValueError("Input(x) should be a 4-D Tensor.") + + trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + return layers.reshape(x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], inplace=True) + + def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): + """ + Scaled Dot-Product Attention + """ + scaled_q = layers.scale(x=q, scale=d_key**-0.5) + product = layers.matmul(x=scaled_q, y=k, transpose_y=True) + if attn_bias: + product += attn_bias + weights = layers.softmax(product) + if dropout_rate: + weights = layers.dropout( + weights, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + out = layers.matmul(weights, v) + return out + + q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) + + if cache is not None: # use cache and concat time steps + # Since the inplace reshape in __split_heads changes the shape of k and + # v, which is the cache input for next time step, reshape the cache + # input from the previous time step first. + k = cache["k"] = layers.concat([layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) + v = cache["v"] = layers.concat([layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) + + q = __split_heads(q, n_head) + k = __split_heads(k, n_head) + v = __split_heads(v, n_head) + + ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate) + + out = __combine_heads(ctx_multiheads) + + # Project back to the model size. 
+ proj_out = layers.fc( + input=out, + size=d_model, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_output_fc.w_0', initializer=param_initializer), + bias_attr=name + '_output_fc.b_0') + return proj_out + + +def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate, hidden_act, param_initializer=None, name='ffn'): + """ + Position-wise Feed-Forward Networks. + This module consists of two linear transformations with a ReLU activation + in between, which is applied to each position separately and identically. + """ + hidden = layers.fc( + input=x, + size=d_inner_hid, + num_flatten_dims=2, + act=hidden_act, + param_attr=fluid.ParamAttr(name=name + '_fc_0.w_0', initializer=param_initializer), + bias_attr=name + '_fc_0.b_0') + if dropout_rate: + hidden = layers.dropout( + hidden, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + out = layers.fc( + input=hidden, + size=d_hid, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_fc_1.w_0', initializer=param_initializer), + bias_attr=name + '_fc_1.b_0') + return out + + +def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., name=''): + """ + Add residual connection, layer normalization and droput to the out tensor + optionally according to the value of process_cmd. + This will be used before or after multi-head attention and position-wise + feed-forward networks. + """ + for cmd in process_cmd: + if cmd == "a": # add residual connection + out = out + prev_out if prev_out else out + elif cmd == "n": # add layer normalization + out_dtype = out.dtype + if out_dtype == fluid.core.VarDesc.VarType.FP16: + out = layers.cast(x=out, dtype="float32") + out = layers.layer_norm( + out, + begin_norm_axis=len(out.shape) - 1, + param_attr=fluid.ParamAttr(name=name + '_layer_norm_scale', initializer=fluid.initializer.Constant(1.)), + bias_attr=fluid.ParamAttr(name=name + '_layer_norm_bias', initializer=fluid.initializer.Constant(0.))) + if out_dtype == fluid.core.VarDesc.VarType.FP16: + out = layers.cast(x=out, dtype="float16") + elif cmd == "d": # add dropout + if dropout_rate: + out = layers.dropout( + out, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + return out + + +pre_process_layer = partial(pre_post_process_layer, None) +post_process_layer = pre_post_process_layer + + +def encoder_layer(enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """The encoder layers that can be stacked to form a deep encoder. + This module consits of a multi-head (self) attention followed by + position-wise feed-forward networks and both the two components companied + with the post_process_layer to add residual connection, layer normalization + and droput. 
+ """ + attn_output = multi_head_attention( + pre_process_layer(enc_input, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_att'), + None, + None, + attn_bias, + d_key, + d_value, + d_model, + n_head, + attention_dropout, + param_initializer=param_initializer, + name=name + '_multi_head_att') + attn_output = post_process_layer( + enc_input, attn_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_att') + ffd_output = positionwise_feed_forward( + pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_ffn'), + d_inner_hid, + d_model, + relu_dropout, + hidden_act, + param_initializer=param_initializer, + name=name + '_ffn') + return post_process_layer(attn_output, ffd_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_ffn') + + +def encoder(enc_input, + attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """ + The encoder is composed of a stack of identical layers returned by calling + encoder_layer. + """ + for i in range(n_layer): + enc_output = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + name=name + '_layer_' + str(i)) + enc_input = enc_output + enc_output = pre_process_layer(enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") + + return enc_output diff --git a/modules/text/semantic_model/ernie/module.py b/modules/text/semantic_model/ernie/module.py new file mode 100644 index 0000000000000000000000000000000000000000..03167a5db24419b912b782df3bf70a79d28316a2 --- /dev/null +++ b/modules/text/semantic_model/ernie/module.py @@ -0,0 +1,77 @@ +# coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from paddlehub import TransformerModule
+from paddlehub.module.module import moduleinfo
+
+from ernie.model.ernie import ErnieModel, ErnieConfig
+
+
+@moduleinfo(
+    name="ernie",
+    version="1.2.0",
+    summary="Baidu's ERNIE, Enhanced Representation through kNowledge IntEgration, max_seq_len=512 when pretrained",
+    author="baidu-nlp",
+    author_email="",
+    type="nlp/semantic_model",
+)
+class Ernie(TransformerModule):
+    def _initialize(self):
+        ernie_config_path = os.path.join(self.directory, "assets", "ernie_config.json")
+        self.ernie_config = ErnieConfig(ernie_config_path)
+        self.MAX_SEQ_LEN = 512
+        self.params_path = os.path.join(self.directory, "assets", "params")
+        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
+
+    def net(self, input_ids, position_ids, segment_ids, input_mask):
+        """
+        Create the neural network.
+
+        Args:
+            input_ids (tensor): the word ids.
+            position_ids (tensor): the position ids.
+            segment_ids (tensor): the segment ids.
+            input_mask (tensor): the padding mask.
+
+        Returns:
+            pooled_output (tensor): sentence-level output for classification tasks.
+            sequence_output (tensor): token-level output for sequence tasks.
+        """
+        self.ernie_config._config_dict['use_task_id'] = False
+        ernie = ErnieModel(
+            src_ids=input_ids,
+            position_ids=position_ids,
+            sentence_ids=segment_ids,
+            input_mask=input_mask,
+            config=self.ernie_config,
+            use_fp16=False)
+        pooled_output = ernie.get_pooled_output()
+        sequence_output = ernie.get_sequence_output()
+        return pooled_output, sequence_output
+
+    def param_prefix(self):
+        return "@HUB_ernie-stable@"
+
+
+if __name__ == '__main__':
+    test_module = Ernie()
diff --git a/hub_module/modules/text/semantic_model/ernie_tiny/README.md b/modules/text/semantic_model/ernie_tiny/README.md
similarity index 100%
rename from hub_module/modules/text/semantic_model/ernie_tiny/README.md
rename to modules/text/semantic_model/ernie_tiny/README.md
diff --git a/hub_module/modules/text/semantic_model/ernie/__init__.py b/modules/text/semantic_model/ernie_tiny/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/ernie/__init__.py
rename to modules/text/semantic_model/ernie_tiny/__init__.py
diff --git a/hub_module/modules/text/semantic_model/ernie_tiny/model/__init__.py b/modules/text/semantic_model/ernie_tiny/model/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/ernie_tiny/model/__init__.py
rename to modules/text/semantic_model/ernie_tiny/model/__init__.py
diff --git a/modules/text/semantic_model/ernie_tiny/model/ernie.py b/modules/text/semantic_model/ernie_tiny/model/ernie.py
new file mode 100644
index 0000000000000000000000000000000000000000..6fa0281f4bfdb99356017fcf5e7a29f1e1edd2a7
--- /dev/null
+++ b/modules/text/semantic_model/ernie_tiny/model/ernie.py
@@ -0,0 +1,236 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Ernie model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import json
+import six
+
+import paddle.fluid as fluid
+from io import open
+from paddlehub.common.logger import logger
+
+from ernie_tiny.model.transformer_encoder import encoder, pre_process_layer
+
+
+class ErnieConfig(object):
+    def __init__(self, config_path):
+        self._config_dict = self._parse(config_path)
+
+    def _parse(self, config_path):
+        try:
+            with open(config_path, 'r', encoding='utf8') as json_file:
+                config_dict = json.load(json_file)
+        except Exception:
+            raise IOError("Error in parsing Ernie model config file '%s'" % config_path)
+        else:
+            return config_dict
+
+    def __getitem__(self, key):
+        return self._config_dict.get(key, None)
+
+    def print_config(self):
+        for arg, value in sorted(six.iteritems(self._config_dict)):
+            logger.info('%s: %s' % (arg, value))
+        logger.info('------------------------------------------------')
+
+
+class ErnieModel(object):
+    def __init__(self,
+                 src_ids,
+                 position_ids,
+                 sentence_ids,
+                 task_ids,
+                 input_mask,
+                 config,
+                 weight_sharing=True,
+                 use_fp16=False):
+
+        self._emb_size = config['hidden_size']
+        self._n_layer = config['num_hidden_layers']
+        self._n_head = config['num_attention_heads']
+        self._voc_size = config['vocab_size']
+        self._max_position_seq_len = config['max_position_embeddings']
+        # ErnieConfig.__getitem__ returns None for missing keys, so this falls
+        # back to 'type_vocab_size' when 'sent_type_vocab_size' is absent.
+        if config['sent_type_vocab_size']:
+            self._sent_types = config['sent_type_vocab_size']
+        else:
+            self._sent_types = config['type_vocab_size']
+
+        self._use_task_id = config['use_task_id']
+        if self._use_task_id:
+            self._task_types = config['task_type_vocab_size']
+        self._hidden_act = config['hidden_act']
+        self._prepostprocess_dropout = config['hidden_dropout_prob']
+        self._attention_dropout = config['attention_probs_dropout_prob']
+        self._weight_sharing = weight_sharing
+
+        self._word_emb_name = "word_embedding"
+        self._pos_emb_name = "pos_embedding"
+        self._sent_emb_name = "sent_embedding"
+        self._task_emb_name = "task_embedding"
+        self._dtype = "float16" if use_fp16 else "float32"
+        self._emb_dtype = "float32"
+
+        # Initialize all weights with a truncated normal initializer; all biases
+        # are initialized to constant zero by default.
+ self._param_initializer = fluid.initializer.TruncatedNormal(scale=config['initializer_range']) + + self._build_model(src_ids, position_ids, sentence_ids, task_ids, input_mask) + + def _build_model(self, src_ids, position_ids, sentence_ids, task_ids, input_mask): + # padding id in vocabulary must be set to 0 + emb_out = fluid.layers.embedding( + input=src_ids, + size=[self._voc_size, self._emb_size], + dtype=self._emb_dtype, + param_attr=fluid.ParamAttr(name=self._word_emb_name, initializer=self._param_initializer), + is_sparse=False) + + position_emb_out = fluid.layers.embedding( + input=position_ids, + size=[self._max_position_seq_len, self._emb_size], + dtype=self._emb_dtype, + param_attr=fluid.ParamAttr(name=self._pos_emb_name, initializer=self._param_initializer)) + + sent_emb_out = fluid.layers.embedding( + sentence_ids, + size=[self._sent_types, self._emb_size], + dtype=self._emb_dtype, + param_attr=fluid.ParamAttr(name=self._sent_emb_name, initializer=self._param_initializer)) + + emb_out = emb_out + position_emb_out + emb_out = emb_out + sent_emb_out + + if self._use_task_id: + task_emb_out = fluid.layers.embedding( + task_ids, + size=[self._task_types, self._emb_size], + dtype=self._emb_dtype, + param_attr=fluid.ParamAttr(name=self._task_emb_name, initializer=self._param_initializer)) + + emb_out = emb_out + task_emb_out + + emb_out = pre_process_layer(emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') + + if self._dtype == "float16": + emb_out = fluid.layers.cast(x=emb_out, dtype=self._dtype) + input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) + self_attn_mask = fluid.layers.matmul(x=input_mask, y=input_mask, transpose_y=True) + + self_attn_mask = fluid.layers.scale(x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) + n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] * self._n_head, axis=1) + n_head_self_attn_mask.stop_gradient = True + + self._enc_out = encoder( + enc_input=emb_out, + attn_bias=n_head_self_attn_mask, + n_layer=self._n_layer, + n_head=self._n_head, + d_key=self._emb_size // self._n_head, + d_value=self._emb_size // self._n_head, + d_model=self._emb_size, + d_inner_hid=self._emb_size * 4, + prepostprocess_dropout=self._prepostprocess_dropout, + attention_dropout=self._attention_dropout, + relu_dropout=0, + hidden_act=self._hidden_act, + preprocess_cmd="", + postprocess_cmd="dan", + param_initializer=self._param_initializer, + name='encoder') + if self._dtype == "float16": + self._enc_out = fluid.layers.cast(x=self._enc_out, dtype=self._emb_dtype) + + def get_sequence_output(self): + return self._enc_out + + def get_pooled_output(self): + """Get the first feature of each sequence for classification""" + next_sent_feat = fluid.layers.slice(input=self._enc_out, axes=[1], starts=[0], ends=[1]) + next_sent_feat = fluid.layers.fc( + input=next_sent_feat, + size=self._emb_size, + act="tanh", + param_attr=fluid.ParamAttr(name="pooled_fc.w_0", initializer=self._param_initializer), + bias_attr="pooled_fc.b_0") + return next_sent_feat + + def get_lm_output(self, mask_label, mask_pos): + """Get the loss & accuracy for pretraining""" + + mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') + + # extract the first token feature in each sentence + self.next_sent_feat = self.get_pooled_output() + reshaped_emb_out = fluid.layers.reshape(x=self._enc_out, shape=[-1, self._emb_size]) + # extract masked tokens' feature + mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) + + # transform: fc + 
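+        # (gather() above indexes the flattened [batch * seq_len, emb_size]
+        # tensor, so mask_pos is expected to hold absolute row indices of the
+        # masked tokens, i.e. batch_index * seq_len + position.)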
mask_trans_feat = fluid.layers.fc( + input=mask_feat, + size=self._emb_size, + act=self._hidden_act, + param_attr=fluid.ParamAttr(name='mask_lm_trans_fc.w_0', initializer=self._param_initializer), + bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) + + # transform: layer norm + mask_trans_feat = fluid.layers.layer_norm( + mask_trans_feat, + begin_norm_axis=len(mask_trans_feat.shape) - 1, + param_attr=fluid.ParamAttr( + name='mask_lm_trans_layer_norm_scale', initializer=fluid.initializer.Constant(1.)), + bias_attr=fluid.ParamAttr(name='mask_lm_trans_layer_norm_bias', initializer=fluid.initializer.Constant(1.))) + # transform: layer norm + #mask_trans_feat = pre_process_layer( + # mask_trans_feat, 'n', name='mask_lm_trans') + + mask_lm_out_bias_attr = fluid.ParamAttr( + name="mask_lm_out_fc.b_0", initializer=fluid.initializer.Constant(value=0.0)) + if self._weight_sharing: + fc_out = fluid.layers.matmul( + x=mask_trans_feat, + y=fluid.default_main_program().global_block().var(self._word_emb_name), + transpose_y=True) + fc_out += fluid.layers.create_parameter( + shape=[self._voc_size], dtype=self._emb_dtype, attr=mask_lm_out_bias_attr, is_bias=True) + + else: + fc_out = fluid.layers.fc( + input=mask_trans_feat, + size=self._voc_size, + param_attr=fluid.ParamAttr(name="mask_lm_out_fc.w_0", initializer=self._param_initializer), + bias_attr=mask_lm_out_bias_attr) + + mask_lm_loss = fluid.layers.softmax_with_cross_entropy(logits=fc_out, label=mask_label) + mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) + + return mean_mask_lm_loss + + def get_task_output(self, task, task_labels): + task_fc_out = fluid.layers.fc( + input=self.next_sent_feat, + size=task["num_labels"], + param_attr=fluid.ParamAttr(name=task["task_name"] + "_fc.w_0", initializer=self._param_initializer), + bias_attr=task["task_name"] + "_fc.b_0") + task_loss, task_softmax = fluid.layers.softmax_with_cross_entropy( + logits=task_fc_out, label=task_labels, return_softmax=True) + task_acc = fluid.layers.accuracy(input=task_softmax, label=task_labels) + mean_task_loss = fluid.layers.mean(task_loss) + return mean_task_loss, task_acc diff --git a/modules/text/semantic_model/ernie_tiny/model/transformer_encoder.py b/modules/text/semantic_model/ernie_tiny/model/transformer_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..53051cde80308a17a30d9b92de11c712b63da406 --- /dev/null +++ b/modules/text/semantic_model/ernie_tiny/model/transformer_encoder.py @@ -0,0 +1,288 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Transformer encoder.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from functools import partial + +import paddle.fluid as fluid +import paddle.fluid.layers as layers + + +def multi_head_attention(queries, + keys, + values, + attn_bias, + d_key, + d_value, + d_model, + n_head=1, + dropout_rate=0., + cache=None, + param_initializer=None, + name='multi_head_att'): + """ + Multi-Head Attention. Note that attn_bias is added to the logit before + computing softmax activiation to mask certain selected positions so that + they will not considered in attention weights. + """ + keys = queries if keys is None else keys + values = keys if values is None else values + + if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): + raise ValueError("Inputs: quries, keys and values should all be 3-D tensors.") + + def __compute_qkv(queries, keys, values, n_head, d_key, d_value): + """ + Add linear projection to queries, keys, and values. + """ + q = layers.fc( + input=queries, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_query_fc.w_0', initializer=param_initializer), + bias_attr=name + '_query_fc.b_0') + k = layers.fc( + input=keys, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_key_fc.w_0', initializer=param_initializer), + bias_attr=name + '_key_fc.b_0') + v = layers.fc( + input=values, + size=d_value * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_value_fc.w_0', initializer=param_initializer), + bias_attr=name + '_value_fc.b_0') + return q, k, v + + def __split_heads(x, n_head): + """ + Reshape the last dimension of inpunt tensor x so that it becomes two + dimensions and then transpose. Specifically, input a tensor with shape + [bs, max_sequence_length, n_head * hidden_dim] then output a tensor + with shape [bs, n_head, max_sequence_length, hidden_dim]. + """ + hidden_size = x.shape[-1] + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + reshaped = layers.reshape(x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) + + # permuate the dimensions into: + # [batch_size, n_head, max_sequence_len, hidden_size_per_head] + return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + + def __combine_heads(x): + """ + Transpose and then reshape the last two dimensions of inpunt tensor x + so that it becomes one dimension, which is reverse to __split_heads. + """ + if len(x.shape) == 3: return x + if len(x.shape) != 4: + raise ValueError("Input(x) should be a 4-D Tensor.") + + trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. 
+        return layers.reshape(x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], inplace=True)
+
+    def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate):
+        """
+        Scaled Dot-Product Attention
+        """
+        scaled_q = layers.scale(x=q, scale=d_key**-0.5)
+        product = layers.matmul(x=scaled_q, y=k, transpose_y=True)
+        if attn_bias:
+            product += attn_bias
+        weights = layers.softmax(product)
+        if dropout_rate:
+            weights = layers.dropout(
+                weights, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+        out = layers.matmul(weights, v)
+        return out
+
+    q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value)
+
+    if cache is not None:  # use cache and concat time steps
+        # Since the inplace reshape in __split_heads changes the shape of k and
+        # v, which is the cache input for the next time step, reshape the cache
+        # input from the previous time step first.
+        k = cache["k"] = layers.concat([layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1)
+        v = cache["v"] = layers.concat([layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1)
+
+    q = __split_heads(q, n_head)
+    k = __split_heads(k, n_head)
+    v = __split_heads(v, n_head)
+
+    ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate)
+
+    out = __combine_heads(ctx_multiheads)
+
+    # Project back to the model size.
+    proj_out = layers.fc(
+        input=out,
+        size=d_model,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_output_fc.w_0', initializer=param_initializer),
+        bias_attr=name + '_output_fc.b_0')
+    return proj_out
+
+
+def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate, hidden_act, param_initializer=None, name='ffn'):
+    """
+    Position-wise Feed-Forward Networks.
+    This module consists of two linear transformations with a ReLU activation
+    in between, which is applied to each position separately and identically.
+    """
+    hidden = layers.fc(
+        input=x,
+        size=d_inner_hid,
+        num_flatten_dims=2,
+        act=hidden_act,
+        param_attr=fluid.ParamAttr(name=name + '_fc_0.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_0.b_0')
+    if dropout_rate:
+        hidden = layers.dropout(
+            hidden, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    out = layers.fc(
+        input=hidden,
+        size=d_hid,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_fc_1.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_1.b_0')
+    return out
+
+
+def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., name=''):
+    """
+    Add residual connection, layer normalization and dropout to the out tensor
+    optionally, according to the value of process_cmd.
+    This will be used before or after multi-head attention and position-wise
+    feed-forward networks.
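+    For example, the command string 'dan' (which ErnieModel passes as
+    postprocess_cmd in this repo) applies dropout, then the residual add,
+    then layer normalization, in that order.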
+ """ + for cmd in process_cmd: + if cmd == "a": # add residual connection + out = out + prev_out if prev_out else out + elif cmd == "n": # add layer normalization + out_dtype = out.dtype + if out_dtype == fluid.core.VarDesc.VarType.FP16: + out = layers.cast(x=out, dtype="float32") + out = layers.layer_norm( + out, + begin_norm_axis=len(out.shape) - 1, + param_attr=fluid.ParamAttr(name=name + '_layer_norm_scale', initializer=fluid.initializer.Constant(1.)), + bias_attr=fluid.ParamAttr(name=name + '_layer_norm_bias', initializer=fluid.initializer.Constant(0.))) + if out_dtype == fluid.core.VarDesc.VarType.FP16: + out = layers.cast(x=out, dtype="float16") + elif cmd == "d": # add dropout + if dropout_rate: + out = layers.dropout( + out, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + return out + + +pre_process_layer = partial(pre_post_process_layer, None) +post_process_layer = pre_post_process_layer + + +def encoder_layer(enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """The encoder layers that can be stacked to form a deep encoder. + This module consits of a multi-head (self) attention followed by + position-wise feed-forward networks and both the two components companied + with the post_process_layer to add residual connection, layer normalization + and droput. + """ + attn_output = multi_head_attention( + pre_process_layer(enc_input, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_att'), + None, + None, + attn_bias, + d_key, + d_value, + d_model, + n_head, + attention_dropout, + param_initializer=param_initializer, + name=name + '_multi_head_att') + attn_output = post_process_layer( + enc_input, attn_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_att') + ffd_output = positionwise_feed_forward( + pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_ffn'), + d_inner_hid, + d_model, + relu_dropout, + hidden_act, + param_initializer=param_initializer, + name=name + '_ffn') + return post_process_layer(attn_output, ffd_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_ffn') + + +def encoder(enc_input, + attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """ + The encoder is composed of a stack of identical layers returned by calling + encoder_layer. + """ + for i in range(n_layer): + enc_output = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + name=name + '_layer_' + str(i)) + enc_input = enc_output + enc_output = pre_process_layer(enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") + + return enc_output diff --git a/modules/text/semantic_model/ernie_tiny/module.py b/modules/text/semantic_model/ernie_tiny/module.py new file mode 100644 index 0000000000000000000000000000000000000000..c6677af01361f1cfae383474b5687514cee10f4e --- /dev/null +++ b/modules/text/semantic_model/ernie_tiny/module.py @@ -0,0 +1,80 @@ +# coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. 
All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from paddlehub import TransformerModule
+from paddlehub.module.module import moduleinfo
+
+from ernie_tiny.model.ernie import ErnieModel, ErnieConfig
+
+
+@moduleinfo(
+    name="ernie_tiny",
+    version="1.1.0",
+    summary="Baidu's ERNIE-tiny, Enhanced Representation through kNowledge IntEgration, tiny version, max_seq_len=512",
+    author="baidu-nlp",
+    author_email="",
+    type="nlp/semantic_model",
+)
+class ErnieTiny(TransformerModule):
+    def _initialize(self):
+        ernie_config_path = os.path.join(self.directory, "assets", "ernie_tiny_config.json")
+        self.ernie_config = ErnieConfig(ernie_config_path)
+        self.MAX_SEQ_LEN = 512
+        self.params_path = os.path.join(self.directory, "assets", "params")
+        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
+        self.spm_path = os.path.join(self.directory, "assets", "spm_cased_simp_sampled.model")
+        self.word_dict_path = os.path.join(self.directory, "assets", "dict.wordseg.pickle")
+
+    def net(self, input_ids, position_ids, segment_ids, input_mask):
+        """
+        Create the neural network.
+
+        Args:
+            input_ids (tensor): the word ids.
+            position_ids (tensor): the position ids.
+            segment_ids (tensor): the segment ids.
+            input_mask (tensor): the padding mask.
+
+        Returns:
+            pooled_output (tensor): sentence-level output for classification task.
+            sequence_output (tensor): token-level output for sequence task.
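+
+        Note: the implementation below forces use_task_id to False, so task
+        embeddings are never added by this module.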
+ """ + self.ernie_config._config_dict['use_task_id'] = False + ernie = ErnieModel( + src_ids=input_ids, + position_ids=position_ids, + sentence_ids=segment_ids, + task_ids=None, + input_mask=input_mask, + config=self.ernie_config, + use_fp16=False) + pooled_output = ernie.get_pooled_output() + sequence_output = ernie.get_sequence_output() + return pooled_output, sequence_output + + def param_prefix(self): + return "@HUB_ernie-tiny@" + + +if __name__ == '__main__': + test_module = ErnieTiny() diff --git a/hub_module/modules/text/semantic_model/ernie_v2_eng_base/README.md b/modules/text/semantic_model/ernie_v2_eng_base/README.md similarity index 100% rename from hub_module/modules/text/semantic_model/ernie_v2_eng_base/README.md rename to modules/text/semantic_model/ernie_v2_eng_base/README.md diff --git a/hub_module/modules/text/semantic_model/ernie_tiny/__init__.py b/modules/text/semantic_model/ernie_v2_eng_base/__init__.py similarity index 100% rename from hub_module/modules/text/semantic_model/ernie_tiny/__init__.py rename to modules/text/semantic_model/ernie_v2_eng_base/__init__.py diff --git a/hub_module/modules/text/semantic_model/ernie_v2_eng_base/__init__.py b/modules/text/semantic_model/ernie_v2_eng_base/model/__init__.py similarity index 100% rename from hub_module/modules/text/semantic_model/ernie_v2_eng_base/__init__.py rename to modules/text/semantic_model/ernie_v2_eng_base/model/__init__.py diff --git a/modules/text/semantic_model/ernie_v2_eng_base/model/ernie.py b/modules/text/semantic_model/ernie_v2_eng_base/model/ernie.py new file mode 100644 index 0000000000000000000000000000000000000000..8eb74d001161e9c6c64fc5920bf02578e8b0d7b1 --- /dev/null +++ b/modules/text/semantic_model/ernie_v2_eng_base/model/ernie.py @@ -0,0 +1,238 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Ernie model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import absolute_import + +import json + +import six +import paddle.fluid as fluid +from io import open +from paddlehub.common.logger import logger + +from ernie_v2_eng_base.model.transformer_encoder import encoder, pre_process_layer + + +class ErnieConfig(object): + def __init__(self, config_path): + self._config_dict = self._parse(config_path) + + def _parse(self, config_path): + try: + with open(config_path, 'r', encoding='utf8') as json_file: + config_dict = json.load(json_file) + except Exception: + raise IOError("Error in parsing Ernie model config file '%s'" % config_path) + else: + return config_dict + + def __getitem__(self, key): + return self._config_dict.get(key, None) + + def print_config(self): + for arg, value in sorted(six.iteritems(self._config_dict)): + logger.info('%s: %s' % (arg, value)) + logger.info('------------------------------------------------') + + +class ErnieModel(object): + def __init__(self, + src_ids, + position_ids, + sentence_ids, + task_ids, + input_mask, + config, + weight_sharing=True, + use_fp16=False): + + self._emb_size = config['hidden_size'] + self._n_layer = config['num_hidden_layers'] + self._n_head = config['num_attention_heads'] + self._voc_size = config['vocab_size'] + self._max_position_seq_len = config['max_position_embeddings'] + if config['sent_type_vocab_size']: + self._sent_types = config['sent_type_vocab_size'] + else: + self._sent_types = config['type_vocab_size'] + + self._use_task_id = config['use_task_id'] + if self._use_task_id: + self._task_types = config['task_type_vocab_size'] + self._hidden_act = config['hidden_act'] + self._prepostprocess_dropout = config['hidden_dropout_prob'] + self._attention_dropout = config['attention_probs_dropout_prob'] + self._weight_sharing = weight_sharing + + self._word_emb_name = "word_embedding" + self._pos_emb_name = "pos_embedding" + self._sent_emb_name = "sent_embedding" + self._task_emb_name = "task_embedding" + self._dtype = "float16" if use_fp16 else "float32" + self._emb_dtype = "float32" + + # Initialize all weigths by truncated normal initializer, and all biases + # will be initialized by constant zero by default. 
+ self._param_initializer = fluid.initializer.TruncatedNormal(scale=config['initializer_range']) + + self._build_model(src_ids, position_ids, sentence_ids, task_ids, input_mask) + + def _build_model(self, src_ids, position_ids, sentence_ids, task_ids, input_mask): + # padding id in vocabulary must be set to 0 + emb_out = fluid.layers.embedding( + input=src_ids, + size=[self._voc_size, self._emb_size], + dtype=self._emb_dtype, + param_attr=fluid.ParamAttr(name=self._word_emb_name, initializer=self._param_initializer), + is_sparse=False) + + position_emb_out = fluid.layers.embedding( + input=position_ids, + size=[self._max_position_seq_len, self._emb_size], + dtype=self._emb_dtype, + param_attr=fluid.ParamAttr(name=self._pos_emb_name, initializer=self._param_initializer)) + + sent_emb_out = fluid.layers.embedding( + sentence_ids, + size=[self._sent_types, self._emb_size], + dtype=self._emb_dtype, + param_attr=fluid.ParamAttr(name=self._sent_emb_name, initializer=self._param_initializer)) + + emb_out = emb_out + position_emb_out + emb_out = emb_out + sent_emb_out + + if self._use_task_id: + task_emb_out = fluid.layers.embedding( + task_ids, + size=[self._task_types, self._emb_size], + dtype=self._emb_dtype, + param_attr=fluid.ParamAttr(name=self._task_emb_name, initializer=self._param_initializer)) + + emb_out = emb_out + task_emb_out + + emb_out = pre_process_layer(emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') + + if self._dtype == "float16": + emb_out = fluid.layers.cast(x=emb_out, dtype=self._dtype) + input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) + self_attn_mask = fluid.layers.matmul(x=input_mask, y=input_mask, transpose_y=True) + + self_attn_mask = fluid.layers.scale(x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) + n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] * self._n_head, axis=1) + n_head_self_attn_mask.stop_gradient = True + + self._enc_out = encoder( + enc_input=emb_out, + attn_bias=n_head_self_attn_mask, + n_layer=self._n_layer, + n_head=self._n_head, + d_key=self._emb_size // self._n_head, + d_value=self._emb_size // self._n_head, + d_model=self._emb_size, + d_inner_hid=self._emb_size * 4, + prepostprocess_dropout=self._prepostprocess_dropout, + attention_dropout=self._attention_dropout, + relu_dropout=0, + hidden_act=self._hidden_act, + preprocess_cmd="", + postprocess_cmd="dan", + param_initializer=self._param_initializer, + name='encoder') + + def get_sequence_output(self): + return self._enc_out + + def get_pooled_output(self): + """Get the first feature of each sequence for classification""" + next_sent_feat = fluid.layers.slice(input=self._enc_out, axes=[1], starts=[0], ends=[1]) + if self._dtype == "float16": + next_sent_feat = fluid.layers.cast(x=next_sent_feat, dtype=self._emb_dtype) + next_sent_feat = fluid.layers.fc( + input=next_sent_feat, + size=self._emb_size, + act="tanh", + param_attr=fluid.ParamAttr(name="pooled_fc.w_0", initializer=self._param_initializer), + bias_attr="pooled_fc.b_0") + return next_sent_feat + + def get_lm_output(self, mask_label, mask_pos): + """Get the loss & accuracy for pretraining""" + + mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') + + # extract the first token feature in each sentence + self.next_sent_feat = self.get_pooled_output() + reshaped_emb_out = fluid.layers.reshape(x=self._enc_out, shape=[-1, self._emb_size]) + # extract masked tokens' feature + mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) + if self._dtype == "float16": 
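+            # Presumably the cast back to fp32 keeps the masked-LM head in
+            # full precision even when the encoder ran in fp16.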
+ mask_feat = fluid.layers.cast(x=mask_feat, dtype=self._emb_dtype) + + # transform: fc + mask_trans_feat = fluid.layers.fc( + input=mask_feat, + size=self._emb_size, + act=self._hidden_act, + param_attr=fluid.ParamAttr(name='mask_lm_trans_fc.w_0', initializer=self._param_initializer), + bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) + + # transform: layer norm + mask_trans_feat = fluid.layers.layer_norm( + mask_trans_feat, + begin_norm_axis=len(mask_trans_feat.shape) - 1, + param_attr=fluid.ParamAttr( + name='mask_lm_trans_layer_norm_scale', initializer=fluid.initializer.Constant(1.)), + bias_attr=fluid.ParamAttr(name='mask_lm_trans_layer_norm_bias', initializer=fluid.initializer.Constant(1.))) + # transform: layer norm + #mask_trans_feat = pre_process_layer( + # mask_trans_feat, 'n', name='mask_lm_trans') + + mask_lm_out_bias_attr = fluid.ParamAttr( + name="mask_lm_out_fc.b_0", initializer=fluid.initializer.Constant(value=0.0)) + if self._weight_sharing: + fc_out = fluid.layers.matmul( + x=mask_trans_feat, + y=fluid.default_main_program().global_block().var(self._word_emb_name), + transpose_y=True) + fc_out += fluid.layers.create_parameter( + shape=[self._voc_size], dtype=self._emb_dtype, attr=mask_lm_out_bias_attr, is_bias=True) + + else: + fc_out = fluid.layers.fc( + input=mask_trans_feat, + size=self._voc_size, + param_attr=fluid.ParamAttr(name="mask_lm_out_fc.w_0", initializer=self._param_initializer), + bias_attr=mask_lm_out_bias_attr) + + mask_lm_loss = fluid.layers.softmax_with_cross_entropy(logits=fc_out, label=mask_label) + mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) + + return mean_mask_lm_loss + + def get_task_output(self, task, task_labels): + task_fc_out = fluid.layers.fc( + input=self.next_sent_feat, + size=task["num_labels"], + param_attr=fluid.ParamAttr(name=task["task_name"] + "_fc.w_0", initializer=self._param_initializer), + bias_attr=task["task_name"] + "_fc.b_0") + task_loss, task_softmax = fluid.layers.softmax_with_cross_entropy( + logits=task_fc_out, label=task_labels, return_softmax=True) + task_acc = fluid.layers.accuracy(input=task_softmax, label=task_labels) + mean_task_loss = fluid.layers.mean(task_loss) + return mean_task_loss, task_acc diff --git a/modules/text/semantic_model/ernie_v2_eng_base/model/transformer_encoder.py b/modules/text/semantic_model/ernie_v2_eng_base/model/transformer_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..53051cde80308a17a30d9b92de11c712b63da406 --- /dev/null +++ b/modules/text/semantic_model/ernie_v2_eng_base/model/transformer_encoder.py @@ -0,0 +1,288 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Transformer encoder.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from functools import partial + +import paddle.fluid as fluid +import paddle.fluid.layers as layers + + +def multi_head_attention(queries, + keys, + values, + attn_bias, + d_key, + d_value, + d_model, + n_head=1, + dropout_rate=0., + cache=None, + param_initializer=None, + name='multi_head_att'): + """ + Multi-Head Attention. Note that attn_bias is added to the logit before + computing softmax activiation to mask certain selected positions so that + they will not considered in attention weights. + """ + keys = queries if keys is None else keys + values = keys if values is None else values + + if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): + raise ValueError("Inputs: quries, keys and values should all be 3-D tensors.") + + def __compute_qkv(queries, keys, values, n_head, d_key, d_value): + """ + Add linear projection to queries, keys, and values. + """ + q = layers.fc( + input=queries, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_query_fc.w_0', initializer=param_initializer), + bias_attr=name + '_query_fc.b_0') + k = layers.fc( + input=keys, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_key_fc.w_0', initializer=param_initializer), + bias_attr=name + '_key_fc.b_0') + v = layers.fc( + input=values, + size=d_value * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_value_fc.w_0', initializer=param_initializer), + bias_attr=name + '_value_fc.b_0') + return q, k, v + + def __split_heads(x, n_head): + """ + Reshape the last dimension of inpunt tensor x so that it becomes two + dimensions and then transpose. Specifically, input a tensor with shape + [bs, max_sequence_length, n_head * hidden_dim] then output a tensor + with shape [bs, n_head, max_sequence_length, hidden_dim]. + """ + hidden_size = x.shape[-1] + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + reshaped = layers.reshape(x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) + + # permuate the dimensions into: + # [batch_size, n_head, max_sequence_len, hidden_size_per_head] + return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + + def __combine_heads(x): + """ + Transpose and then reshape the last two dimensions of inpunt tensor x + so that it becomes one dimension, which is reverse to __split_heads. + """ + if len(x.shape) == 3: return x + if len(x.shape) != 4: + raise ValueError("Input(x) should be a 4-D Tensor.") + + trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. 
+        return layers.reshape(x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], inplace=True)
+
+    def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate):
+        """
+        Scaled Dot-Product Attention
+        """
+        scaled_q = layers.scale(x=q, scale=d_key**-0.5)
+        product = layers.matmul(x=scaled_q, y=k, transpose_y=True)
+        if attn_bias:
+            product += attn_bias
+        weights = layers.softmax(product)
+        if dropout_rate:
+            weights = layers.dropout(
+                weights, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+        out = layers.matmul(weights, v)
+        return out
+
+    q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value)
+
+    if cache is not None:  # use cache and concat time steps
+        # Since the inplace reshape in __split_heads changes the shape of k and
+        # v, which is the cache input for the next time step, reshape the cache
+        # input from the previous time step first.
+        k = cache["k"] = layers.concat([layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1)
+        v = cache["v"] = layers.concat([layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1)
+
+    q = __split_heads(q, n_head)
+    k = __split_heads(k, n_head)
+    v = __split_heads(v, n_head)
+
+    ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate)
+
+    out = __combine_heads(ctx_multiheads)
+
+    # Project back to the model size.
+    proj_out = layers.fc(
+        input=out,
+        size=d_model,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_output_fc.w_0', initializer=param_initializer),
+        bias_attr=name + '_output_fc.b_0')
+    return proj_out
+
+
+def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate, hidden_act, param_initializer=None, name='ffn'):
+    """
+    Position-wise Feed-Forward Networks.
+    This module consists of two linear transformations with a ReLU activation
+    in between, which is applied to each position separately and identically.
+    """
+    hidden = layers.fc(
+        input=x,
+        size=d_inner_hid,
+        num_flatten_dims=2,
+        act=hidden_act,
+        param_attr=fluid.ParamAttr(name=name + '_fc_0.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_0.b_0')
+    if dropout_rate:
+        hidden = layers.dropout(
+            hidden, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    out = layers.fc(
+        input=hidden,
+        size=d_hid,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_fc_1.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_1.b_0')
+    return out
+
+
+def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., name=''):
+    """
+    Add residual connection, layer normalization and dropout to the out tensor
+    optionally, according to the value of process_cmd.
+    This will be used before or after multi-head attention and position-wise
+    feed-forward networks.
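+    Each character is a command: 'a' adds the residual connection, 'n'
+    applies layer normalization, and 'd' applies dropout; e.g. 'nd', used for
+    the 'pre_encoder' step in ernie.py, normalizes and then drops out the
+    embeddings.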
+ """ + for cmd in process_cmd: + if cmd == "a": # add residual connection + out = out + prev_out if prev_out else out + elif cmd == "n": # add layer normalization + out_dtype = out.dtype + if out_dtype == fluid.core.VarDesc.VarType.FP16: + out = layers.cast(x=out, dtype="float32") + out = layers.layer_norm( + out, + begin_norm_axis=len(out.shape) - 1, + param_attr=fluid.ParamAttr(name=name + '_layer_norm_scale', initializer=fluid.initializer.Constant(1.)), + bias_attr=fluid.ParamAttr(name=name + '_layer_norm_bias', initializer=fluid.initializer.Constant(0.))) + if out_dtype == fluid.core.VarDesc.VarType.FP16: + out = layers.cast(x=out, dtype="float16") + elif cmd == "d": # add dropout + if dropout_rate: + out = layers.dropout( + out, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + return out + + +pre_process_layer = partial(pre_post_process_layer, None) +post_process_layer = pre_post_process_layer + + +def encoder_layer(enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """The encoder layers that can be stacked to form a deep encoder. + This module consits of a multi-head (self) attention followed by + position-wise feed-forward networks and both the two components companied + with the post_process_layer to add residual connection, layer normalization + and droput. + """ + attn_output = multi_head_attention( + pre_process_layer(enc_input, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_att'), + None, + None, + attn_bias, + d_key, + d_value, + d_model, + n_head, + attention_dropout, + param_initializer=param_initializer, + name=name + '_multi_head_att') + attn_output = post_process_layer( + enc_input, attn_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_att') + ffd_output = positionwise_feed_forward( + pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_ffn'), + d_inner_hid, + d_model, + relu_dropout, + hidden_act, + param_initializer=param_initializer, + name=name + '_ffn') + return post_process_layer(attn_output, ffd_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_ffn') + + +def encoder(enc_input, + attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """ + The encoder is composed of a stack of identical layers returned by calling + encoder_layer. 
+ """ + for i in range(n_layer): + enc_output = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + name=name + '_layer_' + str(i)) + enc_input = enc_output + enc_output = pre_process_layer(enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") + + return enc_output diff --git a/modules/text/semantic_model/ernie_v2_eng_base/module.py b/modules/text/semantic_model/ernie_v2_eng_base/module.py new file mode 100644 index 0000000000000000000000000000000000000000..fd2d957f0115a743d6d77dd622e1f1c81a8a8146 --- /dev/null +++ b/modules/text/semantic_model/ernie_v2_eng_base/module.py @@ -0,0 +1,76 @@ +# coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from paddlehub import TransformerModule +from paddlehub.module.module import moduleinfo + +from ernie_v2_eng_base.model.ernie import ErnieModel, ErnieConfig + + +@moduleinfo( + name="ernie_v2_eng_base", + version="1.1.0", + summary= + "Baidu's ERNIE 2.0, Enhanced Representation through kNowledge IntEgration, A Continual Pre-training Framework for Language Understanding. 12-layer, 768-hidden, 12-heads, 110M parameters.", + author="baidu-nlp", + author_email="", + type="nlp/semantic_model", +) +class ErnieV2EngBase(TransformerModule): + def _initialize(self): + ernie_config_path = os.path.join(self.directory, "assets", "ernie_config.json") + self.ernie_config = ErnieConfig(ernie_config_path) + self.MAX_SEQ_LEN = 512 + self.params_path = os.path.join(self.directory, "assets", "params") + self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")\ + + def net(self, input_ids, position_ids, segment_ids, input_mask): + """ + create neural network. + + Args: + input_ids (tensor): the word ids. + position_ids (tensor): the position ids. + segment_ids (tensor): the segment ids. + input_mask (tensor): the padding mask. + + Returns: + pooled_output (tensor): sentence-level output for classification task. + sequence_output (tensor): token-level output for sequence task. 
+ """ + self.ernie_config._config_dict['use_task_id'] = False + ernie = ErnieModel( + src_ids=input_ids, + position_ids=position_ids, + sentence_ids=segment_ids, + task_ids=None, + input_mask=input_mask, + config=self.ernie_config, + use_fp16=False) + pooled_output = ernie.get_pooled_output() + sequence_output = ernie.get_sequence_output() + return pooled_output, sequence_output + + +if __name__ == '__main__': + test_module = ErnieV2EngBase() diff --git a/hub_module/modules/text/semantic_model/ernie_v2_eng_large/README.md b/modules/text/semantic_model/ernie_v2_eng_large/README.md similarity index 100% rename from hub_module/modules/text/semantic_model/ernie_v2_eng_large/README.md rename to modules/text/semantic_model/ernie_v2_eng_large/README.md diff --git a/hub_module/modules/text/semantic_model/ernie_v2_eng_base/model/__init__.py b/modules/text/semantic_model/ernie_v2_eng_large/__init__.py similarity index 100% rename from hub_module/modules/text/semantic_model/ernie_v2_eng_base/model/__init__.py rename to modules/text/semantic_model/ernie_v2_eng_large/__init__.py diff --git a/hub_module/modules/text/semantic_model/ernie_v2_eng_large/__init__.py b/modules/text/semantic_model/ernie_v2_eng_large/model/__init__.py similarity index 100% rename from hub_module/modules/text/semantic_model/ernie_v2_eng_large/__init__.py rename to modules/text/semantic_model/ernie_v2_eng_large/model/__init__.py diff --git a/modules/text/semantic_model/ernie_v2_eng_large/model/ernie.py b/modules/text/semantic_model/ernie_v2_eng_large/model/ernie.py new file mode 100644 index 0000000000000000000000000000000000000000..c2ae69262362f15bb49cca955b48786852fdfe1b --- /dev/null +++ b/modules/text/semantic_model/ernie_v2_eng_large/model/ernie.py @@ -0,0 +1,238 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Ernie model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import absolute_import + +import json + +import six +import paddle.fluid as fluid +from io import open +from paddlehub.common.logger import logger + +from ernie_v2_eng_large.model.transformer_encoder import encoder, pre_process_layer + + +class ErnieConfig(object): + def __init__(self, config_path): + self._config_dict = self._parse(config_path) + + def _parse(self, config_path): + try: + with open(config_path, 'r', encoding='utf8') as json_file: + config_dict = json.load(json_file) + except Exception: + raise IOError("Error in parsing Ernie model config file '%s'" % config_path) + else: + return config_dict + + def __getitem__(self, key): + return self._config_dict.get(key, None) + + def print_config(self): + for arg, value in sorted(six.iteritems(self._config_dict)): + logger.info('%s: %s' % (arg, value)) + logger.info('------------------------------------------------') + + +class ErnieModel(object): + def __init__(self, + src_ids, + position_ids, + sentence_ids, + task_ids, + input_mask, + config, + weight_sharing=True, + use_fp16=False): + + self._emb_size = config['hidden_size'] + self._n_layer = config['num_hidden_layers'] + self._n_head = config['num_attention_heads'] + self._voc_size = config['vocab_size'] + self._max_position_seq_len = config['max_position_embeddings'] + if config['sent_type_vocab_size']: + self._sent_types = config['sent_type_vocab_size'] + else: + self._sent_types = config['type_vocab_size'] + + self._use_task_id = config['use_task_id'] + if self._use_task_id: + self._task_types = config['task_type_vocab_size'] + self._hidden_act = config['hidden_act'] + self._prepostprocess_dropout = config['hidden_dropout_prob'] + self._attention_dropout = config['attention_probs_dropout_prob'] + self._weight_sharing = weight_sharing + + self._word_emb_name = "word_embedding" + self._pos_emb_name = "pos_embedding" + self._sent_emb_name = "sent_embedding" + self._task_emb_name = "task_embedding" + self._dtype = "float16" if use_fp16 else "float32" + self._emb_dtype = "float32" + + # Initialize all weigths by truncated normal initializer, and all biases + # will be initialized by constant zero by default. 
+ self._param_initializer = fluid.initializer.TruncatedNormal(scale=config['initializer_range']) + + self._build_model(src_ids, position_ids, sentence_ids, task_ids, input_mask) + + def _build_model(self, src_ids, position_ids, sentence_ids, task_ids, input_mask): + # padding id in vocabulary must be set to 0 + emb_out = fluid.layers.embedding( + input=src_ids, + size=[self._voc_size, self._emb_size], + dtype=self._emb_dtype, + param_attr=fluid.ParamAttr(name=self._word_emb_name, initializer=self._param_initializer), + is_sparse=False) + + position_emb_out = fluid.layers.embedding( + input=position_ids, + size=[self._max_position_seq_len, self._emb_size], + dtype=self._emb_dtype, + param_attr=fluid.ParamAttr(name=self._pos_emb_name, initializer=self._param_initializer)) + + sent_emb_out = fluid.layers.embedding( + sentence_ids, + size=[self._sent_types, self._emb_size], + dtype=self._emb_dtype, + param_attr=fluid.ParamAttr(name=self._sent_emb_name, initializer=self._param_initializer)) + + emb_out = emb_out + position_emb_out + emb_out = emb_out + sent_emb_out + + if self._use_task_id: + task_emb_out = fluid.layers.embedding( + task_ids, + size=[self._task_types, self._emb_size], + dtype=self._emb_dtype, + param_attr=fluid.ParamAttr(name=self._task_emb_name, initializer=self._param_initializer)) + + emb_out = emb_out + task_emb_out + + emb_out = pre_process_layer(emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') + + if self._dtype == "float16": + emb_out = fluid.layers.cast(x=emb_out, dtype=self._dtype) + input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) + self_attn_mask = fluid.layers.matmul(x=input_mask, y=input_mask, transpose_y=True) + + self_attn_mask = fluid.layers.scale(x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) + n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] * self._n_head, axis=1) + n_head_self_attn_mask.stop_gradient = True + + self._enc_out = encoder( + enc_input=emb_out, + attn_bias=n_head_self_attn_mask, + n_layer=self._n_layer, + n_head=self._n_head, + d_key=self._emb_size // self._n_head, + d_value=self._emb_size // self._n_head, + d_model=self._emb_size, + d_inner_hid=self._emb_size * 4, + prepostprocess_dropout=self._prepostprocess_dropout, + attention_dropout=self._attention_dropout, + relu_dropout=0, + hidden_act=self._hidden_act, + preprocess_cmd="", + postprocess_cmd="dan", + param_initializer=self._param_initializer, + name='encoder') + + def get_sequence_output(self): + return self._enc_out + + def get_pooled_output(self): + """Get the first feature of each sequence for classification""" + next_sent_feat = fluid.layers.slice(input=self._enc_out, axes=[1], starts=[0], ends=[1]) + if self._dtype == "float16": + next_sent_feat = fluid.layers.cast(x=next_sent_feat, dtype=self._emb_dtype) + next_sent_feat = fluid.layers.fc( + input=next_sent_feat, + size=self._emb_size, + act="tanh", + param_attr=fluid.ParamAttr(name="pooled_fc.w_0", initializer=self._param_initializer), + bias_attr="pooled_fc.b_0") + return next_sent_feat + + def get_lm_output(self, mask_label, mask_pos): + """Get the loss & accuracy for pretraining""" + + mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') + + # extract the first token feature in each sentence + self.next_sent_feat = self.get_pooled_output() + reshaped_emb_out = fluid.layers.reshape(x=self._enc_out, shape=[-1, self._emb_size]) + # extract masked tokens' feature + mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) + if self._dtype == "float16": 
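+            # Reading of the code: the LM head below always runs in fp32, so
+            # fp16 activations are cast back before the fc/layer_norm stack.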
+ mask_feat = fluid.layers.cast(x=mask_feat, dtype=self._emb_dtype) + + # transform: fc + mask_trans_feat = fluid.layers.fc( + input=mask_feat, + size=self._emb_size, + act=self._hidden_act, + param_attr=fluid.ParamAttr(name='mask_lm_trans_fc.w_0', initializer=self._param_initializer), + bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) + + # transform: layer norm + mask_trans_feat = fluid.layers.layer_norm( + mask_trans_feat, + begin_norm_axis=len(mask_trans_feat.shape) - 1, + param_attr=fluid.ParamAttr( + name='mask_lm_trans_layer_norm_scale', initializer=fluid.initializer.Constant(1.)), + bias_attr=fluid.ParamAttr(name='mask_lm_trans_layer_norm_bias', initializer=fluid.initializer.Constant(1.))) + # transform: layer norm + #mask_trans_feat = pre_process_layer( + # mask_trans_feat, 'n', name='mask_lm_trans') + + mask_lm_out_bias_attr = fluid.ParamAttr( + name="mask_lm_out_fc.b_0", initializer=fluid.initializer.Constant(value=0.0)) + if self._weight_sharing: + fc_out = fluid.layers.matmul( + x=mask_trans_feat, + y=fluid.default_main_program().global_block().var(self._word_emb_name), + transpose_y=True) + fc_out += fluid.layers.create_parameter( + shape=[self._voc_size], dtype=self._emb_dtype, attr=mask_lm_out_bias_attr, is_bias=True) + + else: + fc_out = fluid.layers.fc( + input=mask_trans_feat, + size=self._voc_size, + param_attr=fluid.ParamAttr(name="mask_lm_out_fc.w_0", initializer=self._param_initializer), + bias_attr=mask_lm_out_bias_attr) + + mask_lm_loss = fluid.layers.softmax_with_cross_entropy(logits=fc_out, label=mask_label) + mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) + + return mean_mask_lm_loss + + def get_task_output(self, task, task_labels): + task_fc_out = fluid.layers.fc( + input=self.next_sent_feat, + size=task["num_labels"], + param_attr=fluid.ParamAttr(name=task["task_name"] + "_fc.w_0", initializer=self._param_initializer), + bias_attr=task["task_name"] + "_fc.b_0") + task_loss, task_softmax = fluid.layers.softmax_with_cross_entropy( + logits=task_fc_out, label=task_labels, return_softmax=True) + task_acc = fluid.layers.accuracy(input=task_softmax, label=task_labels) + mean_task_loss = fluid.layers.mean(task_loss) + return mean_task_loss, task_acc diff --git a/modules/text/semantic_model/ernie_v2_eng_large/model/transformer_encoder.py b/modules/text/semantic_model/ernie_v2_eng_large/model/transformer_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..53051cde80308a17a30d9b92de11c712b63da406 --- /dev/null +++ b/modules/text/semantic_model/ernie_v2_eng_large/model/transformer_encoder.py @@ -0,0 +1,288 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Transformer encoder.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from functools import partial + +import paddle.fluid as fluid +import paddle.fluid.layers as layers + + +def multi_head_attention(queries, + keys, + values, + attn_bias, + d_key, + d_value, + d_model, + n_head=1, + dropout_rate=0., + cache=None, + param_initializer=None, + name='multi_head_att'): + """ + Multi-Head Attention. Note that attn_bias is added to the logit before + computing softmax activiation to mask certain selected positions so that + they will not considered in attention weights. + """ + keys = queries if keys is None else keys + values = keys if values is None else values + + if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): + raise ValueError("Inputs: quries, keys and values should all be 3-D tensors.") + + def __compute_qkv(queries, keys, values, n_head, d_key, d_value): + """ + Add linear projection to queries, keys, and values. + """ + q = layers.fc( + input=queries, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_query_fc.w_0', initializer=param_initializer), + bias_attr=name + '_query_fc.b_0') + k = layers.fc( + input=keys, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_key_fc.w_0', initializer=param_initializer), + bias_attr=name + '_key_fc.b_0') + v = layers.fc( + input=values, + size=d_value * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_value_fc.w_0', initializer=param_initializer), + bias_attr=name + '_value_fc.b_0') + return q, k, v + + def __split_heads(x, n_head): + """ + Reshape the last dimension of inpunt tensor x so that it becomes two + dimensions and then transpose. Specifically, input a tensor with shape + [bs, max_sequence_length, n_head * hidden_dim] then output a tensor + with shape [bs, n_head, max_sequence_length, hidden_dim]. + """ + hidden_size = x.shape[-1] + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + reshaped = layers.reshape(x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) + + # permuate the dimensions into: + # [batch_size, n_head, max_sequence_len, hidden_size_per_head] + return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + + def __combine_heads(x): + """ + Transpose and then reshape the last two dimensions of inpunt tensor x + so that it becomes one dimension, which is reverse to __split_heads. + """ + if len(x.shape) == 3: return x + if len(x.shape) != 4: + raise ValueError("Input(x) should be a 4-D Tensor.") + + trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. 
+        return layers.reshape(x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], inplace=True)
+
+    def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate):
+        """
+        Scaled Dot-Product Attention
+        """
+        scaled_q = layers.scale(x=q, scale=d_key**-0.5)
+        product = layers.matmul(x=scaled_q, y=k, transpose_y=True)
+        if attn_bias:
+            product += attn_bias
+        weights = layers.softmax(product)
+        if dropout_rate:
+            weights = layers.dropout(
+                weights, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+        out = layers.matmul(weights, v)
+        return out
+
+    q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value)
+
+    if cache is not None:  # use cache and concat time steps
+        # Since the inplace reshape in __split_heads changes the shape of k and
+        # v, which is the cache input for the next time step, reshape the cache
+        # input from the previous time step first.
+        k = cache["k"] = layers.concat([layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1)
+        v = cache["v"] = layers.concat([layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1)
+
+    q = __split_heads(q, n_head)
+    k = __split_heads(k, n_head)
+    v = __split_heads(v, n_head)
+
+    ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate)
+
+    out = __combine_heads(ctx_multiheads)
+
+    # Project back to the model size.
+    proj_out = layers.fc(
+        input=out,
+        size=d_model,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_output_fc.w_0', initializer=param_initializer),
+        bias_attr=name + '_output_fc.b_0')
+    return proj_out
+
+
+def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate, hidden_act, param_initializer=None, name='ffn'):
+    """
+    Position-wise Feed-Forward Networks.
+    This module consists of two linear transformations with a ReLU activation
+    in between, which is applied to each position separately and identically.
+    """
+    hidden = layers.fc(
+        input=x,
+        size=d_inner_hid,
+        num_flatten_dims=2,
+        act=hidden_act,
+        param_attr=fluid.ParamAttr(name=name + '_fc_0.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_0.b_0')
+    if dropout_rate:
+        hidden = layers.dropout(
+            hidden, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    out = layers.fc(
+        input=hidden,
+        size=d_hid,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_fc_1.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_1.b_0')
+    return out
+
+
+def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., name=''):
+    """
+    Add residual connection, layer normalization and dropout to the out tensor
+    optionally, according to the value of process_cmd.
+    This will be used before or after multi-head attention and position-wise
+    feed-forward networks.
+ """ + for cmd in process_cmd: + if cmd == "a": # add residual connection + out = out + prev_out if prev_out else out + elif cmd == "n": # add layer normalization + out_dtype = out.dtype + if out_dtype == fluid.core.VarDesc.VarType.FP16: + out = layers.cast(x=out, dtype="float32") + out = layers.layer_norm( + out, + begin_norm_axis=len(out.shape) - 1, + param_attr=fluid.ParamAttr(name=name + '_layer_norm_scale', initializer=fluid.initializer.Constant(1.)), + bias_attr=fluid.ParamAttr(name=name + '_layer_norm_bias', initializer=fluid.initializer.Constant(0.))) + if out_dtype == fluid.core.VarDesc.VarType.FP16: + out = layers.cast(x=out, dtype="float16") + elif cmd == "d": # add dropout + if dropout_rate: + out = layers.dropout( + out, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + return out + + +pre_process_layer = partial(pre_post_process_layer, None) +post_process_layer = pre_post_process_layer + + +def encoder_layer(enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """The encoder layers that can be stacked to form a deep encoder. + This module consits of a multi-head (self) attention followed by + position-wise feed-forward networks and both the two components companied + with the post_process_layer to add residual connection, layer normalization + and droput. + """ + attn_output = multi_head_attention( + pre_process_layer(enc_input, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_att'), + None, + None, + attn_bias, + d_key, + d_value, + d_model, + n_head, + attention_dropout, + param_initializer=param_initializer, + name=name + '_multi_head_att') + attn_output = post_process_layer( + enc_input, attn_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_att') + ffd_output = positionwise_feed_forward( + pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_ffn'), + d_inner_hid, + d_model, + relu_dropout, + hidden_act, + param_initializer=param_initializer, + name=name + '_ffn') + return post_process_layer(attn_output, ffd_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_ffn') + + +def encoder(enc_input, + attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """ + The encoder is composed of a stack of identical layers returned by calling + encoder_layer. 
+ """ + for i in range(n_layer): + enc_output = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + name=name + '_layer_' + str(i)) + enc_input = enc_output + enc_output = pre_process_layer(enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") + + return enc_output diff --git a/modules/text/semantic_model/ernie_v2_eng_large/module.py b/modules/text/semantic_model/ernie_v2_eng_large/module.py new file mode 100644 index 0000000000000000000000000000000000000000..8d15bea6c27be2afc07da784ccbcb17d2f9809ee --- /dev/null +++ b/modules/text/semantic_model/ernie_v2_eng_large/module.py @@ -0,0 +1,76 @@ +# coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from paddlehub import TransformerModule +from paddlehub.module.module import moduleinfo + +from ernie_v2_eng_large.model.ernie import ErnieModel, ErnieConfig + + +@moduleinfo( + name="ernie_v2_eng_large", + version="1.1.0", + summary= + "Baidu's ERNIE 2.0, Enhanced Representation through kNowledge IntEgration, A Continual Pre-training Framework for Language Understanding. 12-layer, 768-hidden, 12-heads, 110M parameters.", + author="baidu-nlp", + author_email="", + type="nlp/semantic_model", +) +class ErnieV2EngLarge(TransformerModule): + def _initialize(self): + ernie_config_path = os.path.join(self.directory, "assets", "ernie_config.json") + self.ernie_config = ErnieConfig(ernie_config_path) + self.MAX_SEQ_LEN = 512 + self.params_path = os.path.join(self.directory, "assets", "params") + self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")\ + + def net(self, input_ids, position_ids, segment_ids, input_mask): + """ + create neural network. + + Args: + input_ids (tensor): the word ids. + position_ids (tensor): the position ids. + segment_ids (tensor): the segment ids. + input_mask (tensor): the padding mask. + + Returns: + pooled_output (tensor): sentence-level output for classification task. + sequence_output (tensor): token-level output for sequence task. 
+ """ + self.ernie_config._config_dict['use_task_id'] = False + ernie = ErnieModel( + src_ids=input_ids, + position_ids=position_ids, + sentence_ids=segment_ids, + task_ids=None, + input_mask=input_mask, + config=self.ernie_config, + use_fp16=False) + pooled_output = ernie.get_pooled_output() + sequence_output = ernie.get_sequence_output() + return pooled_output, sequence_output + + +if __name__ == '__main__': + test_module = ErnieV2EngLarge() diff --git a/hub_module/modules/text/semantic_model/lda_news/README.md b/modules/text/semantic_model/lda_news/README.md similarity index 100% rename from hub_module/modules/text/semantic_model/lda_news/README.md rename to modules/text/semantic_model/lda_news/README.md diff --git a/hub_module/modules/text/semantic_model/ernie_v2_eng_large/model/__init__.py b/modules/text/semantic_model/lda_news/__init__.py similarity index 100% rename from hub_module/modules/text/semantic_model/ernie_v2_eng_large/model/__init__.py rename to modules/text/semantic_model/lda_news/__init__.py diff --git a/hub_module/modules/text/semantic_model/lda_news/config.py b/modules/text/semantic_model/lda_news/config.py similarity index 100% rename from hub_module/modules/text/semantic_model/lda_news/config.py rename to modules/text/semantic_model/lda_news/config.py diff --git a/modules/text/semantic_model/lda_news/document.py b/modules/text/semantic_model/lda_news/document.py new file mode 100644 index 0000000000000000000000000000000000000000..4476230a5c9bc8d545b52386dbf00a201e59b468 --- /dev/null +++ b/modules/text/semantic_model/lda_news/document.py @@ -0,0 +1,176 @@ +import numpy as np + + +class Topic(object): + """Basic data structure of topic, contains topic id and + corresponding probability. + """ + + def __init__(self, tid, prob): + self.tid = tid # topic id + self.prob = prob # topic probability + + +class Token(object): + """Basic storage unit of LDA documents, contains word id + and corresponding topic. + """ + + def __init__(self, topic, id): + self.topic = topic + self.id = id + + +class Sentence(object): + """Basic storage unit of SentenceLDA documents, contains word ids + of the sentence and its corresponding topic id. + """ + + def __init__(self, topic, tokens): + self.topic = topic + self.tokens = tokens + + +class LDADoc(object): + """The storage structure of LDA model's inference result. + """ + + def __init__(self): + self._num_topics = None # Number of topics. + self._num_accum = None # Number of accumulated sample rounds. + self._alpha = None # Document prior parameter. + self._tokens = None # Storage structure of inference results. + self._topic_sum = None # Document's topic sum in one round samples. + self._accum_topic_sum = None # Accumulated results of topic sum. + + def init(self, num_topics): + """Initialize the LDADoc according to num_topics. + """ + self._num_topics = num_topics + self._num_accum = 0 + self._tokens = [] + self._topic_sum = np.zeros(self._num_topics) + self._accum_topic_sum = np.zeros(self._num_topics) + + def add_token(self, token): + """Add new word to current LDADoc. + Arg: + token: Token class object. + """ + assert token.topic >= 0, "Topic %d out of range!" % token.topic + assert token.topic < self._num_topics, "Topic %d out of range!" % token.topic + self._tokens.append(token) + self._topic_sum[token.topic] += 1 + + def token(self, index): + return self._tokens[index] + + def set_topic(self, index, new_topic): + """Set the index word's topic to new_topic, and update the corresponding + topic distribution. 
+ """ + assert new_topic >= 0, "Topic %d out of range!" % new_topic + assert new_topic < self._num_topics, "Topic %d out of range!" % new_topic + old_topic = self._tokens[index].topic + if new_topic == old_topic: + return + self._tokens[index].topic = new_topic + self._topic_sum[old_topic] -= 1 + self._topic_sum[new_topic] += 1 + + def set_alpha(self, alpha): + self._alpha = alpha + + def size(self): + """Return number of words in LDADoc. + """ + return len(self._tokens) + + def topic_sum(self, topic_id): + return self._topic_sum[topic_id] + + def sparse_topic_dist(self, sort=True): + """Return the topic distribution of documents in sparse format. + By default, it is sorted according to the topic probability + under the descending order. + """ + topic_dist = [] + sum_ = np.sum(self._accum_topic_sum) + if sum_ == 0: + return topic_dist + for i in range(0, self._num_topics): + if self._accum_topic_sum[i] == 0: + continue + topic_dist.append(Topic(i, self._accum_topic_sum[i] * 1.0 / sum_)) + if sort: + + def take_elem(topic): + return topic.prob + + topic_dist.sort(key=take_elem, reverse=True) + if topic_dist is None: + topic_dist = [] + + return topic_dist + + def dense_topic_dist(self): + """Return the distribution of document topics in dense format, + taking into account the prior parameter alpha. + """ + dense_dist = np.zeros(self._num_topics) + if self.size() == 0: + return dense_dist + dense_dist = (self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / ( + self.size() + self._alpha * self._num_topics) + return dense_dist + + def accumulate_topic_num(self): + self._accum_topic_sum += self._topic_sum + self._num_accum += 1 + + +class SLDADoc(LDADoc): + """Sentence LDA Document, inherited from LDADoc. + Add add_sentence interface. + """ + + def __init__(self): + super().__init__() + self.__sentences = None + + def init(self, num_topics): + """Initialize the SLDADoc according to num_topics. + """ + self._num_topics = num_topics + self.__sentences = [] + self._num_accum = 0 + self._topic_sum = np.zeros(self._num_topics) + self._accum_topic_sum = np.zeros(self._num_topics) + + def add_sentence(self, sent): + """Add new sentence to current SLDADoc. + Arg: + sent: Sentence class object. + """ + assert sent.topic >= 0, "Topic %d out of range!" % (sent.topic) + assert sent.topic < self._num_topics, "Topic %d out of range!" % (sent.topic) + self.__sentences.append(sent) + self._topic_sum[sent.topic] += 1 + + def set_topic(self, index, new_topic): + assert new_topic >= 0, "Topic %d out of range!" % (new_topic) + assert new_topic < self._num_topics, "Topic %d out of range!" % (new_topic) + old_topic = self.__sentences[index].topic + if new_topic == old_topic: + return + self.__sentences[index].topic = new_topic + self._topic_sum[old_topic] -= 1 + self._topic_sum[new_topic] += 1 + + def size(self): + """Return number of sentences in SLDADoc. 
+ """ + return len(self.__sentences) + + def sent(self, index): + return self.__sentences[index] diff --git a/modules/text/semantic_model/lda_news/inference_engine.py b/modules/text/semantic_model/lda_news/inference_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..96aaaafb53f752edc197eaca2780f4d91fb64374 --- /dev/null +++ b/modules/text/semantic_model/lda_news/inference_engine.py @@ -0,0 +1,91 @@ +import os + +from paddlehub.common.logger import logger + +from lda_news.config import ModelConfig +from lda_news.util import load_prototxt, fix_random_seed, rand_k +from lda_news.model import TopicModel +from lda_news.sampler import GibbsSampler, MHSampler +from lda_news.document import LDADoc, SLDADoc, Token, Sentence +from lda_news.vocab import OOV + + +class SamplerType: + GibbsSampling = 0 + MetropolisHastings = 1 + + +class InferenceEngine(object): + def __init__(self, model_dir, conf_file, type=SamplerType.MetropolisHastings): + # Read model configuration. + config = ModelConfig() + conf_file_path = os.path.join(model_dir, conf_file) + load_prototxt(conf_file_path, config) + self.__model = TopicModel(model_dir, config) + self.__config = config + + # Initialize the sampler according to the configuration. + if type == SamplerType.GibbsSampling: + self.__sampler = GibbsSampler(self.__model) + elif type == SamplerType.MetropolisHastings: + self.__sampler = MHSampler(self.__model) + + def infer(self, input, doc): + """Perform LDA topic inference on input, and store the results in doc. + Args: + input: a list of strings after tokenization. + doc: LDADoc type or SLDADoc type. + """ + fix_random_seed() + if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): + doc.init(self.__model.num_topics()) + doc.set_alpha(self.__model.alpha()) + for token in input: + id_ = self.__model.term_id(token) + if id_ != OOV: + init_topic = rand_k(self.__model.num_topics()) + doc.add_token(Token(init_topic, id_)) + self.lda_infer(doc, 20, 50) + elif isinstance(doc, SLDADoc): + doc.init(self.__model.num_topics()) + doc.set_alpha(self.__model.alpha()) + for sent in input: + words = [] + for token in sent: + id_ = self.__model.term_id(token) + if id_ != OOV: + words.append(id_) + init_topic = rand_k(self.__model.num_topics()) + doc.add_sentence(Sentence(init_topic, words)) + self.slda_infer(doc, 20, 50) + else: + logger.error("Wrong Doc Type!") + + def lda_infer(self, doc, burn_in_iter, total_iter): + assert burn_in_iter >= 0 + assert total_iter > 0 + assert total_iter > burn_in_iter + + for iter_ in range(total_iter): + self.__sampler.sample_doc(doc) + if iter_ >= burn_in_iter: + doc.accumulate_topic_num() + + def slda_infer(self, doc, burn_in_iter, total_iter): + assert burn_in_iter >= 0 + assert total_iter > 0 + assert total_iter > burn_in_iter + + for iter_ in range(total_iter): + self.__sampler.sample_doc(doc) + if iter_ >= burn_in_iter: + doc.accumulate_topic_num() + + def model_type(self): + return self.__model.type() + + def get_model(self): + return self.__model + + def get_config(self): + return self.__config diff --git a/modules/text/semantic_model/lda_news/model.py b/modules/text/semantic_model/lda_news/model.py new file mode 100644 index 0000000000000000000000000000000000000000..3ef089f9058e1a405d24440b152767fac745df9b --- /dev/null +++ b/modules/text/semantic_model/lda_news/model.py @@ -0,0 +1,123 @@ +import os +from collections import OrderedDict + +import numpy as np +from tqdm import tqdm +from paddlehub.common.logger import logger + +from lda_news.vocab import Vocab, 
WordCount
+
+
+class TopicModel(object):
+    """Storage Structure of Topic model, including vocabulary and word topic count.
+    """
+
+    def __init__(self, model_dir, config):
+        """
+        Args:
+            model_dir: the path of model directory
+            config: ModelConfig class.
+        """
+        self.__word_topic = None  # Model parameter of word topic.
+        self.__vocab = Vocab()  # Vocab data structure of model.
+        self.__num_topics = config.num_topics  # Number of topics.
+        self.__alpha = config.alpha
+        self.__alpha_sum = self.__alpha * self.__num_topics
+        self.__beta = config.beta
+        self.__beta_sum = None
+        self.__type = config.type  # Model type.
+        self.__topic_sum = np.zeros(self.__num_topics, dtype="int64")  # Accum sum of each topic in word topic.
+        self.__topic_words = [[] for _ in range(self.__num_topics)]
+        word_topic_path = os.path.join(model_dir, config.word_topic_file)
+        vocab_path = os.path.join(model_dir, config.vocab_file)
+        self.load_model(word_topic_path, vocab_path)
+
+    def term_id(self, term):
+        return self.__vocab.get_id(term)
+
+    def load_model(self, word_topic_path, vocab_path):
+
+        # Loading vocabulary
+        self.__vocab.load(vocab_path)
+
+        self.__beta_sum = self.__beta * self.__vocab.size()
+        self.__word_topic = [{} for _ in range(self.__vocab.size())]  # A list of per-word topic-count dicts.
+        self.__load_word_dict(word_topic_path)
+        logger.info("Model Info: #num_topics=%d #vocab_size=%d alpha=%f beta=%f" %
+                    (self.num_topics(), self.vocab_size(), self.alpha(), self.beta()))
+
+    def word_topic_value(self, word_id, topic_id):
+        """Return value of specific word under specific topic in the model.
+        """
+        word_dict = self.__word_topic[word_id]
+        if topic_id not in word_dict:
+            return 0
+        return word_dict[topic_id]
+
+    def word_topic(self, term_id):
+        """Return the topic distribution of a word.
+        """
+        return self.__word_topic[term_id]
+
+    def topic_sum_value(self, topic_id):
+        return self.__topic_sum[topic_id]
+
+    def topic_sum(self):
+        return self.__topic_sum
+
+    def num_topics(self):
+        return self.__num_topics
+
+    def vocab_size(self):
+        return self.__vocab.size()
+
+    def alpha(self):
+        return self.__alpha
+
+    def alpha_sum(self):
+        return self.__alpha_sum
+
+    def beta(self):
+        return self.__beta
+
+    def beta_sum(self):
+        return self.__beta_sum
+
+    def type(self):
+        return self.__type
+
+    def __load_word_dict(self, word_dict_path):
+        """Load the word topic parameters.
+        """
+        logger.info("Loading word topic.")
+        with open(word_dict_path, 'r', encoding='utf-8') as f:
+            for line in tqdm(f.readlines()):
+                fields = line.strip().split(" ")
+                assert len(fields) > 0, "Model file format error!"
+                term_id = int(fields[0])
+                assert term_id < self.vocab_size(), "Term id out of range!"
+                assert term_id >= 0, "Term id out of range!"
+                for i in range(1, len(fields)):
+                    topic_count = fields[i].split(":")
+                    assert len(topic_count) == 2, "Topic count format error!"
+
+                    topic_id = int(topic_count[0])
+                    assert topic_id >= 0, "Topic out of range!"
+                    assert topic_id < self.__num_topics, "Topic out of range!"
+
+                    count = int(topic_count[1])
+                    assert count >= 0, "Topic count error!"
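+                    # Each line of the word-topic file is expected to look like
+                    # "<term_id> <topic_id>:<count> <topic_id>:<count> ...",
+                    # e.g. "42 3:17 8:5" (hypothetical ids and counts), so the
+                    # statements below accumulate sparse per-word topic counts.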
+ + self.__word_topic[term_id][topic_id] = count + self.__topic_sum[topic_id] += count + self.__topic_words[topic_id].append(WordCount(term_id, count)) + new_dict = OrderedDict() + for key in sorted(self.__word_topic[term_id]): + new_dict[key] = self.__word_topic[term_id][key] + self.__word_topic[term_id] = new_dict + + def get_vocab(self): + return self.__vocab.vocabulary() + + def topic_words(self): + return self.__topic_words diff --git a/modules/text/semantic_model/lda_news/module.py b/modules/text/semantic_model/lda_news/module.py new file mode 100644 index 0000000000000000000000000000000000000000..6066ce0dc61382d085587a4634f259879c3441e9 --- /dev/null +++ b/modules/text/semantic_model/lda_news/module.py @@ -0,0 +1,193 @@ +import os + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.common.logger import logger + +from lda_news.inference_engine import InferenceEngine +from lda_news.document import LDADoc, SLDADoc +from lda_news.semantic_matching import SemanticMatching, WordAndDis +from lda_news.tokenizer import LACTokenizer, SimpleTokenizer +from lda_news.config import ModelType +from lda_news.vocab import Vocab, WordCount + + +@moduleinfo( + name="lda_news", + version="1.0.2", + summary= + "This is a PaddleHub Module for LDA topic model in news dataset, where we can calculate doc distance, calculate the similarity between query and document, etc", + author="DesmonDay", + author_email="", + type="nlp/semantic_model") +class TopicModel(hub.Module): + def _initialize(self): + """ + Initialize with the necessary elements. + """ + self.model_dir = os.path.join(self.directory, 'news') + self.conf_file = 'lda.conf' + self.__engine = InferenceEngine(self.model_dir, self.conf_file) + self.vocab_path = os.path.join(self.model_dir, 'vocab_info.txt') + lac = hub.Module(name="lac") + # self.__tokenizer = SimpleTokenizer(self.vocab_path) + self.__tokenizer = LACTokenizer(self.vocab_path, lac) + + self.vocabulary = self.__engine.get_model().get_vocab() + self.config = self.__engine.get_config() + self.topic_words = self.__engine.get_model().topic_words() + self.topic_sum_table = self.__engine.get_model().topic_sum() + + def take_elem(word_count): + return word_count.count + + for i in range(self.config.num_topics): + self.topic_words[i].sort(key=take_elem, reverse=True) + + logger.info("Finish initialization.") + + def cal_doc_distance(self, doc_text1, doc_text2): + """ + This interface calculates the distance between documents. + + Args: + doc_text1(str): the input document text 1. + doc_text2(str): the input document text 2. + + Returns: + jsd(float): Jensen-Shannon Divergence distance of two documents. + hd(float): Hellinger Distance of two documents. + """ + doc1_tokens = self.__tokenizer.tokenize(doc_text1) + doc2_tokens = self.__tokenizer.tokenize(doc_text2) + + # Document topic inference. + doc1, doc2 = LDADoc(), LDADoc() + self.__engine.infer(doc1_tokens, doc1) + self.__engine.infer(doc2_tokens, doc2) + + # To calculate jsd, we need dense document topic distribution. + dense_dict1 = doc1.dense_topic_dist() + dense_dict2 = doc2.dense_topic_dist() + # Calculate the distance between distributions. + # The smaller the distance, the higher the document semantic similarity. 
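+        # Assuming the standard definitions: with M = (P + Q) / 2,
+        # JSD(P, Q) = (KL(P||M) + KL(Q||M)) / 2 and Hellinger distance
+        # H(P, Q) = sqrt(0.5 * sum((sqrt(p_i) - sqrt(q_i)) ** 2)); both are
+        # symmetric and bounded, unlike raw KL divergence.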
+ sm = SemanticMatching() + jsd = sm.jensen_shannon_divergence(dense_dict1, dense_dict2) + hd = sm.hellinger_distance(dense_dict1, dense_dict2) + + return jsd, hd + + def cal_doc_keywords_similarity(self, document, top_k=10): + """ + This interface can be used to find top k keywords of document. + + Args: + document(str): the input document text. + top_k(int): top k keywords of this document. + + Returns: + results(list): contains top_k keywords and their corresponding + similarity compared to document. + """ + d_tokens = self.__tokenizer.tokenize(document) + + # Do topic inference on documents to obtain topic distribution. + doc = LDADoc() + self.__engine.infer(d_tokens, doc) + doc_topic_dist = doc.sparse_topic_dist() + + items = [] + words = set() + for word in d_tokens: + if word in words: + continue + words.add(word) + wd = WordAndDis() + wd.word = word + sm = SemanticMatching() + wd.distance = sm.likelihood_based_similarity( + terms=[word], doc_topic_dist=doc_topic_dist, model=self.__engine.get_model()) + items.append(wd) + + def take_elem(word_dis): + return word_dis.distance + + items.sort(key=take_elem, reverse=True) + + results = [] + size = len(items) + for i in range(top_k): + if i >= size: + break + results.append({"word": items[i].word, "similarity": items[i].distance}) + + return results + + def cal_query_doc_similarity(self, query, document): + """ + This interface calculates the similarity between query and document. + + Args: + query(str): the input query text. + document(str): the input document text. + + Returns: + lda_sim(float): likelihood based similarity between query and document + based on LDA. + """ + q_tokens = self.__tokenizer.tokenize(query) + d_tokens = self.__tokenizer.tokenize(document) + + doc = LDADoc() + self.__engine.infer(d_tokens, doc) + doc_topic_dist = doc.sparse_topic_dist() + + sm = SemanticMatching() + lda_sim = sm.likelihood_based_similarity(q_tokens, doc_topic_dist, self.__engine.get_model()) + + return lda_sim + + def infer_doc_topic_distribution(self, document): + """ + This interface infers the topic distribution of document. + + Args: + document(str): the input document text. + + Returns: + results(list): returns the topic distribution of document. + """ + tokens = self.__tokenizer.tokenize(document) + if tokens == []: + return [] + results = [] + doc = LDADoc() + self.__engine.infer(tokens, doc) + topics = doc.sparse_topic_dist() + for topic in topics: + results.append({"topic id": topic.tid, "distribution": topic.prob}) + return results + + def show_topic_keywords(self, topic_id, k=10): + """ + This interface returns first k keywords under specific topic. + + Args: + topic_id(int): topic information we want to know. + k(int): top k keywords. + + Returns: + results(dict): contains specific topic's keywords and corresponding + probability. + """ + EPS = 1e-8 + results = {} + if 0 <= topic_id < self.config.num_topics: + k = min(k, len(self.topic_words[topic_id])) + for i in range(k): + prob = self.topic_words[topic_id][i].count / \ + (self.topic_sum_table[topic_id] + EPS) + results[self.vocabulary[self.topic_words[topic_id][i].word_id]] = prob + return results + else: + logger.error("%d is out of range!" 
% topic_id)
diff --git a/modules/text/semantic_model/lda_news/sampler.py b/modules/text/semantic_model/lda_news/sampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3800ab0dc178a4654b1d0870ecb7c785a467d5e
--- /dev/null
+++ b/modules/text/semantic_model/lda_news/sampler.py
@@ -0,0 +1,284 @@
+import numpy as np
+from tqdm import tqdm
+from paddlehub.common.logger import logger
+
+from lda_news.document import LDADoc, SLDADoc, Token, Sentence
+from lda_news.vose_alias import VoseAlias
+from lda_news.util import rand, rand_k
+
+
+class Sampler(object):
+    def __init__(self):
+        pass
+
+    def sample_doc(self, doc):
+        """Sample LDA or SLDA topics for documents.
+        """
+        raise NotImplementedError
+
+
+class MHSampler(Sampler):
+    def __init__(self, model):
+        super().__init__()
+        self.__model = model
+        self.__topic_indexes = None
+        self.__alias_tables = None
+        self.__prob_sum = None
+        self.__beta_alias = VoseAlias()
+        self.__beta_prior_sum = None
+        self.__mh_steps = 2
+        self.__construct_alias_table()
+
+    def __construct_alias_table(self):
+        """Construct alias table for all words.
+        """
+        logger.info("Construct alias table for alias sampling method.")
+        vocab_size = self.__model.vocab_size()
+        self.__topic_indexes = [[] for _ in range(vocab_size)]
+        self.__alias_tables = [VoseAlias() for _ in range(vocab_size)]
+        self.__prob_sum = np.zeros(vocab_size)
+
+        # Construct each word's alias table (prior is not included).
+        for i in tqdm(range(vocab_size)):
+            dist = []
+            prob_sum = 0
+            for key in self.__model.word_topic(i):
+                topic_id = key
+                word_topic_count = self.__model.word_topic(i)[key]
+                topic_sum = self.__model.topic_sum_value(topic_id)
+
+                self.__topic_indexes[i].append(topic_id)
+                q = word_topic_count / (topic_sum + self.__model.beta_sum())
+                dist.append(q)
+                prob_sum += q
+            self.__prob_sum[i] = prob_sum
+            if len(dist) > 0:
+                dist = np.array(dist, dtype=np.float64)
+                self.__alias_tables[i].initialize(dist)
+
+        # Build prior parameter beta's alias table.
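+        # A Vose alias table supports O(1) sampling from a fixed discrete
+        # distribution after O(K) construction: each slot stores a probability
+        # and an "alias" outcome, so one uniform draw selects a slot and a
+        # second chooses between the slot's own outcome and its alias.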
+ beta_dist = self.__model.beta() / (self.__model.topic_sum() + self.__model.beta_sum()) + self.__beta_prior_sum = np.sum(beta_dist) + self.__beta_alias.initialize(beta_dist) + + def sample_doc(self, doc): + if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): + for i in range(doc.size()): + new_topic = self.__sample_token(doc, doc.token(i)) + doc.set_topic(i, new_topic) + elif isinstance(doc, SLDADoc): + for i in range(doc.size()): + new_topic = self.__sample_sentence(doc, doc.sent(i)) + doc.set_topic(i, new_topic) + + def __sample_token(self, doc, token): + new_topic = token.topic + for i in range(self.__mh_steps): + doc_proposed_topic = self.__doc_proposal(doc, token) + new_topic = self.__word_proposal(doc, token, doc_proposed_topic) + return new_topic + + def __sample_sentence(self, doc, sent): + new_topic = sent.topic + for i in range(self.__mh_steps): + doc_proposed_topic = self.__doc_proposal(doc, sent) + new_topic = self.__word_proposal(doc, sent, doc_proposed_topic) + return new_topic + + def __doc_proposal(self, doc, token): + if isinstance(doc, LDADoc) and isinstance(token, Token): + old_topic = token.topic + dart = rand() * (doc.size() + self.__model.alpha_sum()) + if dart < doc.size(): + token_index = int(dart) + new_topic = doc.token(token_index).topic + else: + new_topic = rand_k(self.__model.num_topics()) + + if new_topic != old_topic: + proposal_old = self.__doc_proposal_distribution(doc, old_topic) + proposal_new = self.__doc_proposal_distribution(doc, new_topic) + proportion_old = self.__proportional_function(doc, token, old_topic) + proportion_new = self.__proportional_function(doc, token, new_topic) + transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new)) + rejection = rand() + mask = -(rejection < transition_prob) + return (new_topic & mask) | (old_topic & ~mask) + + return new_topic + + elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): + sent = token + old_topic = sent.topic + dart = rand() * (doc.size() + self.__model.alpha_sum()) + if dart < doc.size(): + token_index = int(dart) + new_topic = doc.sent(token_index).topic + else: + new_topic = rand_k(self.__model.num_topics()) + + if new_topic != old_topic: + proportion_old = self.__proportional_function(doc, sent, old_topic) + proportion_new = self.__proportional_function(doc, sent, new_topic) + proposal_old = self.__doc_proposal_distribution(doc, old_topic) + proposal_new = self.__doc_proposal_distribution(doc, new_topic) + transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new)) + rejection = rand() + mask = -(rejection < transition_prob) + return (new_topic & mask) | (old_topic & ~mask) + + return new_topic + + def __word_proposal(self, doc, token, old_topic): + if isinstance(doc, LDADoc) and isinstance(token, Token): + new_topic = self.__propose(token.id) + if new_topic != old_topic: + proposal_old = self.__word_proposal_distribution(token.id, old_topic) + proposal_new = self.__word_proposal_distribution(token.id, new_topic) + proportion_old = self.__proportional_function(doc, token, old_topic) + proportion_new = self.__proportional_function(doc, token, new_topic) + transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new)) + rejection = rand() + mask = -(rejection < transition_prob) + return (new_topic & mask) | (old_topic & ~mask) + return new_topic + + elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): + sent = token + new_topic = old_topic + for word_id in sent.tokens: + 
new_topic = self.__propose(word_id)
+                if new_topic != old_topic:
+                    proportion_old = self.__proportional_function(doc, sent, old_topic)
+                    proportion_new = self.__proportional_function(doc, sent, new_topic)
+                    proposal_old = self.__word_proposal_distribution(word_id, old_topic)
+                    proposal_new = self.__word_proposal_distribution(word_id, new_topic)
+                    transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new))
+                    rejection = rand()
+                    mask = -(rejection < transition_prob)
+                    new_topic = (new_topic & mask) | (old_topic & ~mask)
+            return new_topic
+
+    def __proportional_function(self, doc, token, new_topic):
+        if isinstance(doc, LDADoc) and isinstance(token, Token):
+            old_topic = token.topic
+            dt_alpha = doc.topic_sum(new_topic) + self.__model.alpha()
+            wt_beta = self.__model.word_topic_value(token.id, new_topic) + self.__model.beta()
+            t_sum_beta_sum = self.__model.topic_sum_value(new_topic) + self.__model.beta_sum()
+            if new_topic == old_topic and wt_beta > 1:
+                if dt_alpha > 1:
+                    dt_alpha -= 1
+                wt_beta -= 1
+                t_sum_beta_sum -= 1
+            return dt_alpha * wt_beta / t_sum_beta_sum
+
+        elif isinstance(doc, SLDADoc) and isinstance(token, Sentence):
+            sent = token
+            old_topic = sent.topic
+            result = doc.topic_sum(new_topic) + self.__model.alpha()
+            if new_topic == old_topic:
+                result -= 1
+            for word_id in sent.tokens:
+                wt_beta = self.__model.word_topic_value(word_id, new_topic) + self.__model.beta()
+                t_sum_beta_sum = self.__model.topic_sum_value(new_topic) + self.__model.beta_sum()
+                if new_topic == old_topic and wt_beta > 1:
+                    wt_beta -= 1
+                    t_sum_beta_sum -= 1
+                result *= wt_beta / t_sum_beta_sum
+            return result
+        else:
+            logger.error("Wrong input argument type!")
+
+    def __word_proposal_distribution(self, word_id, topic):
+        wt_beta = self.__model.word_topic_value(word_id, topic) + self.__model.beta()
+        t_sum_beta_sum = self.__model.topic_sum_value(topic) + self.__model.beta_sum()
+        return wt_beta / t_sum_beta_sum
+
+    def __doc_proposal_distribution(self, doc, topic):
+        return doc.topic_sum(topic) + self.__model.alpha()
+
+    def __propose(self, word_id):
+        dart = rand() * (self.__prob_sum[word_id] + self.__beta_prior_sum)
+        if dart < self.__prob_sum[word_id]:
+            idx = self.__alias_tables[word_id].generate()
+            topic = self.__topic_indexes[word_id][idx]
+        else:
+            topic = self.__beta_alias.generate()
+        return topic
+
+
+class GibbsSampler(Sampler):
+    def __init__(self, model):
+        super().__init__()
+        self.__model = model
+
+    def sample_doc(self, doc):
+        if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc):
+            for i in range(doc.size()):
+                new_topic = self.__sample_token(doc, doc.token(i))
+                doc.set_topic(i, new_topic)
+        elif isinstance(doc, SLDADoc):
+            for i in range(doc.size()):
+                new_topic = self.__sample_sentence(doc, doc.sent(i))
+                doc.set_topic(i, new_topic)
+
+    def __sample_token(self, doc, token):
+        old_topic = token.topic
+        num_topics = self.__model.num_topics()
+        accum_prob = np.zeros(num_topics)
+        prob = np.zeros(num_topics)
+        sum_ = 0
+        for i in range(num_topics):
+            dt_alpha = doc.topic_sum(i) + self.__model.alpha()
+            wt_beta = self.__model.word_topic_value(token.id, i) + self.__model.beta()
+            t_sum_beta_sum = self.__model.topic_sum_value(i) + self.__model.beta_sum()
+            if i == old_topic and wt_beta > 1:
+                if dt_alpha > 1:
+                    dt_alpha -= 1
+                wt_beta -= 1
+                t_sum_beta_sum -= 1
+            prob[i] = dt_alpha * wt_beta / t_sum_beta_sum
+            sum_ += prob[i]
+            accum_prob[i] = prob[i] if i == 0 else accum_prob[i - 1] + prob[i]
+
+        # Inverse-CDF sampling: accum_prob holds running cumulative sums of the
+        # unnormalized conditionals, so locating dart in it draws topic i with
+        # probability prob[i] / sum_.
+        dart = rand() * sum_
+        if dart <= accum_prob[0]:
+            return
0
+        for i in range(1, num_topics):
+            if accum_prob[i - 1] < dart <= accum_prob[i]:
+                return i
+        return num_topics - 1
+
+    def __sample_sentence(self, doc, sent):
+        old_topic = sent.topic
+        num_topics = self.__model.num_topics()
+        accum_prob = np.zeros(num_topics)
+        prob = np.zeros(num_topics)
+        sum_ = 0
+        for t in range(num_topics):
+            dt_alpha = doc.topic_sum(t) + self.__model.alpha()
+            t_sum_beta_sum = self.__model.topic_sum_value(t) + self.__model.beta_sum()
+            if t == old_topic:
+                if dt_alpha > 1:
+                    dt_alpha -= 1
+                if t_sum_beta_sum > 1:
+                    t_sum_beta_sum -= 1
+            prob[t] = dt_alpha
+            for i in range(len(sent.tokens)):
+                w = sent.tokens[i]
+                wt_beta = self.__model.word_topic_value(w, t) + self.__model.beta()
+                if t == old_topic and wt_beta > 1:
+                    wt_beta -= 1
+                # Note: if the sentence is very long, repeatedly multiplying
+                # many small factors can underflow and lose precision.
+                prob[t] *= wt_beta / t_sum_beta_sum
+            sum_ += prob[t]
+            accum_prob[t] = prob[t] if t == 0 else accum_prob[t - 1] + prob[t]
+
+        dart = rand() * sum_
+        if dart <= accum_prob[0]:
+            return 0
+        for t in range(1, num_topics):
+            if accum_prob[t - 1] < dart <= accum_prob[t]:
+                return t
+        return num_topics - 1
diff --git a/hub_module/modules/text/semantic_model/lda_news/semantic_matching.py b/modules/text/semantic_model/lda_news/semantic_matching.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/lda_news/semantic_matching.py
rename to modules/text/semantic_model/lda_news/semantic_matching.py
diff --git a/modules/text/semantic_model/lda_news/tokenizer.py b/modules/text/semantic_model/lda_news/tokenizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..e59037d88207298d227fc0f476b83f3229e80c36
--- /dev/null
+++ b/modules/text/semantic_model/lda_news/tokenizer.py
@@ -0,0 +1,125 @@
+"""This file defines the tokenizer classes.
+"""
+
+
+class Tokenizer(object):
+    """Base tokenizer class.
+    """
+
+    def __init__(self):
+        pass
+
+    def tokenize(self, text):
+        raise NotImplementedError
+
+
+class SimpleTokenizer(Tokenizer):
+    """A simple forward maximum matching (FMM) word tokenizer. It is intended
+    only for the topic model demo, not for real business application scenarios.
+
+    Notes: This tokenizer can only recognize the words in the corresponding vocab file.
+    """
+
+    def __init__(self, vocab_path):
+        super().__init__()
+        self.__max_word_len = 0
+        self.__vocab = set()
+        self.__load_vocab(vocab_path)
+
+    def tokenize(self, text):
+        """Tokenize the input string `text`, and return the tokenized result.
+        """
+        text_len = len(text)
+        result = []
+        i = 0
+        while i < text_len:
+            word = found_word = ""
+            # Deal with English characters.
+            if self.__is_eng_char(text[i]):
+                for j in range(i, text_len + 1):
+                    if j < text_len and self.__is_eng_char(text[j]):
+                        word += self.__tolower(text[j])
+                    else:
+                        # Forward matching by character granularity.
+                        if word in self.__vocab:
+                            result.append(word)
+                        i = j - 1
+                        break
+            else:
+                for j in range(i, min(i + self.__max_word_len, text_len)):
+                    word += text[j]
+                    if word in self.__vocab:
+                        found_word = word
+                if len(found_word) > 0:
+                    result.append(found_word)
+                    i += len(found_word) - 1
+            i += 1
+        return result
+
+    def contains(self, word):
+        """Check whether the word is in the vocabulary.
+        """
+        return word in self.__vocab
+
+    def __load_vocab(self, vocab_path):
+        """Load the word dictionary.
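+
+        Each vocab line is expected to be tab-separated with the word in its
+        second field (the only field used here); the longest word length is
+        tracked to bound forward maximum matching.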
+ """ + with open(vocab_path, 'r', encoding='utf-8') as fin: + vocab_size = 0 + for line in fin.readlines(): + fields = line.strip().split('\t') + assert len(fields) >= 2 + word = fields[1] + self.__max_word_len = max(self.__max_word_len, len(word)) + self.__vocab.add(word) + vocab_size += 1 + + def __is_eng_char(self, c): + """Check whether char c is an English character. + """ + return (c >= 'A' and c <= 'Z') or (c >= 'a' and c <= 'z') + + def __tolower(self, c): + """Return the lowercase character of the corresponding character, or return + the original character if there is no corresponding lowercase character. + """ + return c.lower() + + +class LACTokenizer(Tokenizer): + def __init__(self, vocab_path, lac): + super().__init__() + self.__max_word_len = 0 + self.__vocab = set() + self.__lac = lac + self.__load_vocab(vocab_path) + + def __load_vocab(self, vocab_path): + """Load the word dictionary. + """ + with open(vocab_path, 'r', encoding='utf-8') as fin: + vocab_size = 0 + for line in fin.readlines(): + fields = line.strip().split('\t') + assert len(fields) >= 2 + word = fields[1] + self.__max_word_len = max(self.__max_word_len, len(word)) + self.__vocab.add(word) + vocab_size += 1 + + def tokenize(self, text): + results = self.__lac.lexical_analysis(texts=[text], use_gpu=False, batch_size=1, return_tag=True) + # Change English words to lower case. + # And just preserve the word in vocab. + words = results[0]["word"] + result = [] + for word in words: + word = word.lower() + if word in self.__vocab: + result.append(word) + return result + + def contains(self, word): + """Check whether the word is in the vocabulary. + """ + return word in self.__vocab diff --git a/hub_module/modules/text/semantic_model/lda_news/util.py b/modules/text/semantic_model/lda_news/util.py similarity index 100% rename from hub_module/modules/text/semantic_model/lda_news/util.py rename to modules/text/semantic_model/lda_news/util.py diff --git a/modules/text/semantic_model/lda_news/vocab.py b/modules/text/semantic_model/lda_news/vocab.py new file mode 100644 index 0000000000000000000000000000000000000000..7cc01fe71ceb63d2328ddaf15da8173c9508c118 --- /dev/null +++ b/modules/text/semantic_model/lda_news/vocab.py @@ -0,0 +1,41 @@ +from paddlehub.common.logger import logger + +OOV = -1 + + +class WordCount(object): + def __init__(self, word_id, count): + self.word_id = word_id + self.count = count + + +class Vocab(object): + def __init__(self): + self.__term2id = {} + self.__id2term = {} + + def get_id(self, word): + if word not in self.__term2id: + return OOV + return self.__term2id[word] + + def load(self, vocab_file): + self.__term2id = {} + self.__id2term = {} + with open(vocab_file, 'r', encoding='utf-8') as fin: + for line in fin.readlines(): + fields = line.strip().split('\t') + assert len(fields) == 5, "Vocabulary file [%s] format error!" % (vocab_file) + term = fields[1] + id_ = int(fields[2]) + if term in self.__term2id: + logger.error("Duplicate word [%s] in vocab file!" 
% (term)) + continue + self.__term2id[term] = id_ + self.__id2term[id_] = term + + def size(self): + return len(self.__term2id) + + def vocabulary(self): + return self.__id2term diff --git a/hub_module/modules/text/semantic_model/lda_news/vose_alias.py b/modules/text/semantic_model/lda_news/vose_alias.py similarity index 100% rename from hub_module/modules/text/semantic_model/lda_news/vose_alias.py rename to modules/text/semantic_model/lda_news/vose_alias.py diff --git a/hub_module/modules/text/semantic_model/lda_novel/README.md b/modules/text/semantic_model/lda_novel/README.md similarity index 100% rename from hub_module/modules/text/semantic_model/lda_novel/README.md rename to modules/text/semantic_model/lda_novel/README.md diff --git a/hub_module/modules/text/semantic_model/lda_news/__init__.py b/modules/text/semantic_model/lda_novel/__init__.py similarity index 100% rename from hub_module/modules/text/semantic_model/lda_news/__init__.py rename to modules/text/semantic_model/lda_novel/__init__.py diff --git a/hub_module/modules/text/semantic_model/lda_novel/config.py b/modules/text/semantic_model/lda_novel/config.py similarity index 100% rename from hub_module/modules/text/semantic_model/lda_novel/config.py rename to modules/text/semantic_model/lda_novel/config.py diff --git a/modules/text/semantic_model/lda_novel/document.py b/modules/text/semantic_model/lda_novel/document.py new file mode 100644 index 0000000000000000000000000000000000000000..4476230a5c9bc8d545b52386dbf00a201e59b468 --- /dev/null +++ b/modules/text/semantic_model/lda_novel/document.py @@ -0,0 +1,176 @@ +import numpy as np + + +class Topic(object): + """Basic data structure of topic, contains topic id and + corresponding probability. + """ + + def __init__(self, tid, prob): + self.tid = tid # topic id + self.prob = prob # topic probability + + +class Token(object): + """Basic storage unit of LDA documents, contains word id + and corresponding topic. + """ + + def __init__(self, topic, id): + self.topic = topic + self.id = id + + +class Sentence(object): + """Basic storage unit of SentenceLDA documents, contains word ids + of the sentence and its corresponding topic id. + """ + + def __init__(self, topic, tokens): + self.topic = topic + self.tokens = tokens + + +class LDADoc(object): + """The storage structure of LDA model's inference result. + """ + + def __init__(self): + self._num_topics = None # Number of topics. + self._num_accum = None # Number of accumulated sample rounds. + self._alpha = None # Document prior parameter. + self._tokens = None # Storage structure of inference results. + self._topic_sum = None # Document's topic sum in one round samples. + self._accum_topic_sum = None # Accumulated results of topic sum. + + def init(self, num_topics): + """Initialize the LDADoc according to num_topics. + """ + self._num_topics = num_topics + self._num_accum = 0 + self._tokens = [] + self._topic_sum = np.zeros(self._num_topics) + self._accum_topic_sum = np.zeros(self._num_topics) + + def add_token(self, token): + """Add new word to current LDADoc. + Arg: + token: Token class object. + """ + assert token.topic >= 0, "Topic %d out of range!" % token.topic + assert token.topic < self._num_topics, "Topic %d out of range!" % token.topic + self._tokens.append(token) + self._topic_sum[token.topic] += 1 + + def token(self, index): + return self._tokens[index] + + def set_topic(self, index, new_topic): + """Set the index word's topic to new_topic, and update the corresponding + topic distribution. 
+ """ + assert new_topic >= 0, "Topic %d out of range!" % new_topic + assert new_topic < self._num_topics, "Topic %d out of range!" % new_topic + old_topic = self._tokens[index].topic + if new_topic == old_topic: + return + self._tokens[index].topic = new_topic + self._topic_sum[old_topic] -= 1 + self._topic_sum[new_topic] += 1 + + def set_alpha(self, alpha): + self._alpha = alpha + + def size(self): + """Return number of words in LDADoc. + """ + return len(self._tokens) + + def topic_sum(self, topic_id): + return self._topic_sum[topic_id] + + def sparse_topic_dist(self, sort=True): + """Return the topic distribution of documents in sparse format. + By default, it is sorted according to the topic probability + under the descending order. + """ + topic_dist = [] + sum_ = np.sum(self._accum_topic_sum) + if sum_ == 0: + return topic_dist + for i in range(0, self._num_topics): + if self._accum_topic_sum[i] == 0: + continue + topic_dist.append(Topic(i, self._accum_topic_sum[i] * 1.0 / sum_)) + if sort: + + def take_elem(topic): + return topic.prob + + topic_dist.sort(key=take_elem, reverse=True) + if topic_dist is None: + topic_dist = [] + + return topic_dist + + def dense_topic_dist(self): + """Return the distribution of document topics in dense format, + taking into account the prior parameter alpha. + """ + dense_dist = np.zeros(self._num_topics) + if self.size() == 0: + return dense_dist + dense_dist = (self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / ( + self.size() + self._alpha * self._num_topics) + return dense_dist + + def accumulate_topic_num(self): + self._accum_topic_sum += self._topic_sum + self._num_accum += 1 + + +class SLDADoc(LDADoc): + """Sentence LDA Document, inherited from LDADoc. + Add add_sentence interface. + """ + + def __init__(self): + super().__init__() + self.__sentences = None + + def init(self, num_topics): + """Initialize the SLDADoc according to num_topics. + """ + self._num_topics = num_topics + self.__sentences = [] + self._num_accum = 0 + self._topic_sum = np.zeros(self._num_topics) + self._accum_topic_sum = np.zeros(self._num_topics) + + def add_sentence(self, sent): + """Add new sentence to current SLDADoc. + Arg: + sent: Sentence class object. + """ + assert sent.topic >= 0, "Topic %d out of range!" % (sent.topic) + assert sent.topic < self._num_topics, "Topic %d out of range!" % (sent.topic) + self.__sentences.append(sent) + self._topic_sum[sent.topic] += 1 + + def set_topic(self, index, new_topic): + assert new_topic >= 0, "Topic %d out of range!" % (new_topic) + assert new_topic < self._num_topics, "Topic %d out of range!" % (new_topic) + old_topic = self.__sentences[index].topic + if new_topic == old_topic: + return + self.__sentences[index].topic = new_topic + self._topic_sum[old_topic] -= 1 + self._topic_sum[new_topic] += 1 + + def size(self): + """Return number of sentences in SLDADoc. 
+ """ + return len(self.__sentences) + + def sent(self, index): + return self.__sentences[index] diff --git a/modules/text/semantic_model/lda_novel/inference_engine.py b/modules/text/semantic_model/lda_novel/inference_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..a5e8c8b184733e6e3c4c3379d4a04a70ab65da9e --- /dev/null +++ b/modules/text/semantic_model/lda_novel/inference_engine.py @@ -0,0 +1,91 @@ +import os + +from paddlehub.common.logger import logger + +from lda_novel.config import ModelConfig +from lda_novel.util import load_prototxt, fix_random_seed, rand_k +from lda_novel.model import TopicModel +from lda_novel.sampler import GibbsSampler, MHSampler +from lda_novel.document import LDADoc, SLDADoc, Token, Sentence +from lda_novel.vocab import OOV + + +class SamplerType: + GibbsSampling = 0 + MetropolisHastings = 1 + + +class InferenceEngine(object): + def __init__(self, model_dir, conf_file, type=SamplerType.MetropolisHastings): + # Read model configuration. + config = ModelConfig() + conf_file_path = os.path.join(model_dir, conf_file) + load_prototxt(conf_file_path, config) + self.__model = TopicModel(model_dir, config) + self.__config = config + + # Initialize the sampler according to the configuration. + if type == SamplerType.GibbsSampling: + self.__sampler = GibbsSampler(self.__model) + elif type == SamplerType.MetropolisHastings: + self.__sampler = MHSampler(self.__model) + + def infer(self, input, doc): + """Perform LDA topic inference on input, and store the results in doc. + Args: + input: a list of strings after tokenization. + doc: LDADoc type or SLDADoc type. + """ + fix_random_seed() + if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): + doc.init(self.__model.num_topics()) + doc.set_alpha(self.__model.alpha()) + for token in input: + id_ = self.__model.term_id(token) + if id_ != OOV: + init_topic = rand_k(self.__model.num_topics()) + doc.add_token(Token(init_topic, id_)) + self.lda_infer(doc, 20, 50) + elif isinstance(doc, SLDADoc): + doc.init(self.__model.num_topics()) + doc.set_alpha(self.__model.alpha()) + for sent in input: + words = [] + for token in sent: + id_ = self.__model.term_id(token) + if id_ != OOV: + words.append(id_) + init_topic = rand_k(self.__model.num_topics()) + doc.add_sentence(Sentence(init_topic, words)) + self.slda_infer(doc, 20, 50) + else: + logger.error("Wrong Doc Type!") + + def lda_infer(self, doc, burn_in_iter, total_iter): + assert burn_in_iter >= 0 + assert total_iter > 0 + assert total_iter > burn_in_iter + + for iter_ in range(total_iter): + self.__sampler.sample_doc(doc) + if iter_ >= burn_in_iter: + doc.accumulate_topic_num() + + def slda_infer(self, doc, burn_in_iter, total_iter): + assert burn_in_iter >= 0 + assert total_iter > 0 + assert total_iter > burn_in_iter + + for iter_ in range(total_iter): + self.__sampler.sample_doc(doc) + if iter_ >= burn_in_iter: + doc.accumulate_topic_num() + + def model_type(self): + return self.__model.type() + + def get_model(self): + return self.__model + + def get_config(self): + return self.__config diff --git a/modules/text/semantic_model/lda_novel/model.py b/modules/text/semantic_model/lda_novel/model.py new file mode 100644 index 0000000000000000000000000000000000000000..f16962be868c1cb33666a89f94843782f7e012fe --- /dev/null +++ b/modules/text/semantic_model/lda_novel/model.py @@ -0,0 +1,123 @@ +import os +from collections import OrderedDict + +import numpy as np +from tqdm import tqdm +from paddlehub.common.logger import logger + +from lda_novel.vocab 
import Vocab, WordCount
+
+
+class TopicModel(object):
+    """Storage Structure of Topic model, including vocabulary and word topic count.
+    """
+
+    def __init__(self, model_dir, config):
+        """
+        Args:
+            model_dir: the path of model directory
+            config: ModelConfig class.
+        """
+        self.__word_topic = None  # Model parameter of word topic.
+        self.__vocab = Vocab()  # Vocab data structure of model.
+        self.__num_topics = config.num_topics  # Number of topics.
+        self.__alpha = config.alpha
+        self.__alpha_sum = self.__alpha * self.__num_topics
+        self.__beta = config.beta
+        self.__beta_sum = None
+        self.__type = config.type  # Model type.
+        self.__topic_sum = np.zeros(self.__num_topics, dtype="int64")  # Accum sum of each topic in word topic.
+        self.__topic_words = [[] for _ in range(self.__num_topics)]
+        word_topic_path = os.path.join(model_dir, config.word_topic_file)
+        vocab_path = os.path.join(model_dir, config.vocab_file)
+        self.load_model(word_topic_path, vocab_path)
+
+    def term_id(self, term):
+        return self.__vocab.get_id(term)
+
+    def load_model(self, word_topic_path, vocab_path):
+
+        # Loading vocabulary
+        self.__vocab.load(vocab_path)
+
+        self.__beta_sum = self.__beta * self.__vocab.size()
+        self.__word_topic = [{} for _ in range(self.__vocab.size())]  # A list of per-word topic-count dicts.
+        self.__load_word_dict(word_topic_path)
+        logger.info("Model Info: #num_topics=%d #vocab_size=%d alpha=%f beta=%f" %
+                    (self.num_topics(), self.vocab_size(), self.alpha(), self.beta()))
+
+    def word_topic_value(self, word_id, topic_id):
+        """Return value of specific word under specific topic in the model.
+        """
+        word_dict = self.__word_topic[word_id]
+        if topic_id not in word_dict:
+            return 0
+        return word_dict[topic_id]
+
+    def word_topic(self, term_id):
+        """Return the topic distribution of a word.
+        """
+        return self.__word_topic[term_id]
+
+    def topic_sum_value(self, topic_id):
+        return self.__topic_sum[topic_id]
+
+    def topic_sum(self):
+        return self.__topic_sum
+
+    def num_topics(self):
+        return self.__num_topics
+
+    def vocab_size(self):
+        return self.__vocab.size()
+
+    def alpha(self):
+        return self.__alpha
+
+    def alpha_sum(self):
+        return self.__alpha_sum
+
+    def beta(self):
+        return self.__beta
+
+    def beta_sum(self):
+        return self.__beta_sum
+
+    def type(self):
+        return self.__type
+
+    def __load_word_dict(self, word_dict_path):
+        """Load the word topic parameters.
+        """
+        logger.info("Loading word topic.")
+        with open(word_dict_path, 'r', encoding='utf-8') as f:
+            for line in tqdm(f.readlines()):
+                fields = line.strip().split(" ")
+                assert len(fields) > 0, "Model file format error!"
+                term_id = int(fields[0])
+                assert term_id < self.vocab_size(), "Term id out of range!"
+                assert term_id >= 0, "Term id out of range!"
+                for i in range(1, len(fields)):
+                    topic_count = fields[i].split(":")
+                    assert len(topic_count) == 2, "Topic count format error!"
+
+                    topic_id = int(topic_count[0])
+                    assert topic_id >= 0, "Topic out of range!"
+                    assert topic_id < self.__num_topics, "Topic out of range!"
+
+                    count = int(topic_count[1])
+                    assert count >= 0, "Topic count error!"
+ + self.__word_topic[term_id][topic_id] = count + self.__topic_sum[topic_id] += count + self.__topic_words[topic_id].append(WordCount(term_id, count)) + new_dict = OrderedDict() + for key in sorted(self.__word_topic[term_id]): + new_dict[key] = self.__word_topic[term_id][key] + self.__word_topic[term_id] = new_dict + + def get_vocab(self): + return self.__vocab.vocabulary() + + def topic_words(self): + return self.__topic_words diff --git a/modules/text/semantic_model/lda_novel/module.py b/modules/text/semantic_model/lda_novel/module.py new file mode 100644 index 0000000000000000000000000000000000000000..ed211ac4017312bf469b9cb70d87c1c321a00a1a --- /dev/null +++ b/modules/text/semantic_model/lda_novel/module.py @@ -0,0 +1,193 @@ +import os + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.common.logger import logger + +from lda_novel.inference_engine import InferenceEngine +from lda_novel.document import LDADoc, SLDADoc +from lda_novel.semantic_matching import SemanticMatching, WordAndDis +from lda_novel.tokenizer import LACTokenizer, SimpleTokenizer +from lda_novel.config import ModelType +from lda_novel.vocab import Vocab, WordCount + + +@moduleinfo( + name="lda_novel", + version="1.0.2", + summary= + "This is a PaddleHub Module for LDA topic model in novel dataset, where we can calculate doc distance, calculate the similarity between query and document, etc.", + author="DesmonDay", + author_email="", + type="nlp/semantic_model") +class TopicModel(hub.Module): + def _initialize(self): + """ + Initialize with the necessary elements. + """ + self.model_dir = os.path.join(self.directory, 'novel') + self.conf_file = 'lda.conf' + self.__engine = InferenceEngine(self.model_dir, self.conf_file) + self.vocab_path = os.path.join(self.model_dir, 'vocab_info.txt') + lac = hub.Module(name="lac") + # self.__tokenizer = SimpleTokenizer(self.vocab_path) + self.__tokenizer = LACTokenizer(self.vocab_path, lac) + + self.vocabulary = self.__engine.get_model().get_vocab() + self.config = self.__engine.get_config() + self.topic_words = self.__engine.get_model().topic_words() + self.topic_sum_table = self.__engine.get_model().topic_sum() + + def take_elem(word_count): + return word_count.count + + for i in range(self.config.num_topics): + self.topic_words[i].sort(key=take_elem, reverse=True) + + logger.info("Finish initialization.") + + def cal_doc_distance(self, doc_text1, doc_text2): + """ + This interface calculates the distance between documents. + + Args: + doc_text1(str): the input document text 1. + doc_text2(str): the input document text 2. + + Returns: + jsd(float): Jensen-Shannon Divergence distance of two documents. + hd(float): Hellinger Distance of two documents. + """ + doc1_tokens = self.__tokenizer.tokenize(doc_text1) + doc2_tokens = self.__tokenizer.tokenize(doc_text2) + + # Document topic inference. + doc1, doc2 = LDADoc(), LDADoc() + self.__engine.infer(doc1_tokens, doc1) + self.__engine.infer(doc2_tokens, doc2) + + # To calculate jsd, we need dense document topic distribution. + dense_dict1 = doc1.dense_topic_dist() + dense_dict2 = doc2.dense_topic_dist() + # Calculate the distance between distributions. + # The smaller the distance, the higher the document semantic similarity. 
+ sm = SemanticMatching() + jsd = sm.jensen_shannon_divergence(dense_dict1, dense_dict2) + hd = sm.hellinger_distance(dense_dict1, dense_dict2) + + return jsd, hd + + def cal_doc_keywords_similarity(self, document, top_k=10): + """ + This interface can be used to find topk keywords of document. + + Args: + document(str): the input document text. + top_k(int): top k keywords of this document. + + Returns: + results(list): contains top_k keywords and their corresponding + similarity compared to document. + """ + d_tokens = self.__tokenizer.tokenize(document) + + # Do topic inference on documents to obtain topic distribution. + doc = LDADoc() + self.__engine.infer(d_tokens, doc) + doc_topic_dist = doc.sparse_topic_dist() + + items = [] + words = set() + for word in d_tokens: + if word in words: + continue + words.add(word) + wd = WordAndDis() + wd.word = word + sm = SemanticMatching() + wd.distance = sm.likelihood_based_similarity( + terms=[word], doc_topic_dist=doc_topic_dist, model=self.__engine.get_model()) + items.append(wd) + + def take_elem(word_dis): + return word_dis.distance + + items.sort(key=take_elem, reverse=True) + + results = [] + size = len(items) + for i in range(top_k): + if i >= size: + break + results.append({"word": items[i].word, "similarity": items[i].distance}) + + return results + + def cal_query_doc_similarity(self, query, document): + """ + This interface calculates the similarity between query and document. + + Args: + query(str): the input query text. + document(str): the input document text. + + Returns: + lda_sim(float): likelihood based similarity between query and document + based on LDA. + """ + q_tokens = self.__tokenizer.tokenize(query) + d_tokens = self.__tokenizer.tokenize(document) + + doc = LDADoc() + self.__engine.infer(d_tokens, doc) + doc_topic_dist = doc.sparse_topic_dist() + + sm = SemanticMatching() + lda_sim = sm.likelihood_based_similarity(q_tokens, doc_topic_dist, self.__engine.get_model()) + + return lda_sim + + def infer_doc_topic_distribution(self, document): + """ + This interface infers the topic distribution of document. + + Args: + document(str): the input document text. + + Returns: + results(list): returns the topic distribution of document. + """ + tokens = self.__tokenizer.tokenize(document) + if tokens == []: + return [] + results = [] + doc = LDADoc() + self.__engine.infer(tokens, doc) + topics = doc.sparse_topic_dist() + for topic in topics: + results.append({"topic id": topic.tid, "distribution": topic.prob}) + return results + + def show_topic_keywords(self, topic_id, k=10): + """ + This interface returns the k keywords under specific topic. + + Args: + topic_id(int): topic information we want to know. + k(int): top k keywords. + + Returns: + results(dict): contains specific topic's keywords and corresponding + probability. + """ + EPS = 1e-8 + results = {} + if 0 <= topic_id < self.config.num_topics: + k = min(k, len(self.topic_words[topic_id])) + for i in range(k): + prob = self.topic_words[topic_id][i].count / \ + (self.topic_sum_table[topic_id] + EPS) + results[self.vocabulary[self.topic_words[topic_id][i].word_id]] = prob + return results + else: + logger.error("%d is out of range!" 
% topic_id)
diff --git a/modules/text/semantic_model/lda_novel/sampler.py b/modules/text/semantic_model/lda_novel/sampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..d345b417fe2cd2a40f93c71ef6dbe1536e90ab72
--- /dev/null
+++ b/modules/text/semantic_model/lda_novel/sampler.py
@@ -0,0 +1,286 @@
+import os
+
+import numpy as np
+from tqdm import tqdm
+from paddlehub.common.logger import logger
+
+from lda_novel.document import LDADoc, SLDADoc, Token, Sentence
+from lda_novel.vose_alias import VoseAlias
+from lda_novel.util import rand, rand_k
+
+
+class Sampler(object):
+    def __init__(self):
+        pass
+
+    def sample_doc(self, doc):
+        """Sample LDA or SLDA topics for documents.
+        """
+        raise NotImplementedError
+
+
+class MHSampler(Sampler):
+    def __init__(self, model):
+        super().__init__()
+        self.__model = model
+        self.__topic_indexes = None
+        self.__alias_tables = None
+        self.__prob_sum = None
+        self.__beta_alias = VoseAlias()
+        self.__beta_prior_sum = None
+        self.__mh_steps = 2
+        self.__construct_alias_table()
+
+    def __construct_alias_table(self):
+        """Construct alias table for all words.
+        """
+        logger.info("Construct alias table for alias sampling method.")
+        vocab_size = self.__model.vocab_size()
+        self.__topic_indexes = [[] for _ in range(vocab_size)]
+        self.__alias_tables = [VoseAlias() for _ in range(vocab_size)]
+        self.__prob_sum = np.zeros(vocab_size)
+
+        # Construct each word's alias table (prior is not included).
+        for i in tqdm(range(vocab_size)):
+            dist = []
+            prob_sum = 0
+            for key in self.__model.word_topic(i):
+                topic_id = key
+                word_topic_count = self.__model.word_topic(i)[key]
+                topic_sum = self.__model.topic_sum_value(topic_id)
+
+                self.__topic_indexes[i].append(topic_id)
+                q = word_topic_count / (topic_sum + self.__model.beta_sum())
+                dist.append(q)
+                prob_sum += q
+            self.__prob_sum[i] = prob_sum
+            if len(dist) > 0:
+                dist = np.array(dist, dtype=np.float64)
+                self.__alias_tables[i].initialize(dist)
+
+        # Build prior parameter beta's alias table.
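+        # The beta prior q(t) = beta / (topic_sum[t] + beta_sum) is the same for
+        # every word, so one shared alias table suffices; __propose() below mixes
+        # it with the per-word tables in proportion to their probability mass.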
+ beta_dist = self.__model.beta() / (self.__model.topic_sum() + self.__model.beta_sum()) + self.__beta_prior_sum = np.sum(beta_dist) + self.__beta_alias.initialize(beta_dist) + + def sample_doc(self, doc): + if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): + for i in range(doc.size()): + new_topic = self.__sample_token(doc, doc.token(i)) + doc.set_topic(i, new_topic) + elif isinstance(doc, SLDADoc): + for i in range(doc.size()): + new_topic = self.__sample_sentence(doc, doc.sent(i)) + doc.set_topic(i, new_topic) + + def __sample_token(self, doc, token): + new_topic = token.topic + for i in range(self.__mh_steps): + doc_proposed_topic = self.__doc_proposal(doc, token) + new_topic = self.__word_proposal(doc, token, doc_proposed_topic) + return new_topic + + def __sample_sentence(self, doc, sent): + new_topic = sent.topic + for i in range(self.__mh_steps): + doc_proposed_topic = self.__doc_proposal(doc, sent) + new_topic = self.__word_proposal(doc, sent, doc_proposed_topic) + return new_topic + + def __doc_proposal(self, doc, token): + if isinstance(doc, LDADoc) and isinstance(token, Token): + old_topic = token.topic + dart = rand() * (doc.size() + self.__model.alpha_sum()) + if dart < doc.size(): + token_index = int(dart) + new_topic = doc.token(token_index).topic + else: + new_topic = rand_k(self.__model.num_topics()) + + if new_topic != old_topic: + proposal_old = self.__doc_proposal_distribution(doc, old_topic) + proposal_new = self.__doc_proposal_distribution(doc, new_topic) + proportion_old = self.__proportional_function(doc, token, old_topic) + proportion_new = self.__proportional_function(doc, token, new_topic) + transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new)) + rejection = rand() + mask = -(rejection < transition_prob) + return (new_topic & mask) | (old_topic & ~mask) + + return new_topic + + elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): + sent = token + old_topic = sent.topic + dart = rand() * (doc.size() + self.__model.alpha_sum()) + if dart < doc.size(): + token_index = int(dart) + new_topic = doc.sent(token_index).topic + else: + new_topic = rand_k(self.__model.num_topics()) + + if new_topic != old_topic: + proportion_old = self.__proportional_function(doc, sent, old_topic) + proportion_new = self.__proportional_function(doc, sent, new_topic) + proposal_old = self.__doc_proposal_distribution(doc, old_topic) + proposal_new = self.__doc_proposal_distribution(doc, new_topic) + transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new)) + rejection = rand() + mask = -(rejection < transition_prob) + return (new_topic & mask) | (old_topic & ~mask) + + return new_topic + + def __word_proposal(self, doc, token, old_topic): + if isinstance(doc, LDADoc) and isinstance(token, Token): + new_topic = self.__propose(token.id) + if new_topic != old_topic: + proposal_old = self.__word_proposal_distribution(token.id, old_topic) + proposal_new = self.__word_proposal_distribution(token.id, new_topic) + proportion_old = self.__proportional_function(doc, token, old_topic) + proportion_new = self.__proportional_function(doc, token, new_topic) + transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new)) + rejection = rand() + mask = -(rejection < transition_prob) + return (new_topic & mask) | (old_topic & ~mask) + return new_topic + + elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): + sent = token + new_topic = old_topic + for word_id in sent.tokens: + 
new_topic = self.__propose(word_id)
+                if new_topic != old_topic:
+                    proportion_old = self.__proportional_function(doc, sent, old_topic)
+                    proportion_new = self.__proportional_function(doc, sent, new_topic)
+                    proposal_old = self.__word_proposal_distribution(word_id, old_topic)
+                    proposal_new = self.__word_proposal_distribution(word_id, new_topic)
+                    transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new))
+                    rejection = rand()
+                    mask = -(rejection < transition_prob)
+                    new_topic = (new_topic & mask) | (old_topic & ~mask)
+            return new_topic
+
+    def __proportional_function(self, doc, token, new_topic):
+        if isinstance(doc, LDADoc) and isinstance(token, Token):
+            old_topic = token.topic
+            dt_alpha = doc.topic_sum(new_topic) + self.__model.alpha()
+            wt_beta = self.__model.word_topic_value(token.id, new_topic) + self.__model.beta()
+            t_sum_beta_sum = self.__model.topic_sum_value(new_topic) + self.__model.beta_sum()
+            if new_topic == old_topic and wt_beta > 1:
+                if dt_alpha > 1:
+                    dt_alpha -= 1
+                wt_beta -= 1
+                t_sum_beta_sum -= 1
+            return dt_alpha * wt_beta / t_sum_beta_sum
+
+        elif isinstance(doc, SLDADoc) and isinstance(token, Sentence):
+            sent = token
+            old_topic = sent.topic
+            result = doc.topic_sum(new_topic) + self.__model.alpha()
+            if new_topic == old_topic:
+                result -= 1
+            for word_id in sent.tokens:
+                wt_beta = self.__model.word_topic_value(word_id, new_topic) + self.__model.beta()
+                t_sum_beta_sum = self.__model.topic_sum_value(new_topic) + self.__model.beta_sum()
+                if new_topic == old_topic and wt_beta > 1:
+                    wt_beta -= 1
+                    t_sum_beta_sum -= 1
+                result *= wt_beta / t_sum_beta_sum
+            return result
+        else:
+            logger.error("Wrong input argument type!")
+
+    def __word_proposal_distribution(self, word_id, topic):
+        wt_beta = self.__model.word_topic_value(word_id, topic) + self.__model.beta()
+        t_sum_beta_sum = self.__model.topic_sum_value(topic) + self.__model.beta_sum()
+        return wt_beta / t_sum_beta_sum
+
+    def __doc_proposal_distribution(self, doc, topic):
+        return doc.topic_sum(topic) + self.__model.alpha()
+
+    def __propose(self, word_id):
+        dart = rand() * (self.__prob_sum[word_id] + self.__beta_prior_sum)
+        if dart < self.__prob_sum[word_id]:
+            idx = self.__alias_tables[word_id].generate()
+            topic = self.__topic_indexes[word_id][idx]
+        else:
+            topic = self.__beta_alias.generate()
+        return topic
+
+
+class GibbsSampler(Sampler):
+    def __init__(self, model):
+        super().__init__()
+        self.__model = model
+
+    def sample_doc(self, doc):
+        if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc):
+            for i in range(doc.size()):
+                new_topic = self.__sample_token(doc, doc.token(i))
+                doc.set_topic(i, new_topic)
+        elif isinstance(doc, SLDADoc):
+            for i in range(doc.size()):
+                new_topic = self.__sample_sentence(doc, doc.sent(i))
+                doc.set_topic(i, new_topic)
+
+    def __sample_token(self, doc, token):
+        old_topic = token.topic
+        num_topics = self.__model.num_topics()
+        accum_prob = np.zeros(num_topics)
+        prob = np.zeros(num_topics)
+        sum_ = 0
+        for i in range(num_topics):
+            dt_alpha = doc.topic_sum(i) + self.__model.alpha()
+            wt_beta = self.__model.word_topic_value(token.id, i) + self.__model.beta()
+            t_sum_beta_sum = self.__model.topic_sum_value(i) + self.__model.beta_sum()
+            if i == old_topic and wt_beta > 1:
+                if dt_alpha > 1:
+                    dt_alpha -= 1
+                wt_beta -= 1
+                t_sum_beta_sum -= 1
+            prob[i] = dt_alpha * wt_beta / t_sum_beta_sum
+            sum_ += prob[i]
+            accum_prob[i] = prob[i] if i == 0 else accum_prob[i - 1] + prob[i]
+
+        dart = rand() * sum_
+        if dart <= accum_prob[0]:
+            return 0
+        for i in range(1, num_topics):
+            if accum_prob[i - 1] < dart <= accum_prob[i]:
+                return i
+        return num_topics - 1
+
+    def __sample_sentence(self, doc, sent):
+        old_topic = sent.topic
+        num_topics = self.__model.num_topics()
+        accum_prob = np.zeros(num_topics)
+        prob = np.zeros(num_topics)
+        sum_ = 0
+        for t in range(num_topics):
+            dt_alpha = doc.topic_sum(t) + self.__model.alpha()
+            t_sum_beta_sum = self.__model.topic_sum_value(t) + self.__model.beta_sum()
+            if t == old_topic:
+                if dt_alpha > 1:
+                    dt_alpha -= 1
+                if t_sum_beta_sum > 1:
+                    t_sum_beta_sum -= 1
+            prob[t] = dt_alpha
+            for i in range(len(sent.tokens)):
+                w = sent.tokens[i]
+                wt_beta = self.__model.word_topic_value(w, t) + self.__model.beta()
+                if t == old_topic and wt_beta > 1:
+                    wt_beta -= 1
+                # Note: for very long sentences these repeated multiplications can
+                # underflow, so precision may be lost when there are many factors.
+                prob[t] *= wt_beta / t_sum_beta_sum
+            sum_ += prob[t]
+            accum_prob[t] = prob[t] if t == 0 else accum_prob[t - 1] + prob[t]
+
+        dart = rand() * sum_
+        if dart <= accum_prob[0]:
+            return 0
+        for t in range(1, num_topics):
+            if accum_prob[t - 1] < dart <= accum_prob[t]:
+                return t
+        return num_topics - 1
diff --git a/hub_module/modules/text/semantic_model/lda_novel/semantic_matching.py b/modules/text/semantic_model/lda_novel/semantic_matching.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/lda_novel/semantic_matching.py
rename to modules/text/semantic_model/lda_novel/semantic_matching.py
diff --git a/modules/text/semantic_model/lda_novel/tokenizer.py b/modules/text/semantic_model/lda_novel/tokenizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..585aed885b63b0e2a2d450b77a6d018615c86b04
--- /dev/null
+++ b/modules/text/semantic_model/lda_novel/tokenizer.py
@@ -0,0 +1,127 @@
+import os
+
+import numpy as np
+from paddlehub.common.logger import logger
+
+
+class Tokenizer(object):
+    """Base tokenizer class.
+    """
+
+    def __init__(self):
+        pass
+
+    def tokenize(self, text):
+        raise NotImplementedError
+
+
+class SimpleTokenizer(Tokenizer):
+    """Simple version of an FMM (Forward Maximum Matching) word tokenizer. This
+    tokenizer is only suitable for the topic model demo, not for real business
+    application scenarios.
+
+    Notes: This tokenizer can only recognize the words in the corresponding vocab file.
+    """
+
+    def __init__(self, vocab_path):
+        super().__init__()
+        self.__max_word_len = 0
+        self.__vocab = set()
+        self.__load_vocab(vocab_path)
+
+    def tokenize(self, text):
+        """Tokenize the input string `text` and return the tokenized result.
+        """
+        text_len = len(text)
+        result = []
+        i = 0
+        while i < text_len:
+            word = found_word = ""
+            # Deal with English characters.
+            if self.__is_eng_char(text[i]):
+                for j in range(i, text_len + 1):
+                    if j < text_len and self.__is_eng_char(text[j]):
+                        word += self.__tolower(text[j])
+                    else:
+                        # Forward matching by character granularity.
+                        if word in self.__vocab:
+                            result.append(word)
+                        i = j - 1
+                        break
+            else:
+                for j in range(i, min(i + self.__max_word_len, text_len)):
+                    word += text[j]
+                    if word in self.__vocab:
+                        found_word = word
+                if len(found_word) > 0:
+                    result.append(found_word)
+                    i += len(found_word) - 1
+            i += 1
+        return result
+
+    def contains(self, word):
+        """Check whether the word is in the vocabulary.
+        """
+        return word in self.__vocab
+
+    def __load_vocab(self, vocab_path):
+        """Load the word dictionary.
+ """ + with open(vocab_path, 'r', encoding='utf-8') as fin: + vocab_size = 0 + for line in fin.readlines(): + fields = line.strip().split('\t') + assert len(fields) >= 2 + word = fields[1] + self.__max_word_len = max(self.__max_word_len, len(word)) + self.__vocab.add(word) + vocab_size += 1 + + def __is_eng_char(self, c): + """Check whether char c is an English character. + """ + return (c >= 'A' and c <= 'Z') or (c >= 'a' and c <= 'z') + + def __tolower(self, c): + """Return the lowercase character of the corresponding character, or return + the original character if there is no corresponding lowercase character. + """ + return c.lower() + + +class LACTokenizer(Tokenizer): + def __init__(self, vocab_path, lac): + super().__init__() + self.__max_word_len = 0 + self.__vocab = set() + self.__lac = lac + self.__load_vocab(vocab_path) + + def __load_vocab(self, vocab_path): + """Load the word dictionary. + """ + with open(vocab_path, 'r', encoding='utf-8') as fin: + vocab_size = 0 + for line in fin.readlines(): + fields = line.strip().split('\t') + assert len(fields) >= 2 + word = fields[1] + self.__max_word_len = max(self.__max_word_len, len(word)) + self.__vocab.add(word) + vocab_size += 1 + + def tokenize(self, text): + results = self.__lac.lexical_analysis(texts=[text], use_gpu=False, batch_size=1, return_tag=True) + # Change English words to lower case. + # And just preserve the word in vocab. + words = results[0]["word"] + result = [] + for word in words: + word = word.lower() + if word in self.__vocab: + result.append(word) + return result + + def contains(self, word): + """Check whether the word is in the vocabulary. + """ + return word in self.__vocab diff --git a/hub_module/modules/text/semantic_model/lda_novel/util.py b/modules/text/semantic_model/lda_novel/util.py similarity index 100% rename from hub_module/modules/text/semantic_model/lda_novel/util.py rename to modules/text/semantic_model/lda_novel/util.py diff --git a/modules/text/semantic_model/lda_novel/vocab.py b/modules/text/semantic_model/lda_novel/vocab.py new file mode 100644 index 0000000000000000000000000000000000000000..7cc01fe71ceb63d2328ddaf15da8173c9508c118 --- /dev/null +++ b/modules/text/semantic_model/lda_novel/vocab.py @@ -0,0 +1,41 @@ +from paddlehub.common.logger import logger + +OOV = -1 + + +class WordCount(object): + def __init__(self, word_id, count): + self.word_id = word_id + self.count = count + + +class Vocab(object): + def __init__(self): + self.__term2id = {} + self.__id2term = {} + + def get_id(self, word): + if word not in self.__term2id: + return OOV + return self.__term2id[word] + + def load(self, vocab_file): + self.__term2id = {} + self.__id2term = {} + with open(vocab_file, 'r', encoding='utf-8') as fin: + for line in fin.readlines(): + fields = line.strip().split('\t') + assert len(fields) == 5, "Vocabulary file [%s] format error!" % (vocab_file) + term = fields[1] + id_ = int(fields[2]) + if term in self.__term2id: + logger.error("Duplicate word [%s] in vocab file!" 
% (term)) + continue + self.__term2id[term] = id_ + self.__id2term[id_] = term + + def size(self): + return len(self.__term2id) + + def vocabulary(self): + return self.__id2term diff --git a/hub_module/modules/text/semantic_model/lda_novel/vose_alias.py b/modules/text/semantic_model/lda_novel/vose_alias.py similarity index 100% rename from hub_module/modules/text/semantic_model/lda_novel/vose_alias.py rename to modules/text/semantic_model/lda_novel/vose_alias.py diff --git a/hub_module/modules/text/semantic_model/lda_webpage/README.md b/modules/text/semantic_model/lda_webpage/README.md similarity index 100% rename from hub_module/modules/text/semantic_model/lda_webpage/README.md rename to modules/text/semantic_model/lda_webpage/README.md diff --git a/hub_module/modules/text/semantic_model/lda_novel/__init__.py b/modules/text/semantic_model/lda_webpage/__init__.py similarity index 100% rename from hub_module/modules/text/semantic_model/lda_novel/__init__.py rename to modules/text/semantic_model/lda_webpage/__init__.py diff --git a/hub_module/modules/text/semantic_model/lda_webpage/config.py b/modules/text/semantic_model/lda_webpage/config.py similarity index 100% rename from hub_module/modules/text/semantic_model/lda_webpage/config.py rename to modules/text/semantic_model/lda_webpage/config.py diff --git a/modules/text/semantic_model/lda_webpage/document.py b/modules/text/semantic_model/lda_webpage/document.py new file mode 100644 index 0000000000000000000000000000000000000000..4476230a5c9bc8d545b52386dbf00a201e59b468 --- /dev/null +++ b/modules/text/semantic_model/lda_webpage/document.py @@ -0,0 +1,176 @@ +import numpy as np + + +class Topic(object): + """Basic data structure of topic, contains topic id and + corresponding probability. + """ + + def __init__(self, tid, prob): + self.tid = tid # topic id + self.prob = prob # topic probability + + +class Token(object): + """Basic storage unit of LDA documents, contains word id + and corresponding topic. + """ + + def __init__(self, topic, id): + self.topic = topic + self.id = id + + +class Sentence(object): + """Basic storage unit of SentenceLDA documents, contains word ids + of the sentence and its corresponding topic id. + """ + + def __init__(self, topic, tokens): + self.topic = topic + self.tokens = tokens + + +class LDADoc(object): + """The storage structure of LDA model's inference result. + """ + + def __init__(self): + self._num_topics = None # Number of topics. + self._num_accum = None # Number of accumulated sample rounds. + self._alpha = None # Document prior parameter. + self._tokens = None # Storage structure of inference results. + self._topic_sum = None # Document's topic sum in one round samples. + self._accum_topic_sum = None # Accumulated results of topic sum. + + def init(self, num_topics): + """Initialize the LDADoc according to num_topics. + """ + self._num_topics = num_topics + self._num_accum = 0 + self._tokens = [] + self._topic_sum = np.zeros(self._num_topics) + self._accum_topic_sum = np.zeros(self._num_topics) + + def add_token(self, token): + """Add new word to current LDADoc. + Arg: + token: Token class object. + """ + assert token.topic >= 0, "Topic %d out of range!" % token.topic + assert token.topic < self._num_topics, "Topic %d out of range!" 
% token.topic + self._tokens.append(token) + self._topic_sum[token.topic] += 1 + + def token(self, index): + return self._tokens[index] + + def set_topic(self, index, new_topic): + """Set the index word's topic to new_topic, and update the corresponding + topic distribution. + """ + assert new_topic >= 0, "Topic %d out of range!" % new_topic + assert new_topic < self._num_topics, "Topic %d out of range!" % new_topic + old_topic = self._tokens[index].topic + if new_topic == old_topic: + return + self._tokens[index].topic = new_topic + self._topic_sum[old_topic] -= 1 + self._topic_sum[new_topic] += 1 + + def set_alpha(self, alpha): + self._alpha = alpha + + def size(self): + """Return number of words in LDADoc. + """ + return len(self._tokens) + + def topic_sum(self, topic_id): + return self._topic_sum[topic_id] + + def sparse_topic_dist(self, sort=True): + """Return the topic distribution of documents in sparse format. + By default, it is sorted according to the topic probability + under the descending order. + """ + topic_dist = [] + sum_ = np.sum(self._accum_topic_sum) + if sum_ == 0: + return topic_dist + for i in range(0, self._num_topics): + if self._accum_topic_sum[i] == 0: + continue + topic_dist.append(Topic(i, self._accum_topic_sum[i] * 1.0 / sum_)) + if sort: + + def take_elem(topic): + return topic.prob + + topic_dist.sort(key=take_elem, reverse=True) + if topic_dist is None: + topic_dist = [] + + return topic_dist + + def dense_topic_dist(self): + """Return the distribution of document topics in dense format, + taking into account the prior parameter alpha. + """ + dense_dist = np.zeros(self._num_topics) + if self.size() == 0: + return dense_dist + dense_dist = (self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / ( + self.size() + self._alpha * self._num_topics) + return dense_dist + + def accumulate_topic_num(self): + self._accum_topic_sum += self._topic_sum + self._num_accum += 1 + + +class SLDADoc(LDADoc): + """Sentence LDA Document, inherited from LDADoc. + Add add_sentence interface. + """ + + def __init__(self): + super().__init__() + self.__sentences = None + + def init(self, num_topics): + """Initialize the SLDADoc according to num_topics. + """ + self._num_topics = num_topics + self.__sentences = [] + self._num_accum = 0 + self._topic_sum = np.zeros(self._num_topics) + self._accum_topic_sum = np.zeros(self._num_topics) + + def add_sentence(self, sent): + """Add new sentence to current SLDADoc. + Arg: + sent: Sentence class object. + """ + assert sent.topic >= 0, "Topic %d out of range!" % (sent.topic) + assert sent.topic < self._num_topics, "Topic %d out of range!" % (sent.topic) + self.__sentences.append(sent) + self._topic_sum[sent.topic] += 1 + + def set_topic(self, index, new_topic): + assert new_topic >= 0, "Topic %d out of range!" % (new_topic) + assert new_topic < self._num_topics, "Topic %d out of range!" % (new_topic) + old_topic = self.__sentences[index].topic + if new_topic == old_topic: + return + self.__sentences[index].topic = new_topic + self._topic_sum[old_topic] -= 1 + self._topic_sum[new_topic] += 1 + + def size(self): + """Return number of sentences in SLDADoc. 
+ """ + return len(self.__sentences) + + def sent(self, index): + return self.__sentences[index] diff --git a/modules/text/semantic_model/lda_webpage/inference_engine.py b/modules/text/semantic_model/lda_webpage/inference_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..718f2e012b4736435e1589380aa1bc774ad84141 --- /dev/null +++ b/modules/text/semantic_model/lda_webpage/inference_engine.py @@ -0,0 +1,91 @@ +import os + +from paddlehub.common.logger import logger + +from lda_webpage.config import ModelConfig +from lda_webpage.util import load_prototxt, fix_random_seed, rand_k +from lda_webpage.model import TopicModel +from lda_webpage.sampler import GibbsSampler, MHSampler +from lda_webpage.document import LDADoc, SLDADoc, Token, Sentence +from lda_webpage.vocab import OOV + + +class SamplerType: + GibbsSampling = 0 + MetropolisHastings = 1 + + +class InferenceEngine(object): + def __init__(self, model_dir, conf_file, type=SamplerType.MetropolisHastings): + # Read model configuration. + config = ModelConfig() + conf_file_path = os.path.join(model_dir, conf_file) + load_prototxt(conf_file_path, config) + self.__model = TopicModel(model_dir, config) + self.__config = config + + # Initialize the sampler according to the configuration. + if type == SamplerType.GibbsSampling: + self.__sampler = GibbsSampler(self.__model) + elif type == SamplerType.MetropolisHastings: + self.__sampler = MHSampler(self.__model) + + def infer(self, input, doc): + """Perform LDA topic inference on input, and store the results in doc. + Args: + input: a list of strings after tokenization. + doc: LDADoc type or SLDADoc type. + """ + fix_random_seed() + if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): + doc.init(self.__model.num_topics()) + doc.set_alpha(self.__model.alpha()) + for token in input: + id_ = self.__model.term_id(token) + if id_ != OOV: + init_topic = rand_k(self.__model.num_topics()) + doc.add_token(Token(init_topic, id_)) + self.lda_infer(doc, 20, 50) + elif isinstance(doc, SLDADoc): + doc.init(self.__model.num_topics()) + doc.set_alpha(self.__model.alpha()) + for sent in input: + words = [] + for token in sent: + id_ = self.__model.term_id(token) + if id_ != OOV: + words.append(id_) + init_topic = rand_k(self.__model.num_topics()) + doc.add_sentence(Sentence(init_topic, words)) + self.slda_infer(doc, 20, 50) + else: + logger.error("Wrong Doc Type!") + + def lda_infer(self, doc, burn_in_iter, total_iter): + assert burn_in_iter >= 0 + assert total_iter > 0 + assert total_iter > burn_in_iter + + for iter_ in range(total_iter): + self.__sampler.sample_doc(doc) + if iter_ >= burn_in_iter: + doc.accumulate_topic_num() + + def slda_infer(self, doc, burn_in_iter, total_iter): + assert burn_in_iter >= 0 + assert total_iter > 0 + assert total_iter > burn_in_iter + + for iter_ in range(total_iter): + self.__sampler.sample_doc(doc) + if iter_ >= burn_in_iter: + doc.accumulate_topic_num() + + def model_type(self): + return self.__model.type() + + def get_model(self): + return self.__model + + def get_config(self): + return self.__config diff --git a/modules/text/semantic_model/lda_webpage/model.py b/modules/text/semantic_model/lda_webpage/model.py new file mode 100644 index 0000000000000000000000000000000000000000..8c05da149a159bd160bf6cffd751bf21548a4352 --- /dev/null +++ b/modules/text/semantic_model/lda_webpage/model.py @@ -0,0 +1,123 @@ +import os +from collections import OrderedDict + +import numpy as np +from tqdm import tqdm +from paddlehub.common.logger import logger 
+
+from lda_webpage.vocab import Vocab, WordCount
+
+
+class TopicModel(object):
+    """Storage structure of the topic model, including the vocabulary and
+    word-topic counts.
+    """
+
+    def __init__(self, model_dir, config):
+        """
+        Args:
+            model_dir: the path of model directory
+            config: ModelConfig class.
+        """
+        self.__word_topic = None  # Model parameter of word topic.
+        self.__vocab = Vocab()  # Vocab data structure of model.
+        self.__num_topics = config.num_topics  # Number of topics.
+        self.__alpha = config.alpha
+        self.__alpha_sum = self.__alpha * self.__num_topics
+        self.__beta = config.beta
+        self.__beta_sum = None
+        self.__type = config.type  # Model type.
+        self.__topic_sum = np.zeros(self.__num_topics, dtype="int64")  # Accumulated count of each topic.
+        self.__topic_words = [[] for _ in range(self.__num_topics)]
+        word_topic_path = os.path.join(model_dir, config.word_topic_file)
+        vocab_path = os.path.join(model_dir, config.vocab_file)
+        self.load_model(word_topic_path, vocab_path)
+
+    def term_id(self, term):
+        return self.__vocab.get_id(term)
+
+    def load_model(self, word_topic_path, vocab_path):
+
+        # Load the vocabulary.
+        self.__vocab.load(vocab_path)
+
+        self.__beta_sum = self.__beta * self.__vocab.size()
+        self.__word_topic = [{} for _ in range(self.__vocab.size())]  # A dict per term, mapping topic_id -> count.
+        self.__load_word_dict(word_topic_path)
+        logger.info("Model Info: #num_topics=%d #vocab_size=%d alpha=%f beta=%f" %
+                    (self.num_topics(), self.vocab_size(), self.alpha(), self.beta()))
+
+    def word_topic_value(self, word_id, topic_id):
+        """Return value of specific word under specific topic in the model.
+        """
+        word_dict = self.__word_topic[word_id]
+        if topic_id not in word_dict:
+            return 0
+        return word_dict[topic_id]
+
+    def word_topic(self, term_id):
+        """Return the topic distribution of a word.
+        """
+        return self.__word_topic[term_id]
+
+    def topic_sum_value(self, topic_id):
+        return self.__topic_sum[topic_id]
+
+    def topic_sum(self):
+        return self.__topic_sum
+
+    def num_topics(self):
+        return self.__num_topics
+
+    def vocab_size(self):
+        return self.__vocab.size()
+
+    def alpha(self):
+        return self.__alpha
+
+    def alpha_sum(self):
+        return self.__alpha_sum
+
+    def beta(self):
+        return self.__beta
+
+    def beta_sum(self):
+        return self.__beta_sum
+
+    def type(self):
+        return self.__type
+
+    def __load_word_dict(self, word_dict_path):
+        """Load the word topic parameters.
+        """
+        logger.info("Loading word topic.")
+        with open(word_dict_path, 'r', encoding='utf-8') as f:
+            for line in tqdm(f.readlines()):
+                fields = line.strip().split(" ")
+                assert len(fields) > 0, "Model file format error!"
+                term_id = int(fields[0])
+                assert term_id < self.vocab_size(), "Term id out of range!"
+                assert term_id >= 0, "Term id out of range!"
+                for i in range(1, len(fields)):
+                    topic_count = fields[i].split(":")
+                    assert len(topic_count) == 2, "Topic count format error!"
+
+                    topic_id = int(topic_count[0])
+                    assert topic_id >= 0, "Topic out of range!"
+                    assert topic_id < self.__num_topics, "Topic out of range!"
+
+                    count = int(topic_count[1])
+                    assert count >= 0, "Topic count error!"
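+                    # Each line of the word-topic file has the layout
+                    #     <term_id> <topic_id>:<count> <topic_id>:<count> ...
+                    # so the statements below accumulate one term's per-topic counts.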
+ + self.__word_topic[term_id][topic_id] = count + self.__topic_sum[topic_id] += count + self.__topic_words[topic_id].append(WordCount(term_id, count)) + new_dict = OrderedDict() + for key in sorted(self.__word_topic[term_id]): + new_dict[key] = self.__word_topic[term_id][key] + self.__word_topic[term_id] = new_dict + + def get_vocab(self): + return self.__vocab.vocabulary() + + def topic_words(self): + return self.__topic_words diff --git a/modules/text/semantic_model/lda_webpage/module.py b/modules/text/semantic_model/lda_webpage/module.py new file mode 100644 index 0000000000000000000000000000000000000000..ebe1da43f731167eb74f61790805340c577f2dd1 --- /dev/null +++ b/modules/text/semantic_model/lda_webpage/module.py @@ -0,0 +1,192 @@ +import os + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.common.logger import logger + +from lda_webpage.inference_engine import InferenceEngine +from lda_webpage.document import LDADoc +from lda_webpage.semantic_matching import SemanticMatching, WordAndDis +from lda_webpage.tokenizer import LACTokenizer, SimpleTokenizer +from lda_webpage.config import ModelType +from lda_webpage.vocab import Vocab, WordCount + + +@moduleinfo( + name="lda_webpage", + version="1.0.2", + summary= + "This is a PaddleHub Module for LDA topic model in webpage dataset, where we can calculate doc distance, calculate the similarity between query and document, etc.", + author="DesmonDay", + author_email="", + type="nlp/semantic_model") +class TopicModel(hub.Module): + def _initialize(self): + """ + Initialize with the necessary elements. + """ + self.model_dir = os.path.join(self.directory, 'webpage') + self.conf_file = 'lda.conf' + self.__engine = InferenceEngine(self.model_dir, self.conf_file) + self.vocab_path = os.path.join(self.model_dir, 'vocab_info.txt') + lac = hub.Module(name="lac") + # self.__tokenizer = SimpleTokenizer(self.vocab_path) + self.__tokenizer = LACTokenizer(self.vocab_path, lac) + + self.vocabulary = self.__engine.get_model().get_vocab() + self.config = self.__engine.get_config() + self.topic_words = self.__engine.get_model().topic_words() + self.topic_sum_table = self.__engine.get_model().topic_sum() + + def take_elem(word_count): + return word_count.count + + for i in range(self.config.num_topics): + self.topic_words[i].sort(key=take_elem, reverse=True) + + logger.info("Finish initialization.") + + def cal_doc_distance(self, doc_text1, doc_text2): + """ + This interface calculates the distance between documents. + + Args: + doc_text1(str): the input document text 1. + doc_text2(str): the input document text 2. + + Returns: + jsd(float): Jensen-Shannon Divergence distance of two documents. + hd(float): Hellinger Distance of two documents. + """ + doc1_tokens = self.__tokenizer.tokenize(doc_text1) + doc2_tokens = self.__tokenizer.tokenize(doc_text2) + + # Document topic inference. + doc1, doc2 = LDADoc(), LDADoc() + self.__engine.infer(doc1_tokens, doc1) + self.__engine.infer(doc2_tokens, doc2) + + # To calculate jsd, we need dense document topic distribution. + dense_dict1 = doc1.dense_topic_dist() + dense_dict2 = doc2.dense_topic_dist() + # Calculate the distance between distributions. + # The smaller the distance, the higher the document semantic similarity. 
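+        # For reference, with m = (p + q) / 2 these are the standard definitions
+        # (the concrete implementations live in semantic_matching.py, which this
+        # diff renames unchanged):
+        #     JSD(p, q) = (KL(p || m) + KL(q || m)) / 2
+        #     HD(p, q) = sqrt(sum((sqrt(p_i) - sqrt(q_i)) ** 2) / 2)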
+ sm = SemanticMatching() + jsd = sm.jensen_shannon_divergence(dense_dict1, dense_dict2) + hd = sm.hellinger_distance(dense_dict1, dense_dict2) + + return jsd, hd + + def cal_doc_keywords_similarity(self, document, top_k=10): + """ + This interface can be used to find topk keywords of document. + + Args: + document(str): the input document text. + top_k(int): top k keywords of this document. + + Returns: + results(list): contains top_k keywords and their + corresponding similarity compared to document. + """ + d_tokens = self.__tokenizer.tokenize(document) + + # Do topic inference on documents to obtain topic distribution. + doc = LDADoc() + self.__engine.infer(d_tokens, doc) + doc_topic_dist = doc.sparse_topic_dist() + + items = [] + words = set() + for word in d_tokens: + if word in words: + continue + words.add(word) + wd = WordAndDis() + wd.word = word + sm = SemanticMatching() + wd.distance = sm.likelihood_based_similarity( + terms=[word], doc_topic_dist=doc_topic_dist, model=self.__engine.get_model()) + items.append(wd) + + def take_elem(word_dis): + return word_dis.distance + + items.sort(key=take_elem, reverse=True) + + results = [] + size = len(items) + for i in range(top_k): + if i >= size: + break + results.append({"word": items[i].word, "similarity": items[i].distance}) + + return results + + def cal_query_doc_similarity(self, query, document): + """ + This interface calculates the similarity between query and document. + + Args: + query(str): the input query text. + document(str): the input document text. + + Returns: + lda_sim(float): likelihood based similarity between query and document based on LDA. + """ + q_tokens = self.__tokenizer.tokenize(query) + d_tokens = self.__tokenizer.tokenize(document) + + doc = LDADoc() + self.__engine.infer(d_tokens, doc) + doc_topic_dist = doc.sparse_topic_dist() + + sm = SemanticMatching() + lda_sim = sm.likelihood_based_similarity(q_tokens, doc_topic_dist, self.__engine.get_model()) + + return lda_sim + + def infer_doc_topic_distribution(self, document): + """ + This interface infers the topic distribution of document. + + Args: + document(str): the input document text. + + Returns: + results(list): returns the topic distribution of document. + """ + tokens = self.__tokenizer.tokenize(document) + if tokens == []: + return [] + results = [] + doc = LDADoc() + self.__engine.infer(tokens, doc) + topics = doc.sparse_topic_dist() + for topic in topics: + results.append({"topic id": topic.tid, "distribution": topic.prob}) + return results + + def show_topic_keywords(self, topic_id, k=10): + """ + This interface returns the first k keywords under specific topic. + + Args: + topic_id(int): topic information we want to know. + k(int): top k keywords. + + Returns: + results(dict): contains specific topic's keywords and + corresponding probability. + """ + EPS = 1e-8 + results = {} + if 0 <= topic_id < self.config.num_topics: + k = min(k, len(self.topic_words[topic_id])) + for i in range(k): + prob = self.topic_words[topic_id][i].count / \ + (self.topic_sum_table[topic_id] + EPS) + results[self.vocabulary[self.topic_words[topic_id][i].word_id]] = prob + return results + else: + logger.error("%d is out of range!" 
% topic_id)
diff --git a/modules/text/semantic_model/lda_webpage/sampler.py b/modules/text/semantic_model/lda_webpage/sampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..24098ea0d8beab3cc8aaa1f6ec9743cb99fc31ee
--- /dev/null
+++ b/modules/text/semantic_model/lda_webpage/sampler.py
@@ -0,0 +1,286 @@
+import os
+
+import numpy as np
+from tqdm import tqdm
+from paddlehub.common.logger import logger
+
+from lda_webpage.document import LDADoc, SLDADoc, Token, Sentence
+from lda_webpage.vose_alias import VoseAlias
+from lda_webpage.util import rand, rand_k
+
+
+class Sampler(object):
+    def __init__(self):
+        pass
+
+    def sample_doc(self, doc):
+        """Sample LDA or SLDA topics for documents.
+        """
+        raise NotImplementedError
+
+
+class MHSampler(Sampler):
+    def __init__(self, model):
+        super().__init__()
+        self.__model = model
+        self.__topic_indexes = None
+        self.__alias_tables = None
+        self.__prob_sum = None
+        self.__beta_alias = VoseAlias()
+        self.__beta_prior_sum = None
+        self.__mh_steps = 2
+        self.__construct_alias_table()
+
+    def __construct_alias_table(self):
+        """Construct alias table for all words.
+        """
+        logger.info("Construct alias table for alias sampling method.")
+        vocab_size = self.__model.vocab_size()
+        self.__topic_indexes = [[] for _ in range(vocab_size)]
+        self.__alias_tables = [VoseAlias() for _ in range(vocab_size)]
+        self.__prob_sum = np.zeros(vocab_size)
+
+        # Construct each word's alias table (prior is not included).
+        for i in tqdm(range(vocab_size)):
+            dist = []
+            prob_sum = 0
+            for key in self.__model.word_topic(i):
+                topic_id = key
+                word_topic_count = self.__model.word_topic(i)[key]
+                topic_sum = self.__model.topic_sum_value(topic_id)
+
+                self.__topic_indexes[i].append(topic_id)
+                q = word_topic_count / (topic_sum + self.__model.beta_sum())
+                dist.append(q)
+                prob_sum += q
+            self.__prob_sum[i] = prob_sum
+            if len(dist) > 0:
+                dist = np.array(dist, dtype=np.float64)
+                self.__alias_tables[i].initialize(dist)
+
+        # Build prior parameter beta's alias table.
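+        # Vose's alias method pays an O(K) build cost per table here, but each
+        # later draw is O(1), which keeps the Metropolis-Hastings steps cheap.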
+ beta_dist = self.__model.beta() / (self.__model.topic_sum() + self.__model.beta_sum()) + self.__beta_prior_sum = np.sum(beta_dist) + self.__beta_alias.initialize(beta_dist) + + def sample_doc(self, doc): + if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): + for i in range(doc.size()): + new_topic = self.__sample_token(doc, doc.token(i)) + doc.set_topic(i, new_topic) + elif isinstance(doc, SLDADoc): + for i in range(doc.size()): + new_topic = self.__sample_sentence(doc, doc.sent(i)) + doc.set_topic(i, new_topic) + + def __sample_token(self, doc, token): + new_topic = token.topic + for i in range(self.__mh_steps): + doc_proposed_topic = self.__doc_proposal(doc, token) + new_topic = self.__word_proposal(doc, token, doc_proposed_topic) + return new_topic + + def __sample_sentence(self, doc, sent): + new_topic = sent.topic + for i in range(self.__mh_steps): + doc_proposed_topic = self.__doc_proposal(doc, sent) + new_topic = self.__word_proposal(doc, sent, doc_proposed_topic) + return new_topic + + def __doc_proposal(self, doc, token): + if isinstance(doc, LDADoc) and isinstance(token, Token): + old_topic = token.topic + dart = rand() * (doc.size() + self.__model.alpha_sum()) + if dart < doc.size(): + token_index = int(dart) + new_topic = doc.token(token_index).topic + else: + new_topic = rand_k(self.__model.num_topics()) + + if new_topic != old_topic: + proposal_old = self.__doc_proposal_distribution(doc, old_topic) + proposal_new = self.__doc_proposal_distribution(doc, new_topic) + proportion_old = self.__proportional_function(doc, token, old_topic) + proportion_new = self.__proportional_function(doc, token, new_topic) + transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new)) + rejection = rand() + mask = -(rejection < transition_prob) + return (new_topic & mask) | (old_topic & ~mask) + + return new_topic + + elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): + sent = token + old_topic = sent.topic + dart = rand() * (doc.size() + self.__model.alpha_sum()) + if dart < doc.size(): + token_index = int(dart) + new_topic = doc.sent(token_index).topic + else: + new_topic = rand_k(self.__model.num_topics()) + + if new_topic != old_topic: + proportion_old = self.__proportional_function(doc, sent, old_topic) + proportion_new = self.__proportional_function(doc, sent, new_topic) + proposal_old = self.__doc_proposal_distribution(doc, old_topic) + proposal_new = self.__doc_proposal_distribution(doc, new_topic) + transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new)) + rejection = rand() + mask = -(rejection < transition_prob) + return (new_topic & mask) | (old_topic & ~mask) + + return new_topic + + def __word_proposal(self, doc, token, old_topic): + if isinstance(doc, LDADoc) and isinstance(token, Token): + new_topic = self.__propose(token.id) + if new_topic != old_topic: + proposal_old = self.__word_proposal_distribution(token.id, old_topic) + proposal_new = self.__word_proposal_distribution(token.id, new_topic) + proportion_old = self.__proportional_function(doc, token, old_topic) + proportion_new = self.__proportional_function(doc, token, new_topic) + transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new)) + rejection = rand() + mask = -(rejection < transition_prob) + return (new_topic & mask) | (old_topic & ~mask) + return new_topic + + elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): + sent = token + new_topic = old_topic + for word_id in sent.tokens: + 
new_topic = self.__propose(word_id)
+                if new_topic != old_topic:
+                    proportion_old = self.__proportional_function(doc, sent, old_topic)
+                    proportion_new = self.__proportional_function(doc, sent, new_topic)
+                    proposal_old = self.__word_proposal_distribution(word_id, old_topic)
+                    proposal_new = self.__word_proposal_distribution(word_id, new_topic)
+                    transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new))
+                    rejection = rand()
+                    mask = -(rejection < transition_prob)
+                    new_topic = (new_topic & mask) | (old_topic & ~mask)
+            return new_topic
+
+    def __proportional_function(self, doc, token, new_topic):
+        if isinstance(doc, LDADoc) and isinstance(token, Token):
+            old_topic = token.topic
+            dt_alpha = doc.topic_sum(new_topic) + self.__model.alpha()
+            wt_beta = self.__model.word_topic_value(token.id, new_topic) + self.__model.beta()
+            t_sum_beta_sum = self.__model.topic_sum_value(new_topic) + self.__model.beta_sum()
+            if new_topic == old_topic and wt_beta > 1:
+                if dt_alpha > 1:
+                    dt_alpha -= 1
+                wt_beta -= 1
+                t_sum_beta_sum -= 1
+            return dt_alpha * wt_beta / t_sum_beta_sum
+
+        elif isinstance(doc, SLDADoc) and isinstance(token, Sentence):
+            sent = token
+            old_topic = sent.topic
+            result = doc.topic_sum(new_topic) + self.__model.alpha()
+            if new_topic == old_topic:
+                result -= 1
+            for word_id in sent.tokens:
+                wt_beta = self.__model.word_topic_value(word_id, new_topic) + self.__model.beta()
+                t_sum_beta_sum = self.__model.topic_sum_value(new_topic) + self.__model.beta_sum()
+                if new_topic == old_topic and wt_beta > 1:
+                    wt_beta -= 1
+                    t_sum_beta_sum -= 1
+                result *= wt_beta / t_sum_beta_sum
+            return result
+        else:
+            logger.error("Wrong input argument type!")
+
+    def __word_proposal_distribution(self, word_id, topic):
+        wt_beta = self.__model.word_topic_value(word_id, topic) + self.__model.beta()
+        t_sum_beta_sum = self.__model.topic_sum_value(topic) + self.__model.beta_sum()
+        return wt_beta / t_sum_beta_sum
+
+    def __doc_proposal_distribution(self, doc, topic):
+        return doc.topic_sum(topic) + self.__model.alpha()
+
+    def __propose(self, word_id):
+        dart = rand() * (self.__prob_sum[word_id] + self.__beta_prior_sum)
+        if dart < self.__prob_sum[word_id]:
+            idx = self.__alias_tables[word_id].generate()
+            topic = self.__topic_indexes[word_id][idx]
+        else:
+            topic = self.__beta_alias.generate()
+        return topic
+
+
+class GibbsSampler(Sampler):
+    def __init__(self, model):
+        super().__init__()
+        self.__model = model
+
+    def sample_doc(self, doc):
+        if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc):
+            for i in range(doc.size()):
+                new_topic = self.__sample_token(doc, doc.token(i))
+                doc.set_topic(i, new_topic)
+        elif isinstance(doc, SLDADoc):
+            for i in range(doc.size()):
+                new_topic = self.__sample_sentence(doc, doc.sent(i))
+                doc.set_topic(i, new_topic)
+
+    def __sample_token(self, doc, token):
+        old_topic = token.topic
+        num_topics = self.__model.num_topics()
+        accum_prob = np.zeros(num_topics)
+        prob = np.zeros(num_topics)
+        sum_ = 0
+        for i in range(num_topics):
+            dt_alpha = doc.topic_sum(i) + self.__model.alpha()
+            wt_beta = self.__model.word_topic_value(token.id, i) + self.__model.beta()
+            t_sum_beta_sum = self.__model.topic_sum_value(i) + self.__model.beta_sum()
+            if i == old_topic and wt_beta > 1:
+                if dt_alpha > 1:
+                    dt_alpha -= 1
+                wt_beta -= 1
+                t_sum_beta_sum -= 1
+            prob[i] = dt_alpha * wt_beta / t_sum_beta_sum
+            sum_ += prob[i]
+            accum_prob[i] = prob[i] if i == 0 else accum_prob[i - 1] + prob[i]
+
+        dart = rand() * sum_
+        if dart <= accum_prob[0]:
+            return 0
+        for i in range(1, num_topics):
+            if accum_prob[i - 1] < dart <= accum_prob[i]:
+                return i
+        return num_topics - 1
+
+    def __sample_sentence(self, doc, sent):
+        old_topic = sent.topic
+        num_topics = self.__model.num_topics()
+        accum_prob = np.zeros(num_topics)
+        prob = np.zeros(num_topics)
+        sum_ = 0
+        for t in range(num_topics):
+            dt_alpha = doc.topic_sum(t) + self.__model.alpha()
+            t_sum_beta_sum = self.__model.topic_sum_value(t) + self.__model.beta_sum()
+            if t == old_topic:
+                if dt_alpha > 1:
+                    dt_alpha -= 1
+                if t_sum_beta_sum > 1:
+                    t_sum_beta_sum -= 1
+            prob[t] = dt_alpha
+            for i in range(len(sent.tokens)):
+                w = sent.tokens[i]
+                wt_beta = self.__model.word_topic_value(w, t) + self.__model.beta()
+                if t == old_topic and wt_beta > 1:
+                    wt_beta -= 1
+                # Note: for very long sentences these repeated multiplications can
+                # underflow, so precision may be lost when there are many factors.
+                prob[t] *= wt_beta / t_sum_beta_sum
+            sum_ += prob[t]
+            accum_prob[t] = prob[t] if t == 0 else accum_prob[t - 1] + prob[t]
+
+        dart = rand() * sum_
+        if dart <= accum_prob[0]:
+            return 0
+        for t in range(1, num_topics):
+            if accum_prob[t - 1] < dart <= accum_prob[t]:
+                return t
+        return num_topics - 1
diff --git a/hub_module/modules/text/semantic_model/lda_webpage/semantic_matching.py b/modules/text/semantic_model/lda_webpage/semantic_matching.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/lda_webpage/semantic_matching.py
rename to modules/text/semantic_model/lda_webpage/semantic_matching.py
diff --git a/modules/text/semantic_model/lda_webpage/tokenizer.py b/modules/text/semantic_model/lda_webpage/tokenizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..585aed885b63b0e2a2d450b77a6d018615c86b04
--- /dev/null
+++ b/modules/text/semantic_model/lda_webpage/tokenizer.py
@@ -0,0 +1,127 @@
+import os
+
+import numpy as np
+from paddlehub.common.logger import logger
+
+
+class Tokenizer(object):
+    """Base tokenizer class.
+    """
+
+    def __init__(self):
+        pass
+
+    def tokenize(self, text):
+        raise NotImplementedError
+
+
+class SimpleTokenizer(Tokenizer):
+    """Simple version of an FMM (Forward Maximum Matching) word tokenizer. This
+    tokenizer is only suitable for the topic model demo, not for real business
+    application scenarios.
+
+    Notes: This tokenizer can only recognize the words in the corresponding vocab file.
+    """
+
+    def __init__(self, vocab_path):
+        super().__init__()
+        self.__max_word_len = 0
+        self.__vocab = set()
+        self.__load_vocab(vocab_path)
+
+    def tokenize(self, text):
+        """Tokenize the input string `text` and return the tokenized result.
+        """
+        text_len = len(text)
+        result = []
+        i = 0
+        while i < text_len:
+            word = found_word = ""
+            # Deal with English characters.
+            if self.__is_eng_char(text[i]):
+                for j in range(i, text_len + 1):
+                    if j < text_len and self.__is_eng_char(text[j]):
+                        word += self.__tolower(text[j])
+                    else:
+                        # Forward matching by character granularity.
+                        if word in self.__vocab:
+                            result.append(word)
+                        i = j - 1
+                        break
+            else:
+                for j in range(i, min(i + self.__max_word_len, text_len)):
+                    word += text[j]
+                    if word in self.__vocab:
+                        found_word = word
+                if len(found_word) > 0:
+                    result.append(found_word)
+                    i += len(found_word) - 1
+            i += 1
+        return result
+
+    def contains(self, word):
+        """Check whether the word is in the vocabulary.
+        """
+        return word in self.__vocab
+
+    def __load_vocab(self, vocab_path):
+        """Load the word dictionary.
+ """ + with open(vocab_path, 'r', encoding='utf-8') as fin: + vocab_size = 0 + for line in fin.readlines(): + fields = line.strip().split('\t') + assert len(fields) >= 2 + word = fields[1] + self.__max_word_len = max(self.__max_word_len, len(word)) + self.__vocab.add(word) + vocab_size += 1 + + def __is_eng_char(self, c): + """Check whether char c is an English character. + """ + return (c >= 'A' and c <= 'Z') or (c >= 'a' and c <= 'z') + + def __tolower(self, c): + """Return the lowercase character of the corresponding character, or return + the original character if there is no corresponding lowercase character. + """ + return c.lower() + + +class LACTokenizer(Tokenizer): + def __init__(self, vocab_path, lac): + super().__init__() + self.__max_word_len = 0 + self.__vocab = set() + self.__lac = lac + self.__load_vocab(vocab_path) + + def __load_vocab(self, vocab_path): + """Load the word dictionary. + """ + with open(vocab_path, 'r', encoding='utf-8') as fin: + vocab_size = 0 + for line in fin.readlines(): + fields = line.strip().split('\t') + assert len(fields) >= 2 + word = fields[1] + self.__max_word_len = max(self.__max_word_len, len(word)) + self.__vocab.add(word) + vocab_size += 1 + + def tokenize(self, text): + results = self.__lac.lexical_analysis(texts=[text], use_gpu=False, batch_size=1, return_tag=True) + # Change English words to lower case. + # And just preserve the word in vocab. + words = results[0]["word"] + result = [] + for word in words: + word = word.lower() + if word in self.__vocab: + result.append(word) + return result + + def contains(self, word): + """Check whether the word is in the vocabulary. + """ + return word in self.__vocab diff --git a/hub_module/modules/text/semantic_model/lda_webpage/util.py b/modules/text/semantic_model/lda_webpage/util.py similarity index 100% rename from hub_module/modules/text/semantic_model/lda_webpage/util.py rename to modules/text/semantic_model/lda_webpage/util.py diff --git a/modules/text/semantic_model/lda_webpage/vocab.py b/modules/text/semantic_model/lda_webpage/vocab.py new file mode 100644 index 0000000000000000000000000000000000000000..7cc01fe71ceb63d2328ddaf15da8173c9508c118 --- /dev/null +++ b/modules/text/semantic_model/lda_webpage/vocab.py @@ -0,0 +1,41 @@ +from paddlehub.common.logger import logger + +OOV = -1 + + +class WordCount(object): + def __init__(self, word_id, count): + self.word_id = word_id + self.count = count + + +class Vocab(object): + def __init__(self): + self.__term2id = {} + self.__id2term = {} + + def get_id(self, word): + if word not in self.__term2id: + return OOV + return self.__term2id[word] + + def load(self, vocab_file): + self.__term2id = {} + self.__id2term = {} + with open(vocab_file, 'r', encoding='utf-8') as fin: + for line in fin.readlines(): + fields = line.strip().split('\t') + assert len(fields) == 5, "Vocabulary file [%s] format error!" % (vocab_file) + term = fields[1] + id_ = int(fields[2]) + if term in self.__term2id: + logger.error("Duplicate word [%s] in vocab file!" 
% (term)) + continue + self.__term2id[term] = id_ + self.__id2term[id_] = term + + def size(self): + return len(self.__term2id) + + def vocabulary(self): + return self.__id2term diff --git a/hub_module/modules/text/semantic_model/lda_webpage/vose_alias.py b/modules/text/semantic_model/lda_webpage/vose_alias.py similarity index 100% rename from hub_module/modules/text/semantic_model/lda_webpage/vose_alias.py rename to modules/text/semantic_model/lda_webpage/vose_alias.py diff --git a/hub_module/modules/text/semantic_model/rbt3/README.md b/modules/text/semantic_model/rbt3/README.md similarity index 100% rename from hub_module/modules/text/semantic_model/rbt3/README.md rename to modules/text/semantic_model/rbt3/README.md diff --git a/hub_module/modules/text/semantic_model/lda_webpage/__init__.py b/modules/text/semantic_model/rbt3/__init__.py similarity index 100% rename from hub_module/modules/text/semantic_model/lda_webpage/__init__.py rename to modules/text/semantic_model/rbt3/__init__.py diff --git a/hub_module/modules/text/semantic_model/rbt3/__init__.py b/modules/text/semantic_model/rbt3/model/__init__.py similarity index 100% rename from hub_module/modules/text/semantic_model/rbt3/__init__.py rename to modules/text/semantic_model/rbt3/model/__init__.py diff --git a/modules/text/semantic_model/rbt3/model/bert.py b/modules/text/semantic_model/rbt3/model/bert.py new file mode 100644 index 0000000000000000000000000000000000000000..9afb90c193f8c2898d99bc55d1678ef1f62b2303 --- /dev/null +++ b/modules/text/semantic_model/rbt3/model/bert.py @@ -0,0 +1,196 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
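+#
+# This file carries the standard PaddlePaddle static-graph BERT definition;
+# the rbt3 module that ships it is, going by its name, HFL's 3-layer Chinese
+# RoBERTa-wwm-ext model, which reuses this graph as its encoder backbone.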
+"""BERT model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import six +import json + +import paddle.fluid as fluid + +from rbt3.model.transformer_encoder import encoder, pre_process_layer + + +class BertConfig(object): + def __init__(self, config_path): + self._config_dict = self._parse(config_path) + + def _parse(self, config_path): + try: + with open(config_path) as json_file: + config_dict = json.load(json_file) + except Exception: + raise IOError("Error in parsing bert model config file '%s'" % config_path) + else: + return config_dict + + def __getitem__(self, key): + return self._config_dict[key] + + def print_config(self): + for arg, value in sorted(six.iteritems(self._config_dict)): + print('%s: %s' % (arg, value)) + print('------------------------------------------------') + + +class BertModel(object): + def __init__(self, src_ids, position_ids, sentence_ids, input_mask, config, weight_sharing=True, use_fp16=False): + + self._emb_size = config['hidden_size'] + self._n_layer = config['num_hidden_layers'] + self._n_head = config['num_attention_heads'] + self._voc_size = config['vocab_size'] + self._max_position_seq_len = config['max_position_embeddings'] + self._sent_types = config['type_vocab_size'] + self._hidden_act = config['hidden_act'] + self._prepostprocess_dropout = config['hidden_dropout_prob'] + self._attention_dropout = config['attention_probs_dropout_prob'] + self._weight_sharing = weight_sharing + + self._word_emb_name = "word_embedding" + self._pos_emb_name = "pos_embedding" + self._sent_emb_name = "sent_embedding" + self._dtype = "float16" if use_fp16 else "float32" + + # Initialize all weigths by truncated normal initializer, and all biases + # will be initialized by constant zero by default. 
+ self._param_initializer = fluid.initializer.TruncatedNormal(scale=config['initializer_range']) + + self._build_model(src_ids, position_ids, sentence_ids, input_mask) + + def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): + # padding id in vocabulary must be set to 0 + emb_out = fluid.layers.embedding( + input=src_ids, + size=[self._voc_size, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._word_emb_name, initializer=self._param_initializer), + is_sparse=False) + position_emb_out = fluid.layers.embedding( + input=position_ids, + size=[self._max_position_seq_len, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._pos_emb_name, initializer=self._param_initializer)) + + sent_emb_out = fluid.layers.embedding( + sentence_ids, + size=[self._sent_types, self._emb_size], + dtype=self._dtype, + param_attr=fluid.ParamAttr(name=self._sent_emb_name, initializer=self._param_initializer)) + + emb_out = emb_out + position_emb_out + emb_out = emb_out + sent_emb_out + + emb_out = pre_process_layer(emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder') + + if self._dtype == "float16": + input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) + + self_attn_mask = fluid.layers.matmul(x=input_mask, y=input_mask, transpose_y=True) + self_attn_mask = fluid.layers.scale(x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) + n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] * self._n_head, axis=1) + n_head_self_attn_mask.stop_gradient = True + + self._enc_out = encoder( + enc_input=emb_out, + attn_bias=n_head_self_attn_mask, + n_layer=self._n_layer, + n_head=self._n_head, + d_key=self._emb_size // self._n_head, + d_value=self._emb_size // self._n_head, + d_model=self._emb_size, + d_inner_hid=self._emb_size * 4, + prepostprocess_dropout=self._prepostprocess_dropout, + attention_dropout=self._attention_dropout, + relu_dropout=0, + hidden_act=self._hidden_act, + preprocess_cmd="", + postprocess_cmd="dan", + param_initializer=self._param_initializer, + name='encoder') + + def get_sequence_output(self): + return self._enc_out + + def get_pooled_output(self): + """Get the first feature of each sequence for classification""" + + next_sent_feat = fluid.layers.slice(input=self._enc_out, axes=[1], starts=[0], ends=[1]) + next_sent_feat = fluid.layers.fc( + input=next_sent_feat, + size=self._emb_size, + act="tanh", + param_attr=fluid.ParamAttr(name="pooled_fc.w_0", initializer=self._param_initializer), + bias_attr="pooled_fc.b_0") + return next_sent_feat + + def get_pretraining_output(self, mask_label, mask_pos, labels): + """Get the loss & accuracy for pretraining""" + + mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') + + # extract the first token feature in each sentence + next_sent_feat = self.get_pooled_output() + reshaped_emb_out = fluid.layers.reshape(x=self._enc_out, shape=[-1, self._emb_size]) + # extract masked tokens' feature + mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) + + # transform: fc + mask_trans_feat = fluid.layers.fc( + input=mask_feat, + size=self._emb_size, + act=self._hidden_act, + param_attr=fluid.ParamAttr(name='mask_lm_trans_fc.w_0', initializer=self._param_initializer), + bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) + # transform: layer norm + mask_trans_feat = pre_process_layer(mask_trans_feat, 'n', name='mask_lm_trans') + + mask_lm_out_bias_attr = fluid.ParamAttr( + name="mask_lm_out_fc.b_0", 
+        if self._weight_sharing:
+            fc_out = fluid.layers.matmul(
+                x=mask_trans_feat,
+                y=fluid.default_main_program().global_block().var(self._word_emb_name),
+                transpose_y=True)
+            fc_out += fluid.layers.create_parameter(
+                shape=[self._voc_size], dtype=self._dtype, attr=mask_lm_out_bias_attr, is_bias=True)
+
+        else:
+            fc_out = fluid.layers.fc(
+                input=mask_trans_feat,
+                size=self._voc_size,
+                param_attr=fluid.ParamAttr(name="mask_lm_out_fc.w_0", initializer=self._param_initializer),
+                bias_attr=mask_lm_out_bias_attr)
+
+        mask_lm_loss = fluid.layers.softmax_with_cross_entropy(logits=fc_out, label=mask_label)
+        mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss)
+
+        next_sent_fc_out = fluid.layers.fc(
+            input=next_sent_feat,
+            size=2,
+            param_attr=fluid.ParamAttr(name="next_sent_fc.w_0", initializer=self._param_initializer),
+            bias_attr="next_sent_fc.b_0")
+
+        next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy(
+            logits=next_sent_fc_out, label=labels, return_softmax=True)
+
+        next_sent_acc = fluid.layers.accuracy(input=next_sent_softmax, label=labels)
+
+        mean_next_sent_loss = fluid.layers.mean(next_sent_loss)
+
+        loss = mean_next_sent_loss + mean_mask_lm_loss
+        return next_sent_acc, mean_mask_lm_loss, loss
diff --git a/modules/text/semantic_model/rbt3/model/transformer_encoder.py b/modules/text/semantic_model/rbt3/model/transformer_encoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..53051cde80308a17a30d9b92de11c712b63da406
--- /dev/null
+++ b/modules/text/semantic_model/rbt3/model/transformer_encoder.py
@@ -0,0 +1,288 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Transformer encoder."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from functools import partial
+
+import paddle.fluid as fluid
+import paddle.fluid.layers as layers
+
+
+def multi_head_attention(queries,
+                         keys,
+                         values,
+                         attn_bias,
+                         d_key,
+                         d_value,
+                         d_model,
+                         n_head=1,
+                         dropout_rate=0.,
+                         cache=None,
+                         param_initializer=None,
+                         name='multi_head_att'):
+    """
+    Multi-Head Attention. Note that attn_bias is added to the logit before
+    computing the softmax activation, to mask certain selected positions so
+    that they will not be considered in the attention weights.
+    """
+    keys = queries if keys is None else keys
+    values = keys if values is None else values
+
+    if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
+        raise ValueError("Inputs: queries, keys and values should all be 3-D tensors.")
+
+    def __compute_qkv(queries, keys, values, n_head, d_key, d_value):
+        """
+        Add linear projection to queries, keys, and values.
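+        Each projection has its own fc parameters. The outputs have shape
+        [bs, max_sequence_length, n_head * d_key] for q and k, and
+        [bs, max_sequence_length, n_head * d_value] for v; __split_heads
+        later separates out the head dimension.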
+ """ + q = layers.fc( + input=queries, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_query_fc.w_0', initializer=param_initializer), + bias_attr=name + '_query_fc.b_0') + k = layers.fc( + input=keys, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_key_fc.w_0', initializer=param_initializer), + bias_attr=name + '_key_fc.b_0') + v = layers.fc( + input=values, + size=d_value * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_value_fc.w_0', initializer=param_initializer), + bias_attr=name + '_value_fc.b_0') + return q, k, v + + def __split_heads(x, n_head): + """ + Reshape the last dimension of inpunt tensor x so that it becomes two + dimensions and then transpose. Specifically, input a tensor with shape + [bs, max_sequence_length, n_head * hidden_dim] then output a tensor + with shape [bs, n_head, max_sequence_length, hidden_dim]. + """ + hidden_size = x.shape[-1] + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + reshaped = layers.reshape(x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) + + # permuate the dimensions into: + # [batch_size, n_head, max_sequence_len, hidden_size_per_head] + return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + + def __combine_heads(x): + """ + Transpose and then reshape the last two dimensions of inpunt tensor x + so that it becomes one dimension, which is reverse to __split_heads. + """ + if len(x.shape) == 3: return x + if len(x.shape) != 4: + raise ValueError("Input(x) should be a 4-D Tensor.") + + trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + return layers.reshape(x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], inplace=True) + + def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): + """ + Scaled Dot-Product Attention + """ + scaled_q = layers.scale(x=q, scale=d_key**-0.5) + product = layers.matmul(x=scaled_q, y=k, transpose_y=True) + if attn_bias: + product += attn_bias + weights = layers.softmax(product) + if dropout_rate: + weights = layers.dropout( + weights, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + out = layers.matmul(weights, v) + return out + + q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) + + if cache is not None: # use cache and concat time steps + # Since the inplace reshape in __split_heads changes the shape of k and + # v, which is the cache input for next time step, reshape the cache + # input from the previous time step first. + k = cache["k"] = layers.concat([layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) + v = cache["v"] = layers.concat([layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) + + q = __split_heads(q, n_head) + k = __split_heads(k, n_head) + v = __split_heads(v, n_head) + + ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate) + + out = __combine_heads(ctx_multiheads) + + # Project back to the model size. 
+    proj_out = layers.fc(
+        input=out,
+        size=d_model,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_output_fc.w_0', initializer=param_initializer),
+        bias_attr=name + '_output_fc.b_0')
+    return proj_out
+
+
+def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate, hidden_act, param_initializer=None, name='ffn'):
+    """
+    Position-wise Feed-Forward Networks.
+    This module consists of two linear transformations with a ReLU activation
+    in between, which is applied to each position separately and identically.
+    """
+    hidden = layers.fc(
+        input=x,
+        size=d_inner_hid,
+        num_flatten_dims=2,
+        act=hidden_act,
+        param_attr=fluid.ParamAttr(name=name + '_fc_0.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_0.b_0')
+    if dropout_rate:
+        hidden = layers.dropout(
+            hidden, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    out = layers.fc(
+        input=hidden,
+        size=d_hid,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_fc_1.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_1.b_0')
+    return out
+
+
+def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., name=''):
+    """
+    Add residual connection, layer normalization and dropout to the out tensor
+    optionally, according to the value of process_cmd.
+    This will be used before or after multi-head attention and position-wise
+    feed-forward networks.
+    """
+    for cmd in process_cmd:
+        if cmd == "a":  # add residual connection
+            out = out + prev_out if prev_out else out
+        elif cmd == "n":  # add layer normalization
+            out_dtype = out.dtype
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float32")
+            out = layers.layer_norm(
+                out,
+                begin_norm_axis=len(out.shape) - 1,
+                param_attr=fluid.ParamAttr(name=name + '_layer_norm_scale', initializer=fluid.initializer.Constant(1.)),
+                bias_attr=fluid.ParamAttr(name=name + '_layer_norm_bias', initializer=fluid.initializer.Constant(0.)))
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float16")
+        elif cmd == "d":  # add dropout
+            if dropout_rate:
+                out = layers.dropout(
+                    out, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    return out
+
+
+pre_process_layer = partial(pre_post_process_layer, None)
+post_process_layer = pre_post_process_layer
+
+
+def encoder_layer(enc_input,
+                  attn_bias,
+                  n_head,
+                  d_key,
+                  d_value,
+                  d_model,
+                  d_inner_hid,
+                  prepostprocess_dropout,
+                  attention_dropout,
+                  relu_dropout,
+                  hidden_act,
+                  preprocess_cmd="n",
+                  postprocess_cmd="da",
+                  param_initializer=None,
+                  name=''):
+    """The encoder layers that can be stacked to form a deep encoder.
+    This module consists of a multi-head (self) attention sub-layer followed by
+    a position-wise feed-forward network, each wrapped with post_process_layer
+    to add residual connection, layer normalization and dropout.
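+    With preprocess_cmd="" and postprocess_cmd="dan", the values BertModel
+    passes in, each sub-layer output goes through dropout, residual addition
+    and layer normalization, i.e. the post-LN arrangement of the original
+    Transformer/BERT.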
+ """ + attn_output = multi_head_attention( + pre_process_layer(enc_input, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_att'), + None, + None, + attn_bias, + d_key, + d_value, + d_model, + n_head, + attention_dropout, + param_initializer=param_initializer, + name=name + '_multi_head_att') + attn_output = post_process_layer( + enc_input, attn_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_att') + ffd_output = positionwise_feed_forward( + pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_ffn'), + d_inner_hid, + d_model, + relu_dropout, + hidden_act, + param_initializer=param_initializer, + name=name + '_ffn') + return post_process_layer(attn_output, ffd_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_ffn') + + +def encoder(enc_input, + attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """ + The encoder is composed of a stack of identical layers returned by calling + encoder_layer. + """ + for i in range(n_layer): + enc_output = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + name=name + '_layer_' + str(i)) + enc_input = enc_output + enc_output = pre_process_layer(enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") + + return enc_output diff --git a/modules/text/semantic_model/rbt3/module.py b/modules/text/semantic_model/rbt3/module.py new file mode 100644 index 0000000000000000000000000000000000000000..60eadf060d568c1fec6bb4cce4a7fbaac39045ed --- /dev/null +++ b/modules/text/semantic_model/rbt3/module.py @@ -0,0 +1,74 @@ +# coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from paddlehub import TransformerModule
+from paddlehub.module.module import moduleinfo
+
+from rbt3.model.bert import BertConfig, BertModel
+
+
+@moduleinfo(
+    name="rbt3",
+    version="1.0.0",
+    summary="rbt3, 3-layer, 768-hidden, 12-heads, 38M parameters",
+    author="ymcui",
+    author_email="ymcui@ir.hit.edu.cn",
+    type="nlp/semantic_model",
+)
+class BertWwm(TransformerModule):
+    def _initialize(self):
+        self.MAX_SEQ_LEN = 512
+        self.params_path = os.path.join(self.directory, "assets", "params")
+        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
+
+        bert_config_path = os.path.join(self.directory, "assets", "bert_config_rbt3.json")
+        self.bert_config = BertConfig(bert_config_path)
+
+    def net(self, input_ids, position_ids, segment_ids, input_mask):
+        """
+        Create the neural network.
+
+        Args:
+            input_ids (tensor): the word ids.
+            position_ids (tensor): the position ids.
+            segment_ids (tensor): the segment ids.
+            input_mask (tensor): the padding mask.
+
+        Returns:
+            pooled_output (tensor): sentence-level output for classification tasks.
+            sequence_output (tensor): token-level output for sequence tasks.
+        """
+        bert = BertModel(
+            src_ids=input_ids,
+            position_ids=position_ids,
+            sentence_ids=segment_ids,
+            input_mask=input_mask,
+            config=self.bert_config,
+            use_fp16=False)
+        pooled_output = bert.get_pooled_output()
+        sequence_output = bert.get_sequence_output()
+        return pooled_output, sequence_output
+
+
+if __name__ == '__main__':
+    test_module = BertWwm()
diff --git a/hub_module/modules/text/semantic_model/rbtl3/README.md b/modules/text/semantic_model/rbtl3/README.md
similarity index 100%
rename from hub_module/modules/text/semantic_model/rbtl3/README.md
rename to modules/text/semantic_model/rbtl3/README.md
diff --git a/hub_module/modules/text/semantic_model/rbt3/model/__init__.py b/modules/text/semantic_model/rbtl3/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/rbt3/model/__init__.py
rename to modules/text/semantic_model/rbtl3/__init__.py
diff --git a/hub_module/modules/text/semantic_model/rbtl3/__init__.py b/modules/text/semantic_model/rbtl3/model/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/rbtl3/__init__.py
rename to modules/text/semantic_model/rbtl3/model/__init__.py
diff --git a/modules/text/semantic_model/rbtl3/model/bert.py b/modules/text/semantic_model/rbtl3/model/bert.py
new file mode 100644
index 0000000000000000000000000000000000000000..e61be4035cf8379f02dc588eb2420f9699449629
--- /dev/null
+++ b/modules/text/semantic_model/rbtl3/model/bert.py
@@ -0,0 +1,196 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""BERT model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import six +import json + +import paddle.fluid as fluid + +from rbtl3.model.transformer_encoder import encoder, pre_process_layer + + +class BertConfig(object): + def __init__(self, config_path): + self._config_dict = self._parse(config_path) + + def _parse(self, config_path): + try: + with open(config_path) as json_file: + config_dict = json.load(json_file) + except Exception: + raise IOError("Error in parsing bert model config file '%s'" % config_path) + else: + return config_dict + + def __getitem__(self, key): + return self._config_dict[key] + + def print_config(self): + for arg, value in sorted(six.iteritems(self._config_dict)): + print('%s: %s' % (arg, value)) + print('------------------------------------------------') + + +class BertModel(object): + def __init__(self, src_ids, position_ids, sentence_ids, input_mask, config, weight_sharing=True, use_fp16=False): + + self._emb_size = config['hidden_size'] + self._n_layer = config['num_hidden_layers'] + self._n_head = config['num_attention_heads'] + self._voc_size = config['vocab_size'] + self._max_position_seq_len = config['max_position_embeddings'] + self._sent_types = config['type_vocab_size'] + self._hidden_act = config['hidden_act'] + self._prepostprocess_dropout = config['hidden_dropout_prob'] + self._attention_dropout = config['attention_probs_dropout_prob'] + self._weight_sharing = weight_sharing + + self._word_emb_name = "word_embedding" + self._pos_emb_name = "pos_embedding" + self._sent_emb_name = "sent_embedding" + self._dtype = "float16" if use_fp16 else "float32" + + # Initialize all weigths by truncated normal initializer, and all biases + # will be initialized by constant zero by default. 
+        self._param_initializer = fluid.initializer.TruncatedNormal(scale=config['initializer_range'])
+
+        self._build_model(src_ids, position_ids, sentence_ids, input_mask)
+
+    def _build_model(self, src_ids, position_ids, sentence_ids, input_mask):
+        # padding id in vocabulary must be set to 0
+        emb_out = fluid.layers.embedding(
+            input=src_ids,
+            size=[self._voc_size, self._emb_size],
+            dtype=self._dtype,
+            param_attr=fluid.ParamAttr(name=self._word_emb_name, initializer=self._param_initializer),
+            is_sparse=False)
+        position_emb_out = fluid.layers.embedding(
+            input=position_ids,
+            size=[self._max_position_seq_len, self._emb_size],
+            dtype=self._dtype,
+            param_attr=fluid.ParamAttr(name=self._pos_emb_name, initializer=self._param_initializer))
+
+        sent_emb_out = fluid.layers.embedding(
+            sentence_ids,
+            size=[self._sent_types, self._emb_size],
+            dtype=self._dtype,
+            param_attr=fluid.ParamAttr(name=self._sent_emb_name, initializer=self._param_initializer))
+
+        emb_out = emb_out + position_emb_out
+        emb_out = emb_out + sent_emb_out
+
+        emb_out = pre_process_layer(emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder')
+
+        if self._dtype == "float16":
+            input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype)
+
+        self_attn_mask = fluid.layers.matmul(x=input_mask, y=input_mask, transpose_y=True)
+        self_attn_mask = fluid.layers.scale(x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False)
+        n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] * self._n_head, axis=1)
+        n_head_self_attn_mask.stop_gradient = True
+
+        self._enc_out = encoder(
+            enc_input=emb_out,
+            attn_bias=n_head_self_attn_mask,
+            n_layer=self._n_layer,
+            n_head=self._n_head,
+            d_key=self._emb_size // self._n_head,
+            d_value=self._emb_size // self._n_head,
+            d_model=self._emb_size,
+            d_inner_hid=self._emb_size * 4,
+            prepostprocess_dropout=self._prepostprocess_dropout,
+            attention_dropout=self._attention_dropout,
+            relu_dropout=0,
+            hidden_act=self._hidden_act,
+            preprocess_cmd="",
+            postprocess_cmd="dan",
+            param_initializer=self._param_initializer,
+            name='encoder')
+
+    def get_sequence_output(self):
+        return self._enc_out
+
+    def get_pooled_output(self):
+        """Get the first feature of each sequence for classification"""
+
+        next_sent_feat = fluid.layers.slice(input=self._enc_out, axes=[1], starts=[0], ends=[1])
+        next_sent_feat = fluid.layers.fc(
+            input=next_sent_feat,
+            size=self._emb_size,
+            act="tanh",
+            param_attr=fluid.ParamAttr(name="pooled_fc.w_0", initializer=self._param_initializer),
+            bias_attr="pooled_fc.b_0")
+        return next_sent_feat
+
+    def get_pretraining_output(self, mask_label, mask_pos, labels):
+        """Get the loss & accuracy for pretraining"""
+
+        mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32')
+
+        # extract the first token feature in each sentence
+        next_sent_feat = self.get_pooled_output()
+        reshaped_emb_out = fluid.layers.reshape(x=self._enc_out, shape=[-1, self._emb_size])
+        # extract masked tokens' feature
+        mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos)
+
+        # transform: fc
+        mask_trans_feat = fluid.layers.fc(
+            input=mask_feat,
+            size=self._emb_size,
+            act=self._hidden_act,
+            param_attr=fluid.ParamAttr(name='mask_lm_trans_fc.w_0', initializer=self._param_initializer),
+            bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0'))
+        # transform: layer norm
+        mask_trans_feat = pre_process_layer(mask_trans_feat, 'n', name='mask_lm_trans')
+
+        mask_lm_out_bias_attr = fluid.ParamAttr(
+            name="mask_lm_out_fc.b_0", initializer=fluid.initializer.Constant(value=0.0))
+        if self._weight_sharing:
+            fc_out = fluid.layers.matmul(
+                x=mask_trans_feat,
+                y=fluid.default_main_program().global_block().var(self._word_emb_name),
+                transpose_y=True)
+            fc_out += fluid.layers.create_parameter(
+                shape=[self._voc_size], dtype=self._dtype, attr=mask_lm_out_bias_attr, is_bias=True)
+
+        else:
+            fc_out = fluid.layers.fc(
+                input=mask_trans_feat,
+                size=self._voc_size,
+                param_attr=fluid.ParamAttr(name="mask_lm_out_fc.w_0", initializer=self._param_initializer),
+                bias_attr=mask_lm_out_bias_attr)
+
+        mask_lm_loss = fluid.layers.softmax_with_cross_entropy(logits=fc_out, label=mask_label)
+        mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss)
+
+        next_sent_fc_out = fluid.layers.fc(
+            input=next_sent_feat,
+            size=2,
+            param_attr=fluid.ParamAttr(name="next_sent_fc.w_0", initializer=self._param_initializer),
+            bias_attr="next_sent_fc.b_0")
+
+        next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy(
+            logits=next_sent_fc_out, label=labels, return_softmax=True)
+
+        next_sent_acc = fluid.layers.accuracy(input=next_sent_softmax, label=labels)
+
+        mean_next_sent_loss = fluid.layers.mean(next_sent_loss)
+
+        loss = mean_next_sent_loss + mean_mask_lm_loss
+        return next_sent_acc, mean_mask_lm_loss, loss
diff --git a/modules/text/semantic_model/rbtl3/model/transformer_encoder.py b/modules/text/semantic_model/rbtl3/model/transformer_encoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..53051cde80308a17a30d9b92de11c712b63da406
--- /dev/null
+++ b/modules/text/semantic_model/rbtl3/model/transformer_encoder.py
@@ -0,0 +1,288 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Transformer encoder."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from functools import partial
+
+import paddle.fluid as fluid
+import paddle.fluid.layers as layers
+
+
+def multi_head_attention(queries,
+                         keys,
+                         values,
+                         attn_bias,
+                         d_key,
+                         d_value,
+                         d_model,
+                         n_head=1,
+                         dropout_rate=0.,
+                         cache=None,
+                         param_initializer=None,
+                         name='multi_head_att'):
+    """
+    Multi-Head Attention. Note that attn_bias is added to the logit before
+    computing the softmax activation, to mask certain selected positions so
+    that they will not be considered in the attention weights.
+    """
+    keys = queries if keys is None else keys
+    values = keys if values is None else values
+
+    if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
+        raise ValueError("Inputs: queries, keys and values should all be 3-D tensors.")
+
+    def __compute_qkv(queries, keys, values, n_head, d_key, d_value):
+        """
+        Add linear projection to queries, keys, and values.
+ """ + q = layers.fc( + input=queries, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_query_fc.w_0', initializer=param_initializer), + bias_attr=name + '_query_fc.b_0') + k = layers.fc( + input=keys, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_key_fc.w_0', initializer=param_initializer), + bias_attr=name + '_key_fc.b_0') + v = layers.fc( + input=values, + size=d_value * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_value_fc.w_0', initializer=param_initializer), + bias_attr=name + '_value_fc.b_0') + return q, k, v + + def __split_heads(x, n_head): + """ + Reshape the last dimension of inpunt tensor x so that it becomes two + dimensions and then transpose. Specifically, input a tensor with shape + [bs, max_sequence_length, n_head * hidden_dim] then output a tensor + with shape [bs, n_head, max_sequence_length, hidden_dim]. + """ + hidden_size = x.shape[-1] + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + reshaped = layers.reshape(x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) + + # permuate the dimensions into: + # [batch_size, n_head, max_sequence_len, hidden_size_per_head] + return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + + def __combine_heads(x): + """ + Transpose and then reshape the last two dimensions of inpunt tensor x + so that it becomes one dimension, which is reverse to __split_heads. + """ + if len(x.shape) == 3: return x + if len(x.shape) != 4: + raise ValueError("Input(x) should be a 4-D Tensor.") + + trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + return layers.reshape(x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], inplace=True) + + def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): + """ + Scaled Dot-Product Attention + """ + scaled_q = layers.scale(x=q, scale=d_key**-0.5) + product = layers.matmul(x=scaled_q, y=k, transpose_y=True) + if attn_bias: + product += attn_bias + weights = layers.softmax(product) + if dropout_rate: + weights = layers.dropout( + weights, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + out = layers.matmul(weights, v) + return out + + q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) + + if cache is not None: # use cache and concat time steps + # Since the inplace reshape in __split_heads changes the shape of k and + # v, which is the cache input for next time step, reshape the cache + # input from the previous time step first. + k = cache["k"] = layers.concat([layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) + v = cache["v"] = layers.concat([layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) + + q = __split_heads(q, n_head) + k = __split_heads(k, n_head) + v = __split_heads(v, n_head) + + ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate) + + out = __combine_heads(ctx_multiheads) + + # Project back to the model size. 
+    proj_out = layers.fc(
+        input=out,
+        size=d_model,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_output_fc.w_0', initializer=param_initializer),
+        bias_attr=name + '_output_fc.b_0')
+    return proj_out
+
+
+def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate, hidden_act, param_initializer=None, name='ffn'):
+    """
+    Position-wise Feed-Forward Networks.
+    This module consists of two linear transformations with a ReLU activation
+    in between, which is applied to each position separately and identically.
+    """
+    hidden = layers.fc(
+        input=x,
+        size=d_inner_hid,
+        num_flatten_dims=2,
+        act=hidden_act,
+        param_attr=fluid.ParamAttr(name=name + '_fc_0.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_0.b_0')
+    if dropout_rate:
+        hidden = layers.dropout(
+            hidden, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    out = layers.fc(
+        input=hidden,
+        size=d_hid,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_fc_1.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_1.b_0')
+    return out
+
+
+def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., name=''):
+    """
+    Add residual connection, layer normalization and dropout to the out tensor
+    optionally, according to the value of process_cmd.
+    This will be used before or after multi-head attention and position-wise
+    feed-forward networks.
+    """
+    for cmd in process_cmd:
+        if cmd == "a":  # add residual connection
+            out = out + prev_out if prev_out else out
+        elif cmd == "n":  # add layer normalization
+            out_dtype = out.dtype
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float32")
+            out = layers.layer_norm(
+                out,
+                begin_norm_axis=len(out.shape) - 1,
+                param_attr=fluid.ParamAttr(name=name + '_layer_norm_scale', initializer=fluid.initializer.Constant(1.)),
+                bias_attr=fluid.ParamAttr(name=name + '_layer_norm_bias', initializer=fluid.initializer.Constant(0.)))
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float16")
+        elif cmd == "d":  # add dropout
+            if dropout_rate:
+                out = layers.dropout(
+                    out, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    return out
+
+
+pre_process_layer = partial(pre_post_process_layer, None)
+post_process_layer = pre_post_process_layer
+
+
+def encoder_layer(enc_input,
+                  attn_bias,
+                  n_head,
+                  d_key,
+                  d_value,
+                  d_model,
+                  d_inner_hid,
+                  prepostprocess_dropout,
+                  attention_dropout,
+                  relu_dropout,
+                  hidden_act,
+                  preprocess_cmd="n",
+                  postprocess_cmd="da",
+                  param_initializer=None,
+                  name=''):
+    """The encoder layers that can be stacked to form a deep encoder.
+    This module consists of a multi-head (self) attention sub-layer followed by
+    a position-wise feed-forward network, each wrapped with post_process_layer
+    to add residual connection, layer normalization and dropout.
+ """ + attn_output = multi_head_attention( + pre_process_layer(enc_input, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_att'), + None, + None, + attn_bias, + d_key, + d_value, + d_model, + n_head, + attention_dropout, + param_initializer=param_initializer, + name=name + '_multi_head_att') + attn_output = post_process_layer( + enc_input, attn_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_att') + ffd_output = positionwise_feed_forward( + pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout, name=name + '_pre_ffn'), + d_inner_hid, + d_model, + relu_dropout, + hidden_act, + param_initializer=param_initializer, + name=name + '_ffn') + return post_process_layer(attn_output, ffd_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_ffn') + + +def encoder(enc_input, + attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name=''): + """ + The encoder is composed of a stack of identical layers returned by calling + encoder_layer. + """ + for i in range(n_layer): + enc_output = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + name=name + '_layer_' + str(i)) + enc_input = enc_output + enc_output = pre_process_layer(enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder") + + return enc_output diff --git a/modules/text/semantic_model/rbtl3/module.py b/modules/text/semantic_model/rbtl3/module.py new file mode 100644 index 0000000000000000000000000000000000000000..b6cc1270fd6771aefcd75a0fc4bb7ad9baf7d04c --- /dev/null +++ b/modules/text/semantic_model/rbtl3/module.py @@ -0,0 +1,74 @@ +# coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from paddlehub import TransformerModule
+from paddlehub.module.module import moduleinfo
+
+from rbtl3.model.bert import BertConfig, BertModel
+
+
+@moduleinfo(
+    name="rbtl3",
+    version="1.0.0",
+    summary="rbtl3, 3-layer, 1024-hidden, 16-heads, 61M parameters",
+    author="ymcui",
+    author_email="ymcui@ir.hit.edu.cn",
+    type="nlp/semantic_model",
+)
+class BertWwm(TransformerModule):
+    def _initialize(self):
+        self.MAX_SEQ_LEN = 512
+        self.params_path = os.path.join(self.directory, "assets", "params")
+        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
+
+        bert_config_path = os.path.join(self.directory, "assets", "bert_config_rbtl3.json")
+        self.bert_config = BertConfig(bert_config_path)
+
+    def net(self, input_ids, position_ids, segment_ids, input_mask):
+        """
+        Create the neural network.
+
+        Args:
+            input_ids (tensor): the word ids.
+            position_ids (tensor): the position ids.
+            segment_ids (tensor): the segment ids.
+            input_mask (tensor): the padding mask.
+
+        Returns:
+            pooled_output (tensor): sentence-level output for classification tasks.
+            sequence_output (tensor): token-level output for sequence tasks.
+        """
+        bert = BertModel(
+            src_ids=input_ids,
+            position_ids=position_ids,
+            sentence_ids=segment_ids,
+            input_mask=input_mask,
+            config=self.bert_config,
+            use_fp16=False)
+        pooled_output = bert.get_pooled_output()
+        sequence_output = bert.get_sequence_output()
+        return pooled_output, sequence_output
+
+
+if __name__ == '__main__':
+    test_module = BertWwm()
diff --git a/hub_module/modules/text/semantic_model/simnet_bow/README.md b/modules/text/semantic_model/simnet_bow/README.md
similarity index 100%
rename from hub_module/modules/text/semantic_model/simnet_bow/README.md
rename to modules/text/semantic_model/simnet_bow/README.md
diff --git a/hub_module/modules/text/semantic_model/rbtl3/model/__init__.py b/modules/text/semantic_model/simnet_bow/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/rbtl3/model/__init__.py
rename to modules/text/semantic_model/simnet_bow/__init__.py
diff --git a/hub_module/modules/text/semantic_model/simnet_bow/assets/params.txt b/modules/text/semantic_model/simnet_bow/assets/params.txt
similarity index 100%
rename from hub_module/modules/text/semantic_model/simnet_bow/assets/params.txt
rename to modules/text/semantic_model/simnet_bow/assets/params.txt
diff --git a/hub_module/modules/text/semantic_model/simnet_bow/assets/vocab.txt b/modules/text/semantic_model/simnet_bow/assets/vocab.txt
similarity index 100%
rename from hub_module/modules/text/semantic_model/simnet_bow/assets/vocab.txt
rename to modules/text/semantic_model/simnet_bow/assets/vocab.txt
diff --git a/modules/text/semantic_model/simnet_bow/module.py b/modules/text/semantic_model/simnet_bow/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef4a49935ff0c61c9b144d4d1fd31dc6c1ce0306
--- /dev/null
+++ b/modules/text/semantic_model/simnet_bow/module.py
@@ -0,0 +1,396 @@
+# -*- coding:utf-8 -*-
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import ast
+import json
+import math
+import os
+import six
+
+import numpy as np
+import paddle.fluid as fluid
+from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
+import paddlehub as hub
+from paddlehub.common.paddle_helper import add_vars_prefix, get_variable_info
+from paddlehub.common.utils import sys_stdin_encoding
+from paddlehub.io.parser import txt_parser
+from paddlehub.module.module import serving
+from paddlehub.module.module import moduleinfo
+from paddlehub.module.module import runnable
+
+from simnet_bow.processor import load_vocab, preprocess, postprocess
+
+
+class DataFormatError(Exception):
+    def __init__(self, *args):
+        self.args = args
+
+
+@moduleinfo(
+    name="simnet_bow",
+    version="1.2.0",
+    summary="Baidu's open-source similarity network model based on bow_pairwise.",
+    author="baidu-nlp",
+    author_email="",
+    type="nlp/sentiment_analysis")
+class SimnetBow(hub.Module):
+    def _initialize(self):
+        """
+        initialize with the necessary elements
+        """
+        self.pretrained_model_path = os.path.join(self.directory, "assets", "infer_model")
+        self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt")
+        self.vocab = load_vocab(self.vocab_path)
+        self.param_file = os.path.join(self.directory, "assets", "params.txt")
+        self._word_seg_module = None
+
+        self._set_config()
+
+    @property
+    def word_seg_module(self):
+        """
+        lac module
+        """
+        if not self._word_seg_module:
+            self._word_seg_module = hub.Module(name="lac")
+        return self._word_seg_module
+
+    def _set_config(self):
+        """
+        predictor config setting
+        """
+        cpu_config = AnalysisConfig(self.pretrained_model_path)
+        cpu_config.disable_glog_info()
+        cpu_config.disable_gpu()
+        cpu_config.switch_ir_optim(False)
+        self.cpu_predictor = create_paddle_predictor(cpu_config)
+
+        try:
+            _places = os.environ["CUDA_VISIBLE_DEVICES"]
+            int(_places[0])
+            use_gpu = True
+        except Exception:
+            use_gpu = False
+        if use_gpu:
+            gpu_config = AnalysisConfig(self.pretrained_model_path)
+            gpu_config.disable_glog_info()
+            gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0)
+            self.gpu_predictor = create_paddle_predictor(gpu_config)
+
+    def context(self, trainable=False, max_seq_len=128, num_slots=1):
+        """
+        Get the input, output and program of the pretrained simnet_bow module.
+
+        Args:
+            trainable(bool): whether to fine-tune the pretrained parameters of simnet_bow or not.
+            max_seq_len (int): It will limit the total sequence returned so that it has a maximum length.
+            num_slots(int): the number of data inputs fed to the model, selected from the following options:
+
+                - 1(default): one input, e.g. when the module is used for a sentence classification task.
+                - 2: two inputs, e.g. when the module is used for a text matching task (point-wise).
+                - 3: three inputs, e.g. when the module is used for a text matching task (pair-wise).
+
+        Returns:
+            inputs(dict): the input variables of simnet_bow (words)
+            outputs(dict): the output variables of the input words (word embeddings) and the sequence length of the first input_text
+            main_program(Program): the main_program of simnet_bow with pretrained parameters
+        """
+        assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots
+        main_program = fluid.Program()
+        startup_program = fluid.Program()
+        with fluid.program_guard(main_program, startup_program):
+            text_1 = fluid.layers.data(name="text", shape=[-1, max_seq_len, 1], dtype="int64", lod_level=0)
+            seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64', lod_level=0)
+            seq_len_used = fluid.layers.squeeze(seq_len, axes=[1])
+
+            # Add embedding layer.
+            w_param_attrs = fluid.ParamAttr(
+                name="emb", initializer=fluid.initializer.TruncatedNormal(scale=0.02), trainable=trainable)
+            dict_dim = 500002
+            emb_1 = fluid.layers.embedding(
+                input=text_1,
+                size=[dict_dim, 128],
+                is_sparse=True,
+                padding_idx=dict_dim - 1,
+                dtype='float32',
+                param_attr=w_param_attrs)
+            emb_1_name = emb_1.name
+            data_list = [text_1]
+            emb_name_list = [emb_1_name]
+
+            if num_slots > 1:
+                text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
+                emb_2 = fluid.embedding(
+                    input=text_2,
+                    size=[dict_dim, 128],
+                    is_sparse=True,
+                    padding_idx=dict_dim - 1,
+                    dtype='float32',
+                    param_attr=w_param_attrs)
+                emb_2_name = emb_2.name
+                data_list.append(text_2)
+                emb_name_list.append(emb_2_name)
+
+            if num_slots > 2:
+                text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
+                emb_3 = fluid.embedding(
+                    input=text_3,
+                    size=[dict_dim, 128],
+                    is_sparse=True,
+                    padding_idx=dict_dim - 1,
+                    dtype='float32',
+                    param_attr=w_param_attrs)
+                emb_3_name = emb_3.name
+                data_list.append(text_3)
+                emb_name_list.append(emb_3_name)
+
+            variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"],
+                                    list(main_program.global_block().vars.keys()))
+            prefix_name = "@HUB_{}@".format(self.name)
+            add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names)
+
+            for param in main_program.global_block().iter_parameters():
+                param.trainable = trainable
+
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+
+        # Load the simnet_bow pretrained model.
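+        # fluid.io.load_vars restores only the variables for which the
+        # predicate returns True, i.e. those with a matching parameter file
+        # under pretrained_model_path.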
+        def if_exist(var):
+            return os.path.exists(os.path.join(self.pretrained_model_path, var.name))
+
+        fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist)
+
+        inputs = {'seq_len': seq_len}
+        outputs = {}
+        for index, data in enumerate(data_list):
+            if index == 0:
+                inputs['text'] = data
+                outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]]
+            else:
+                inputs['text_%s' % (index + 1)] = data
+                outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name +
+                                                                                   emb_name_list[index]]
+        return inputs, outputs, main_program
+
+    def texts2tensor(self, texts):
+        """
+        Transform the texts(list) to PaddleTensor
+        Args:
+            texts(list): texts
+        Returns:
+            tensor(PaddleTensor): tensor with texts data
+        """
+        lod = [0]
+        data = []
+        for i, text in enumerate(texts):
+            data += text['processed']
+            lod.append(len(text['processed']) + lod[i])
+        tensor = PaddleTensor(np.array(data).astype('int64'))
+        tensor.name = "words"
+        tensor.lod = [lod]
+        tensor.shape = [lod[-1], 1]
+        return tensor
+
+    def to_unicode(self, texts):
+        """
+        Convert each element's type(str) of texts(list) to unicode in python2.7
+        Args:
+            texts(list): each element's type is str in python2.7
+        Returns:
+            texts(list): each element's type is unicode in python2.7
+        """
+
+        if six.PY2:
+            unicode_texts = []
+            for text in texts:
+                if isinstance(text, six.string_types):
+                    unicode_texts.append(text.decode(sys_stdin_encoding()).decode("utf8"))
+                else:
+                    unicode_texts.append(text)
+            texts = unicode_texts
+        return texts
+
+    def check_data(self, texts=[], data={}):
+        """
+        Check the input data.
+        Args:
+            texts(list): the input texts to be predicted, where the first element is text_1(list)
+                         and the second element is text_2(list), such as [['这道题很难'], ['这道题不简单']];
+                         used when data is not provided.
+            data(dict): keys must be 'text_1' and 'text_2'; each value is the texts(list) to be predicted
+        Returns:
+            results(dict): the validated data to predict
+        """
+        predicted_data = {'text_1': [], 'text_2': []}
+        if texts != [] and isinstance(texts, list) and len(texts) == 2 and (len(texts[0]) == len(
+                texts[1])) and texts[0] and texts[1] and data == {}:
+
+            predicted_data['text_1'] = texts[0]
+            predicted_data['text_2'] = texts[1]
+
+        elif texts == [] and isinstance(data, dict) and isinstance(data.get('text_1', None), list) and isinstance(
+                data.get('text_2', None), list) and (len(data['text_1']) == len(
+                    data['text_2'])) and data['text_1'] and data['text_2']:
+
+            predicted_data = data
+
+        else:
+            raise ValueError("The input data is inconsistent with expectations.")
+
+        return predicted_data
+
+    @serving
+    def similarity(self, texts=[], data={}, use_gpu=False, batch_size=1):
+        """
+        Get the similarity prediction results with the texts as input.
+        Args:
+            texts(list): the input texts to be predicted, where the first element is text_1(list)
+                         and the second element is text_2(list), such as [['这道题很难'], ['这道题不简单']];
+                         used when data is not provided.
+            data(dict): keys must be 'text_1' and 'text_2'; each value is the texts(list) to be predicted
+            use_gpu(bool): whether to use gpu for prediction or not
+            batch_size(int): the program deals once with one batch
+        Returns:
+            results(list): the similarity prediction results
+        """
+        if use_gpu:
+            try:
+                _places = os.environ["CUDA_VISIBLE_DEVICES"]
+                int(_places[0])
+            except Exception:
+                raise RuntimeError(
+                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
+                )
+
+        data = self.check_data(texts, data)
+
+        start_idx = 0
+        iteration = int(math.ceil(len(data['text_1']) / batch_size))
+        results = []
+        for i in range(iteration):
+            batch_data = {'text_1': [], 'text_2': []}
+            batch_data['text_1'] = data['text_1'][start_idx:(start_idx + batch_size)]
+            batch_data['text_2'] = data['text_2'][start_idx:(start_idx + batch_size)]
+            start_idx = start_idx + batch_size
+            processed_results = preprocess(self.word_seg_module, self.vocab, batch_data, use_gpu, batch_size)
+
+            tensor_words_1 = self.texts2tensor(processed_results["text_1"])
+            tensor_words_2 = self.texts2tensor(processed_results["text_2"])
+
+            if use_gpu:
+                batch_out = self.gpu_predictor.run([tensor_words_1, tensor_words_2])
+            else:
+                batch_out = self.cpu_predictor.run([tensor_words_1, tensor_words_2])
+            batch_result = postprocess(batch_out[1], processed_results)
+            results += batch_result
+        return results
+
+    @runnable
+    def run_cmd(self, argvs):
+        """
+        Run as a command
+        """
+        self.parser = argparse.ArgumentParser(
+            description="Run the simnet_bow module.", prog='hub run simnet_bow', usage='%(prog)s', add_help=True)
+
+        self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required")
+        self.arg_config_group = self.parser.add_argument_group(
+            title="Config options", description="Run configuration for controlling module behavior, not required.")
+
+        self.add_module_config_arg()
+        self.add_module_input_arg()
+
+        args = self.parser.parse_args(argvs)
+
+        try:
+            input_data = self.check_input_data(args)
+        except (DataFormatError, RuntimeError):
+            self.parser.print_help()
+            return None
+
+        results = self.similarity(data=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size)
+
+        return results
+
+    def add_module_config_arg(self):
+        """
+        Add the command config options
+        """
+        self.arg_config_group.add_argument(
+            '--use_gpu', type=ast.literal_eval, default=False, help="whether to use GPU for prediction")
+
+        self.arg_config_group.add_argument('--batch_size', type=int, default=1, help="batch size for prediction")
+
+    def add_module_input_arg(self):
+        """
+        Add the command input options
+        """
+        self.arg_input_group.add_argument('--input_file', type=str, default=None, help="file containing the input data")
+        self.arg_input_group.add_argument('--text_1', type=str, default=None, help="text to predict")
+        self.arg_input_group.add_argument('--text_2', type=str, default=None, help="text to predict")
+
+    def check_input_data(self, args):
+        input_data = {}
+        if args.input_file:
+            if not os.path.exists(args.input_file):
+                print("File %s does not exist." % args.input_file)
+                raise RuntimeError
+            else:
+                input_data = txt_parser.parse(args.input_file, use_strip=True)
+        elif args.text_1 and args.text_2:
+            if args.text_1.strip() != '' and args.text_2.strip() != '':
+                if six.PY2:
+                    input_data = {
+                        "text_1": [args.text_1.strip().decode(sys_stdin_encoding()).decode("utf8")],
+                        "text_2": [args.text_2.strip().decode(sys_stdin_encoding()).decode("utf8")]
+                    }
+                else:
+                    input_data = {"text_1": [args.text_1], "text_2": [args.text_2]}
+            else:
+                print("ERROR: The input data is inconsistent with expectations.")
+
+        if input_data == {}:
+            print("ERROR: The input data is inconsistent with expectations.")
+            raise DataFormatError
+
+        return input_data
+
+    def get_vocab_path(self):
+        """
+        Get the path to the vocabulary which was used for pretraining.
+        Returns:
+            self.vocab_path(str): the path to the vocabulary
+        """
+        return self.vocab_path
+
+
+if __name__ == "__main__":
+
+    simnet_bow = SimnetBow()
+    inputs, outputs, program = simnet_bow.context(num_slots=3)
+    print(inputs)
+    print(outputs)
+
+    # Data to be predicted
+    test_text_1 = ["这道题太难了", "这道题太难了", "这道题太难了"]
+    test_text_2 = ["这道题是上一年的考题", "这道题不简单", "这道题很有意思"]
+
+    inputs = {"text_1": test_text_1, "text_2": test_text_2}
+    results = simnet_bow.similarity(data=inputs, batch_size=2)
+    print(results)
+    max_score = -1
+    result_text = ""
+    for result in results:
+        if result['similarity'] > max_score:
+            max_score = result['similarity']
+            result_text = result['text_2']
+
+    print("The best match for %s is %s" % (test_text_1[0], result_text))
diff --git a/modules/text/semantic_model/simnet_bow/processor.py b/modules/text/semantic_model/simnet_bow/processor.py
new file mode 100644
index 0000000000000000000000000000000000000000..a4362e854ebc009da97998ebecc33a338c78641f
--- /dev/null
+++ b/modules/text/semantic_model/simnet_bow/processor.py
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+import io
+
+
+def load_vocab(file_path):
+    """
+    load the given vocabulary
+    """
+    vocab = {}
+    with io.open(file_path, 'r', encoding='utf8') as f:
+        for line in f:
+            line = line.rstrip()
+            parts = line.split('\t')
+            vocab[parts[0]] = int(parts[1])
+    vocab["<unk>"] = len(vocab)
+    return vocab
+
+
+text_a_key = "text_1"
+text_b_key = "text_2"
+
+
+def preprocess(lac, word_dict, data_dict, use_gpu=False, batch_size=1):
+    """
+    Convert the word str to word id and pad the text
+    """
+    result = {text_a_key: [], text_b_key: []}
+    processed_a = lac.lexical_analysis(data={'text': data_dict[text_a_key]}, use_gpu=use_gpu, batch_size=batch_size)
+    processed_b = lac.lexical_analysis(data={'text': data_dict[text_b_key]}, use_gpu=use_gpu)
+    unk_id = word_dict['<unk>']
+    for index, (text_a, text_b) in enumerate(zip(processed_a, processed_b)):
+        result_i = {'processed': []}
+        result_i['origin'] = data_dict[text_a_key][index]
+        for word in text_a['word']:
+            _index = word_dict.get(word, unk_id)
+            result_i['processed'].append(_index)
+        result[text_a_key].append(result_i)
+
+        result_i = {'processed': []}
+        result_i['origin'] = data_dict[text_b_key][index]
+        for word in text_b['word']:
+            _index = word_dict.get(word, unk_id)
+            result_i['processed'].append(_index)
+        result[text_b_key].append(result_i)
+    return result
+
+
+def postprocess(predict_out, data_info):
+    """
+    Convert the model's output tensor to similarity results
+    """
+    result = []
+    pred = predict_out.as_ndarray()
+    for index in range(len(data_info[text_a_key])):
+        result_i = {}
+        result_i[text_a_key] = data_info[text_a_key][index]['origin']
+        result_i[text_b_key] = data_info[text_b_key][index]['origin']
+        result_i['similarity'] = float('%.4f' % pred[index][0])
+        result.append(result_i)
+    return result
diff --git a/hub_module/modules/text/semantic_model/slda_news/README.md b/modules/text/semantic_model/slda_news/README.md
similarity index 100%
rename from hub_module/modules/text/semantic_model/slda_news/README.md
rename to modules/text/semantic_model/slda_news/README.md
diff --git a/hub_module/modules/text/semantic_model/simnet_bow/__init__.py b/modules/text/semantic_model/slda_news/__init__.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/simnet_bow/__init__.py
rename to modules/text/semantic_model/slda_news/__init__.py
diff --git a/hub_module/modules/text/semantic_model/slda_news/config.py b/modules/text/semantic_model/slda_news/config.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/slda_news/config.py
rename to modules/text/semantic_model/slda_news/config.py
diff --git a/modules/text/semantic_model/slda_news/document.py b/modules/text/semantic_model/slda_news/document.py
new file mode 100644
index 0000000000000000000000000000000000000000..4476230a5c9bc8d545b52386dbf00a201e59b468
--- /dev/null
+++ b/modules/text/semantic_model/slda_news/document.py
@@ -0,0 +1,176 @@
+import numpy as np
+
+
+class Topic(object):
+    """Basic data structure of a topic, containing the topic id and
+    its corresponding probability.
+    """
+
+    def __init__(self, tid, prob):
+        self.tid = tid  # topic id
+        self.prob = prob  # topic probability
+
+
+class Token(object):
+    """Basic storage unit of LDA documents, containing the word id
+    and its corresponding topic.
+    """
+
+    def __init__(self, topic, id):
+        self.topic = topic
+        self.id = id
+
+
+class Sentence(object):
+    """Basic storage unit of SentenceLDA documents, containing the word ids
+    of the sentence and its corresponding topic id.
+    """
+
+    def __init__(self, topic, tokens):
+        self.topic = topic
+        self.tokens = tokens
+
+
+class LDADoc(object):
+    """The storage structure of the LDA model's inference result.
+    """
+
+    def __init__(self):
+        self._num_topics = None  # Number of topics.
+        self._num_accum = None  # Number of accumulated sample rounds.
+        self._alpha = None  # Document prior parameter.
+        self._tokens = None  # Storage structure of inference results.
+        self._topic_sum = None  # Document's topic sum in one round of samples.
+        self._accum_topic_sum = None  # Accumulated results of topic sum.
+
+    def init(self, num_topics):
+        """Initialize the LDADoc according to num_topics.
+        """
+        self._num_topics = num_topics
+        self._num_accum = 0
+        self._tokens = []
+        self._topic_sum = np.zeros(self._num_topics)
+        self._accum_topic_sum = np.zeros(self._num_topics)
+
+    def add_token(self, token):
+        """Add a new word to the current LDADoc.
+        Arg:
+            token: Token class object.
+        """
+        assert token.topic >= 0, "Topic %d out of range!" % token.topic
+        assert token.topic < self._num_topics, "Topic %d out of range!" % token.topic
+        self._tokens.append(token)
+        self._topic_sum[token.topic] += 1
+
+    def token(self, index):
+        return self._tokens[index]
+
+    def set_topic(self, index, new_topic):
+        """Set the index-th word's topic to new_topic, and update the
+        corresponding topic distribution.
+        """
+        assert new_topic >= 0, "Topic %d out of range!" % new_topic
+        assert new_topic < self._num_topics, "Topic %d out of range!" % new_topic
+        old_topic = self._tokens[index].topic
+        if new_topic == old_topic:
+            return
+        self._tokens[index].topic = new_topic
+        self._topic_sum[old_topic] -= 1
+        self._topic_sum[new_topic] += 1
+
+    def set_alpha(self, alpha):
+        self._alpha = alpha
+
+    def size(self):
+        """Return the number of words in the LDADoc.
+        """
+        return len(self._tokens)
+
+    def topic_sum(self, topic_id):
+        return self._topic_sum[topic_id]
+
+    def sparse_topic_dist(self, sort=True):
+        """Return the topic distribution of the document in sparse format.
+        By default, it is sorted by topic probability in descending order.
+        """
+        topic_dist = []
+        sum_ = np.sum(self._accum_topic_sum)
+        if sum_ == 0:
+            return topic_dist
+        for i in range(0, self._num_topics):
+            if self._accum_topic_sum[i] == 0:
+                continue
+            topic_dist.append(Topic(i, self._accum_topic_sum[i] * 1.0 / sum_))
+        if sort:
+
+            def take_elem(topic):
+                return topic.prob
+
+            topic_dist.sort(key=take_elem, reverse=True)
+        if topic_dist is None:
+            topic_dist = []
+
+        return topic_dist
+
+    def dense_topic_dist(self):
+        """Return the topic distribution of the document in dense format,
+        taking into account the prior parameter alpha.
+        """
+        dense_dist = np.zeros(self._num_topics)
+        if self.size() == 0:
+            return dense_dist
+        dense_dist = (self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / (
+            self.size() + self._alpha * self._num_topics)
+        return dense_dist
+
+    def accumulate_topic_num(self):
+        self._accum_topic_sum += self._topic_sum
+        self._num_accum += 1
+
+
+class SLDADoc(LDADoc):
+    """Sentence LDA Document, inherited from LDADoc.
+    Adds the add_sentence interface.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.__sentences = None
+
+    def init(self, num_topics):
+        """Initialize the SLDADoc according to num_topics.
+        """
+        self._num_topics = num_topics
+        self.__sentences = []
+        self._num_accum = 0
+        self._topic_sum = np.zeros(self._num_topics)
+        self._accum_topic_sum = np.zeros(self._num_topics)
+
+    def add_sentence(self, sent):
+        """Add a new sentence to the current SLDADoc.
+        Arg:
+            sent: Sentence class object.
+        """
+        assert sent.topic >= 0, "Topic %d out of range!" % (sent.topic)
+        assert sent.topic < self._num_topics, "Topic %d out of range!" % (sent.topic)
+        self.__sentences.append(sent)
+        self._topic_sum[sent.topic] += 1
+
+    def set_topic(self, index, new_topic):
+        assert new_topic >= 0, "Topic %d out of range!" % (new_topic)
+        assert new_topic < self._num_topics, "Topic %d out of range!" % (new_topic)
+        old_topic = self.__sentences[index].topic
+        if new_topic == old_topic:
+            return
+        self.__sentences[index].topic = new_topic
+        self._topic_sum[old_topic] -= 1
+        self._topic_sum[new_topic] += 1
+
+    def size(self):
+        """Return the number of sentences in the SLDADoc.
+ """ + return len(self.__sentences) + + def sent(self, index): + return self.__sentences[index] diff --git a/modules/text/semantic_model/slda_news/inference_engine.py b/modules/text/semantic_model/slda_news/inference_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..7f202111f7b665b692027577c71d19f030d7aee0 --- /dev/null +++ b/modules/text/semantic_model/slda_news/inference_engine.py @@ -0,0 +1,91 @@ +import os + +from paddlehub.common.logger import logger + +from slda_news.config import ModelConfig +from slda_news.util import load_prototxt, fix_random_seed, rand_k +from slda_news.model import TopicModel +from slda_news.sampler import GibbsSampler, MHSampler +from slda_news.document import LDADoc, SLDADoc, Token, Sentence +from slda_news.vocab import OOV + + +class SamplerType: + GibbsSampling = 0 + MetropolisHastings = 1 + + +class InferenceEngine(object): + def __init__(self, model_dir, conf_file, type=SamplerType.MetropolisHastings): + # Read model configuration. + config = ModelConfig() + conf_file_path = os.path.join(model_dir, conf_file) + load_prototxt(conf_file_path, config) + self.__model = TopicModel(model_dir, config) + self.__config = config + + # Initialize the sampler according to the configuration. + if type == SamplerType.GibbsSampling: + self.__sampler = GibbsSampler(self.__model) + elif type == SamplerType.MetropolisHastings: + self.__sampler = MHSampler(self.__model) + + def infer(self, input, doc): + """Perform LDA topic inference on input, and store the results in doc. + Args: + input: a list of strings after tokenization. + doc: LDADoc type or SLDADoc type. + """ + fix_random_seed() + if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): + doc.init(self.__model.num_topics()) + doc.set_alpha(self.__model.alpha()) + for token in input: + id_ = self.__model.term_id(token) + if id_ != OOV: + init_topic = rand_k(self.__model.num_topics()) + doc.add_token(Token(init_topic, id_)) + self.lda_infer(doc, 20, 50) + elif isinstance(doc, SLDADoc): + doc.init(self.__model.num_topics()) + doc.set_alpha(self.__model.alpha()) + for sent in input: + words = [] + for token in sent: + id_ = self.__model.term_id(token) + if id_ != OOV: + words.append(id_) + init_topic = rand_k(self.__model.num_topics()) + doc.add_sentence(Sentence(init_topic, words)) + self.slda_infer(doc, 20, 50) + else: + logger.error("Wrong Doc Type!") + + def lda_infer(self, doc, burn_in_iter, total_iter): + assert burn_in_iter >= 0 + assert total_iter > 0 + assert total_iter > burn_in_iter + + for iter_ in range(total_iter): + self.__sampler.sample_doc(doc) + if iter_ >= burn_in_iter: + doc.accumulate_topic_num() + + def slda_infer(self, doc, burn_in_iter, total_iter): + assert burn_in_iter >= 0 + assert total_iter > 0 + assert total_iter > burn_in_iter + + for iter_ in range(total_iter): + self.__sampler.sample_doc(doc) + if iter_ >= burn_in_iter: + doc.accumulate_topic_num() + + def model_type(self): + return self.__model.type() + + def get_model(self): + return self.__model + + def get_config(self): + return self.__config diff --git a/modules/text/semantic_model/slda_news/model.py b/modules/text/semantic_model/slda_news/model.py new file mode 100644 index 0000000000000000000000000000000000000000..f63ca92e0a6c63f44f2e0281d8382dd19e394cd8 --- /dev/null +++ b/modules/text/semantic_model/slda_news/model.py @@ -0,0 +1,123 @@ +import os +from collections import OrderedDict + +import numpy as np +from tqdm import tqdm +from paddlehub.common.logger import logger + +from slda_news.vocab 
import Vocab, WordCount
+
+
+class TopicModel(object):
+    """Storage Structure of Topic model, including vocabulary and word topic count.
+    """
+
+    def __init__(self, model_dir, config):
+        """
+        Args:
+            model_dir: the path of model directory
+            config: ModelConfig class.
+        """
+        self.__word_topic = None  # Model parameter of word topic.
+        self.__vocab = Vocab()  # Vocab data structure of model.
+        self.__num_topics = config.num_topics  # Number of topics.
+        self.__alpha = config.alpha
+        self.__alpha_sum = self.__alpha * self.__num_topics
+        self.__beta = config.beta
+        self.__beta_sum = None
+        self.__type = config.type  # Model type.
+        self.__topic_sum = np.zeros(self.__num_topics, dtype="int64")  # Accum sum of each topic in word topic.
+        self.__topic_words = [[] for _ in range(self.__num_topics)]
+        word_topic_path = os.path.join(model_dir, config.word_topic_file)
+        vocab_path = os.path.join(model_dir, config.vocab_file)
+        self.load_model(word_topic_path, vocab_path)
+
+    def term_id(self, term):
+        return self.__vocab.get_id(term)
+
+    def load_model(self, word_topic_path, vocab_path):
+
+        # Loading vocabulary
+        self.__vocab.load(vocab_path)
+
+        self.__beta_sum = self.__beta * self.__vocab.size()
+        self.__word_topic = [{} for _ in range(self.__vocab.size())]  # List of per-word topic-count dicts.
+        self.__load_word_dict(word_topic_path)
+        logger.info("Model Info: #num_topics=%d #vocab_size=%d alpha=%f beta=%f" %
+                    (self.num_topics(), self.vocab_size(), self.alpha(), self.beta()))
+
+    def word_topic_value(self, word_id, topic_id):
+        """Return value of specific word under specific topic in the model.
+        """
+        word_dict = self.__word_topic[word_id]
+        if topic_id not in word_dict:
+            return 0
+        return word_dict[topic_id]
+
+    def word_topic(self, term_id):
+        """Return the topic distribution of a word.
+        """
+        return self.__word_topic[term_id]
+
+    def topic_sum_value(self, topic_id):
+        return self.__topic_sum[topic_id]
+
+    def topic_sum(self):
+        return self.__topic_sum
+
+    def num_topics(self):
+        return self.__num_topics
+
+    def vocab_size(self):
+        return self.__vocab.size()
+
+    def alpha(self):
+        return self.__alpha
+
+    def alpha_sum(self):
+        return self.__alpha_sum
+
+    def beta(self):
+        return self.__beta
+
+    def beta_sum(self):
+        return self.__beta_sum
+
+    def type(self):
+        return self.__type
+
+    def __load_word_dict(self, word_dict_path):
+        """Load the word topic parameters.
+        """
+        logger.info("Loading word topic.")
+        with open(word_dict_path, 'r', encoding='utf-8') as f:
+            for line in tqdm(f.readlines()):
+                fields = line.strip().split(" ")
+                assert len(fields) > 0, "Model file format error!"
+                term_id = int(fields[0])
+                assert term_id < self.vocab_size(), "Term id out of range!"
+                assert term_id >= 0, "Term id out of range!"
+                for i in range(1, len(fields)):
+                    topic_count = fields[i].split(":")
+                    assert len(topic_count) == 2, "Topic count format error!"
+
+                    topic_id = int(topic_count[0])
+                    assert topic_id >= 0, "Topic out of range!"
+                    assert topic_id < self.__num_topics, "Topic out of range!"
+
+                    count = int(topic_count[1])
+                    assert count >= 0, "Topic count error!"
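+                    # Each line of the word-topic file is "term_id topic:count topic:count ...";
+                    # the parsed count is recorded and folded into the per-topic totals below.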
+ + self.__word_topic[term_id][topic_id] = count + self.__topic_sum[topic_id] += count + self.__topic_words[topic_id].append(WordCount(term_id, count)) + new_dict = OrderedDict() + for key in sorted(self.__word_topic[term_id]): + new_dict[key] = self.__word_topic[term_id][key] + self.__word_topic[term_id] = new_dict + + def get_vocab(self): + return self.__vocab.vocabulary() + + def topic_words(self): + return self.__topic_words diff --git a/modules/text/semantic_model/slda_news/module.py b/modules/text/semantic_model/slda_news/module.py new file mode 100644 index 0000000000000000000000000000000000000000..f488ec25489eac395eae7e83dc86b49bbfe27d52 --- /dev/null +++ b/modules/text/semantic_model/slda_news/module.py @@ -0,0 +1,100 @@ +import os + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.common.logger import logger + +from slda_news.inference_engine import InferenceEngine +from slda_news.document import SLDADoc +from slda_news.semantic_matching import SemanticMatching, WordAndDis +from slda_news.tokenizer import LACTokenizer, SimpleTokenizer +from slda_news.config import ModelType +from slda_news.vocab import Vocab, WordCount + + +@moduleinfo( + name="slda_news", + version="1.0.0", + summary= + "This is a PaddleHub Module for SLDA topic model in news dataset, where we can infer the topic distribution of document.", + author="DesmonDay", + author_email="", + type="nlp/semantic_model") +class TopicModel(hub.Module): + def _initialize(self): + """Initialize with the necessary elements. + """ + self.model_dir = os.path.join(self.directory, 'news') + self.conf_file = 'slda.conf' + self.__engine = InferenceEngine(self.model_dir, self.conf_file) + self.vocab_path = os.path.join(self.model_dir, 'vocab_info.txt') + lac = hub.Module(name="lac") + # self.__tokenizer = SimpleTokenizer(self.vocab_path) + self.__tokenizer = LACTokenizer(self.vocab_path, lac) + + self.vocabulary = self.__engine.get_model().get_vocab() + self.config = self.__engine.get_config() + self.topic_words = self.__engine.get_model().topic_words() + self.topic_sum_table = self.__engine.get_model().topic_sum() + + def take_elem(word_count): + return word_count.count + + for i in range(self.config.num_topics): + self.topic_words[i].sort(key=take_elem, reverse=True) + + logger.info("Finish Initialization.") + + def infer_doc_topic_distribution(self, document): + """ + This interface infers the topic distribution of document. + + Args: + document(str): the input document text. + + Returns: + results(list): returns the topic distribution of document. + """ + tokens = self.__tokenizer.tokenize(document) + if tokens == []: + return [] + results = [] + sentences = [] + sent = [] + for i in range(len(tokens)): + sent.append(tokens[i]) + if len(sent) % 5 == 0: + sentences.append(sent) + sent = [] + if len(sent) > 0: + sentences.append(sent) + doc = SLDADoc() + self.__engine.infer(sentences, doc) + topics = doc.sparse_topic_dist() + for topic in topics: + results.append({"topic id": topic.tid, "distribution": topic.prob}) + return results + + def show_topic_keywords(self, topic_id, k=10): + """ + This interface returns the k keywords under specific topic. + + Args: + topic_id(int): topic information we want to know. + k(int): top k keywords. + + Returns: + results(dict): contains specific topic's keywords and corresponding + probability. 
+ """ + EPS = 1e-8 + results = {} + if 0 <= topic_id < self.config.num_topics: + k = min(k, len(self.topic_words[topic_id])) + for i in range(k): + prob = self.topic_words[topic_id][i].count / \ + (self.topic_sum_table[topic_id] + EPS) + results[self.vocabulary[self.topic_words[topic_id][i].word_id]] = prob + return results + else: + logger.error("%d is out of range!" % topic_id) diff --git a/modules/text/semantic_model/slda_news/sampler.py b/modules/text/semantic_model/slda_news/sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..c36a881ad39dd8ddbf9fea1cbe9ed7ae9af7efc9 --- /dev/null +++ b/modules/text/semantic_model/slda_news/sampler.py @@ -0,0 +1,286 @@ +import os + +import numpy as np +from tqdm import tqdm +from paddlehub.common.logger import logger + +from slda_news.document import LDADoc, SLDADoc, Token, Sentence +from slda_news.vose_alias import VoseAlias +from slda_news.util import rand, rand_k + + +class Sampler(object): + def __init__(self): + pass + + def sample_doc(self, doc): + """Sample LDA or SLDA topics for documents. + """ + raise NotImplementedError + + +class MHSampler(Sampler): + def __init__(self, model): + super().__init__() + self.__model = model + self.__topic_indexes = None + self.__alias_tables = None + self.__prob_sum = None + self.__beta_alias = VoseAlias() + self.__beta_prior_sum = None + self.__mh_steps = 2 + self.__construct_alias_table() + + def __construct_alias_table(self): + """Construct alias table for all words. + """ + logger.info("Construct alias table for alias sampling method.") + vocab_size = self.__model.vocab_size() + self.__topic_indexes = [[] for _ in range(vocab_size)] + self.__alias_tables = [VoseAlias() for _ in range(vocab_size)] + self.__prob_sum = np.zeros(vocab_size) + + # Construct each word's alias table (prior is not included). + for i in tqdm(range(vocab_size)): + dist = [] + prob_sum = 0 + for key in self.__model.word_topic(i): + topic_id = key + word_topic_count = self.__model.word_topic(i)[key] + topic_sum = self.__model.topic_sum_value(topic_id) + + self.__topic_indexes[i].append(topic_id) + q = word_topic_count / (topic_sum + self.__model.beta_sum()) + dist.append(q) + prob_sum += q + self.__prob_sum[i] = prob_sum + if len(dist) > 0: + dist = np.array(dist, dtype=np.float) + self.__alias_tables[i].initialize(dist) + + # Build prior parameter beta's alias table. 
+ beta_dist = self.__model.beta() / (self.__model.topic_sum() + self.__model.beta_sum()) + self.__beta_prior_sum = np.sum(beta_dist) + self.__beta_alias.initialize(beta_dist) + + def sample_doc(self, doc): + if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): + for i in range(doc.size()): + new_topic = self.__sample_token(doc, doc.token(i)) + doc.set_topic(i, new_topic) + elif isinstance(doc, SLDADoc): + for i in range(doc.size()): + new_topic = self.__sample_sentence(doc, doc.sent(i)) + doc.set_topic(i, new_topic) + + def __sample_token(self, doc, token): + new_topic = token.topic + for i in range(self.__mh_steps): + doc_proposed_topic = self.__doc_proposal(doc, token) + new_topic = self.__word_proposal(doc, token, doc_proposed_topic) + return new_topic + + def __sample_sentence(self, doc, sent): + new_topic = sent.topic + for i in range(self.__mh_steps): + doc_proposed_topic = self.__doc_proposal(doc, sent) + new_topic = self.__word_proposal(doc, sent, doc_proposed_topic) + return new_topic + + def __doc_proposal(self, doc, token): + if isinstance(doc, LDADoc) and isinstance(token, Token): + old_topic = token.topic + dart = rand() * (doc.size() + self.__model.alpha_sum()) + if dart < doc.size(): + token_index = int(dart) + new_topic = doc.token(token_index).topic + else: + new_topic = rand_k(self.__model.num_topics()) + + if new_topic != old_topic: + proposal_old = self.__doc_proposal_distribution(doc, old_topic) + proposal_new = self.__doc_proposal_distribution(doc, new_topic) + proportion_old = self.__proportional_function(doc, token, old_topic) + proportion_new = self.__proportional_function(doc, token, new_topic) + transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new)) + rejection = rand() + mask = -(rejection < transition_prob) + return (new_topic & mask) | (old_topic & ~mask) + + return new_topic + + elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): + sent = token + old_topic = sent.topic + dart = rand() * (doc.size() + self.__model.alpha_sum()) + if dart < doc.size(): + token_index = int(dart) + new_topic = doc.sent(token_index).topic + else: + new_topic = rand_k(self.__model.num_topics()) + + if new_topic != old_topic: + proportion_old = self.__proportional_function(doc, sent, old_topic) + proportion_new = self.__proportional_function(doc, sent, new_topic) + proposal_old = self.__doc_proposal_distribution(doc, old_topic) + proposal_new = self.__doc_proposal_distribution(doc, new_topic) + transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new)) + rejection = rand() + mask = -(rejection < transition_prob) + return (new_topic & mask) | (old_topic & ~mask) + + return new_topic + + def __word_proposal(self, doc, token, old_topic): + if isinstance(doc, LDADoc) and isinstance(token, Token): + new_topic = self.__propose(token.id) + if new_topic != old_topic: + proposal_old = self.__word_proposal_distribution(token.id, old_topic) + proposal_new = self.__word_proposal_distribution(token.id, new_topic) + proportion_old = self.__proportional_function(doc, token, old_topic) + proportion_new = self.__proportional_function(doc, token, new_topic) + transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new)) + rejection = rand() + mask = -(rejection < transition_prob) + return (new_topic & mask) | (old_topic & ~mask) + return new_topic + + elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): + sent = token + new_topic = old_topic + for word_id in sent.tokens: + 
new_topic = self.__propose(word_id)
+                if new_topic != old_topic:
+                    proportion_old = self.__proportional_function(doc, sent, old_topic)
+                    proportion_new = self.__proportional_function(doc, sent, new_topic)
+                    proposal_old = self.__word_proposal_distribution(word_id, old_topic)
+                    proposal_new = self.__word_proposal_distribution(word_id, new_topic)
+                    transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new))
+                    rejection = rand()
+                    # Branchless MH accept/reject: mask is all one-bits on acceptance, zero otherwise.
+                    mask = -(rejection < transition_prob)
+                    new_topic = (new_topic & mask) | (old_topic & ~mask)
+            return new_topic
+
+    def __proportional_function(self, doc, token, new_topic):
+        if isinstance(doc, LDADoc) and isinstance(token, Token):
+            old_topic = token.topic
+            dt_alpha = doc.topic_sum(new_topic) + self.__model.alpha()
+            wt_beta = self.__model.word_topic_value(token.id, new_topic) + self.__model.beta()
+            t_sum_beta_sum = self.__model.topic_sum_value(new_topic) + self.__model.beta_sum()
+            if new_topic == old_topic and wt_beta > 1:
+                if dt_alpha > 1:
+                    dt_alpha -= 1
+                wt_beta -= 1
+                t_sum_beta_sum -= 1
+            return dt_alpha * wt_beta / t_sum_beta_sum
+
+        elif isinstance(doc, SLDADoc) and isinstance(token, Sentence):
+            sent = token
+            old_topic = sent.topic
+            result = doc.topic_sum(new_topic) + self.__model.alpha()
+            if new_topic == old_topic:
+                result -= 1
+            for word_id in sent.tokens:
+                wt_beta = self.__model.word_topic_value(word_id, new_topic) + self.__model.beta()
+                t_sum_beta_sum = self.__model.topic_sum_value(new_topic) + self.__model.beta_sum()
+                if new_topic == old_topic and wt_beta > 1:
+                    wt_beta -= 1
+                    t_sum_beta_sum -= 1
+                result *= wt_beta / t_sum_beta_sum
+            return result
+        else:
+            logger.error("Wrong input argument type!")
+
+    def __word_proposal_distribution(self, word_id, topic):
+        wt_beta = self.__model.word_topic_value(word_id, topic) + self.__model.beta()
+        t_sum_beta_sum = self.__model.topic_sum_value(topic) + self.__model.beta_sum()
+        return wt_beta / t_sum_beta_sum
+
+    def __doc_proposal_distribution(self, doc, topic):
+        return doc.topic_sum(topic) + self.__model.alpha()
+
+    def __propose(self, word_id):
+        dart = rand() * (self.__prob_sum[word_id] + self.__beta_prior_sum)
+        if dart < self.__prob_sum[word_id]:
+            idx = self.__alias_tables[word_id].generate()
+            topic = self.__topic_indexes[word_id][idx]
+        else:
+            topic = self.__beta_alias.generate()
+        return topic
+
+
+class GibbsSampler(Sampler):
+    def __init__(self, model):
+        super().__init__()
+        self.__model = model
+
+    def sample_doc(self, doc):
+        if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc):
+            for i in range(doc.size()):
+                new_topic = self.__sample_token(doc, doc.token(i))
+                doc.set_topic(i, new_topic)
+        elif isinstance(doc, SLDADoc):
+            for i in range(doc.size()):
+                new_topic = self.__sample_sentence(doc, doc.sent(i))
+                doc.set_topic(i, new_topic)
+
+    def __sample_token(self, doc, token):
+        old_topic = token.topic
+        num_topics = self.__model.num_topics()
+        accum_prob = np.zeros(num_topics)
+        prob = np.zeros(num_topics)
+        sum_ = 0
+        for i in range(num_topics):
+            dt_alpha = doc.topic_sum(i) + self.__model.alpha()
+            wt_beta = self.__model.word_topic_value(token.id, i) + self.__model.beta()
+            t_sum_beta_sum = self.__model.topic_sum_value(i) + self.__model.beta_sum()
+            if i == old_topic and wt_beta > 1:
+                if dt_alpha > 1:
+                    dt_alpha -= 1
+                wt_beta -= 1
+                t_sum_beta_sum -= 1
+            prob[i] = dt_alpha * wt_beta / t_sum_beta_sum
+            sum_ += prob[i]
+            accum_prob[i] = prob[i] if i == 0 else accum_prob[i - 1] + prob[i]
+
+        dart = rand() * sum_
+        if dart <= accum_prob[0]:
+            return 0
+        for i in range(1, num_topics):
+            if accum_prob[i - 1] < dart <= accum_prob[i]:
+                return i
+        return num_topics - 1
+
+    def __sample_sentence(self, doc, sent):
+        old_topic = sent.topic
+        num_topics = self.__model.num_topics()
+        accum_prob = np.zeros(num_topics)
+        prob = np.zeros(num_topics)
+        sum_ = 0
+        for t in range(num_topics):
+            dt_alpha = doc.topic_sum(t) + self.__model.alpha()
+            t_sum_beta_sum = self.__model.topic_sum_value(t) + self.__model.beta_sum()
+            if t == old_topic:
+                if dt_alpha > 1:
+                    dt_alpha -= 1
+                if t_sum_beta_sum > 1:
+                    t_sum_beta_sum -= 1
+            prob[t] = dt_alpha
+            for i in range(len(sent.tokens)):
+                w = sent.tokens[i]
+                wt_beta = self.__model.word_topic_value(w, t) + self.__model.beta()
+                if t == old_topic and wt_beta > 1:
+                    wt_beta -= 1
+                # Note: for a long sentence this product of many factors smaller than one
+                # can underflow, so precision may be lost.
+                prob[t] *= wt_beta / t_sum_beta_sum
+            sum_ += prob[t]
+            accum_prob[t] = prob[t] if t == 0 else accum_prob[t - 1] + prob[t]
+
+        dart = rand() * sum_
+        if dart <= accum_prob[0]:
+            return 0
+        for t in range(1, num_topics):
+            if accum_prob[t - 1] < dart <= accum_prob[t]:
+                return t
+        return num_topics - 1
diff --git a/hub_module/modules/text/semantic_model/slda_news/semantic_matching.py b/modules/text/semantic_model/slda_news/semantic_matching.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/slda_news/semantic_matching.py
rename to modules/text/semantic_model/slda_news/semantic_matching.py
diff --git a/modules/text/semantic_model/slda_news/tokenizer.py b/modules/text/semantic_model/slda_news/tokenizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..585aed885b63b0e2a2d450b77a6d018615c86b04
--- /dev/null
+++ b/modules/text/semantic_model/slda_news/tokenizer.py
@@ -0,0 +1,127 @@
+import os
+
+import numpy as np
+from paddlehub.common.logger import logger
+
+
+class Tokenizer(object):
+    """Base tokenizer class.
+    """
+
+    def __init__(self):
+        pass
+
+    def tokenize(self, text):
+        raise NotImplementedError
+
+
+class SimpleTokenizer(Tokenizer):
+    """Simple version of an FMM (Forward Maximum Matching) word tokenizer. This tokenizer can only
+    be used in the topic model demo, but not in real business application scenarios.
+
+    Notes: This tokenizer can only recognize the words in the corresponding vocab file.
+    """
+
+    def __init__(self, vocab_path):
+        super().__init__()
+        self.__max_word_len = 0
+        self.__vocab = set()
+        self.__load_vocab(vocab_path)
+
+    def tokenize(self, text):
+        """Tokenize the input string `text`, and return the tokenized result.
+        """
+        text_len = len(text)
+        result = []
+        i = 0
+        while i < text_len:
+            word = found_word = ""
+            # Deal with English characters.
+            if self.__is_eng_char(text[i]):
+                for j in range(i, text_len + 1):
+                    if j < text_len and self.__is_eng_char(text[j]):
+                        word += self.__tolower(text[j])
+                    else:
+                        # Forward matching by character granularity.
+                        if word in self.__vocab:
+                            result.append(word)
+                        i = j - 1
+                        break
+            else:
+                for j in range(i, min(i + self.__max_word_len, text_len)):
+                    word += text[j]
+                    if word in self.__vocab:
+                        found_word = word
+                if len(found_word) > 0:
+                    result.append(found_word)
+                    i += len(found_word) - 1
+            i += 1
+        return result
+
+    def contains(self, word):
+        """Check whether the word is in the vocabulary.
+        """
+        return word in self.__vocab
+
+    def __load_vocab(self, vocab_path):
+        """Load the word dictionary.
+ """ + with open(vocab_path, 'r', encoding='utf-8') as fin: + vocab_size = 0 + for line in fin.readlines(): + fields = line.strip().split('\t') + assert len(fields) >= 2 + word = fields[1] + self.__max_word_len = max(self.__max_word_len, len(word)) + self.__vocab.add(word) + vocab_size += 1 + + def __is_eng_char(self, c): + """Check whether char c is an English character. + """ + return (c >= 'A' and c <= 'Z') or (c >= 'a' and c <= 'z') + + def __tolower(self, c): + """Return the lowercase character of the corresponding character, or return + the original character if there is no corresponding lowercase character. + """ + return c.lower() + + +class LACTokenizer(Tokenizer): + def __init__(self, vocab_path, lac): + super().__init__() + self.__max_word_len = 0 + self.__vocab = set() + self.__lac = lac + self.__load_vocab(vocab_path) + + def __load_vocab(self, vocab_path): + """Load the word dictionary. + """ + with open(vocab_path, 'r', encoding='utf-8') as fin: + vocab_size = 0 + for line in fin.readlines(): + fields = line.strip().split('\t') + assert len(fields) >= 2 + word = fields[1] + self.__max_word_len = max(self.__max_word_len, len(word)) + self.__vocab.add(word) + vocab_size += 1 + + def tokenize(self, text): + results = self.__lac.lexical_analysis(texts=[text], use_gpu=False, batch_size=1, return_tag=True) + # Change English words to lower case. + # And just preserve the word in vocab. + words = results[0]["word"] + result = [] + for word in words: + word = word.lower() + if word in self.__vocab: + result.append(word) + return result + + def contains(self, word): + """Check whether the word is in the vocabulary. + """ + return word in self.__vocab diff --git a/hub_module/modules/text/semantic_model/slda_news/util.py b/modules/text/semantic_model/slda_news/util.py similarity index 100% rename from hub_module/modules/text/semantic_model/slda_news/util.py rename to modules/text/semantic_model/slda_news/util.py diff --git a/modules/text/semantic_model/slda_news/vocab.py b/modules/text/semantic_model/slda_news/vocab.py new file mode 100644 index 0000000000000000000000000000000000000000..7cc01fe71ceb63d2328ddaf15da8173c9508c118 --- /dev/null +++ b/modules/text/semantic_model/slda_news/vocab.py @@ -0,0 +1,41 @@ +from paddlehub.common.logger import logger + +OOV = -1 + + +class WordCount(object): + def __init__(self, word_id, count): + self.word_id = word_id + self.count = count + + +class Vocab(object): + def __init__(self): + self.__term2id = {} + self.__id2term = {} + + def get_id(self, word): + if word not in self.__term2id: + return OOV + return self.__term2id[word] + + def load(self, vocab_file): + self.__term2id = {} + self.__id2term = {} + with open(vocab_file, 'r', encoding='utf-8') as fin: + for line in fin.readlines(): + fields = line.strip().split('\t') + assert len(fields) == 5, "Vocabulary file [%s] format error!" % (vocab_file) + term = fields[1] + id_ = int(fields[2]) + if term in self.__term2id: + logger.error("Duplicate word [%s] in vocab file!" 
% (term)) + continue + self.__term2id[term] = id_ + self.__id2term[id_] = term + + def size(self): + return len(self.__term2id) + + def vocabulary(self): + return self.__id2term diff --git a/hub_module/modules/text/semantic_model/slda_news/vose_alias.py b/modules/text/semantic_model/slda_news/vose_alias.py similarity index 100% rename from hub_module/modules/text/semantic_model/slda_news/vose_alias.py rename to modules/text/semantic_model/slda_news/vose_alias.py diff --git a/hub_module/modules/text/semantic_model/slda_novel/README.md b/modules/text/semantic_model/slda_novel/README.md similarity index 100% rename from hub_module/modules/text/semantic_model/slda_novel/README.md rename to modules/text/semantic_model/slda_novel/README.md diff --git a/hub_module/modules/text/semantic_model/slda_news/__init__.py b/modules/text/semantic_model/slda_novel/__init__.py similarity index 100% rename from hub_module/modules/text/semantic_model/slda_news/__init__.py rename to modules/text/semantic_model/slda_novel/__init__.py diff --git a/hub_module/modules/text/semantic_model/slda_novel/config.py b/modules/text/semantic_model/slda_novel/config.py similarity index 100% rename from hub_module/modules/text/semantic_model/slda_novel/config.py rename to modules/text/semantic_model/slda_novel/config.py diff --git a/modules/text/semantic_model/slda_novel/document.py b/modules/text/semantic_model/slda_novel/document.py new file mode 100644 index 0000000000000000000000000000000000000000..4476230a5c9bc8d545b52386dbf00a201e59b468 --- /dev/null +++ b/modules/text/semantic_model/slda_novel/document.py @@ -0,0 +1,176 @@ +import numpy as np + + +class Topic(object): + """Basic data structure of topic, contains topic id and + corresponding probability. + """ + + def __init__(self, tid, prob): + self.tid = tid # topic id + self.prob = prob # topic probability + + +class Token(object): + """Basic storage unit of LDA documents, contains word id + and corresponding topic. + """ + + def __init__(self, topic, id): + self.topic = topic + self.id = id + + +class Sentence(object): + """Basic storage unit of SentenceLDA documents, contains word ids + of the sentence and its corresponding topic id. + """ + + def __init__(self, topic, tokens): + self.topic = topic + self.tokens = tokens + + +class LDADoc(object): + """The storage structure of LDA model's inference result. + """ + + def __init__(self): + self._num_topics = None # Number of topics. + self._num_accum = None # Number of accumulated sample rounds. + self._alpha = None # Document prior parameter. + self._tokens = None # Storage structure of inference results. + self._topic_sum = None # Document's topic sum in one round samples. + self._accum_topic_sum = None # Accumulated results of topic sum. + + def init(self, num_topics): + """Initialize the LDADoc according to num_topics. + """ + self._num_topics = num_topics + self._num_accum = 0 + self._tokens = [] + self._topic_sum = np.zeros(self._num_topics) + self._accum_topic_sum = np.zeros(self._num_topics) + + def add_token(self, token): + """Add new word to current LDADoc. + Arg: + token: Token class object. + """ + assert token.topic >= 0, "Topic %d out of range!" % token.topic + assert token.topic < self._num_topics, "Topic %d out of range!" % token.topic + self._tokens.append(token) + self._topic_sum[token.topic] += 1 + + def token(self, index): + return self._tokens[index] + + def set_topic(self, index, new_topic): + """Set the index word's topic to new_topic, and update the corresponding + topic distribution. 
+ """ + assert new_topic >= 0, "Topic %d out of range!" % new_topic + assert new_topic < self._num_topics, "Topic %d out of range!" % new_topic + old_topic = self._tokens[index].topic + if new_topic == old_topic: + return + self._tokens[index].topic = new_topic + self._topic_sum[old_topic] -= 1 + self._topic_sum[new_topic] += 1 + + def set_alpha(self, alpha): + self._alpha = alpha + + def size(self): + """Return number of words in LDADoc. + """ + return len(self._tokens) + + def topic_sum(self, topic_id): + return self._topic_sum[topic_id] + + def sparse_topic_dist(self, sort=True): + """Return the topic distribution of documents in sparse format. + By default, it is sorted according to the topic probability + under the descending order. + """ + topic_dist = [] + sum_ = np.sum(self._accum_topic_sum) + if sum_ == 0: + return topic_dist + for i in range(0, self._num_topics): + if self._accum_topic_sum[i] == 0: + continue + topic_dist.append(Topic(i, self._accum_topic_sum[i] * 1.0 / sum_)) + if sort: + + def take_elem(topic): + return topic.prob + + topic_dist.sort(key=take_elem, reverse=True) + if topic_dist is None: + topic_dist = [] + + return topic_dist + + def dense_topic_dist(self): + """Return the distribution of document topics in dense format, + taking into account the prior parameter alpha. + """ + dense_dist = np.zeros(self._num_topics) + if self.size() == 0: + return dense_dist + dense_dist = (self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / ( + self.size() + self._alpha * self._num_topics) + return dense_dist + + def accumulate_topic_num(self): + self._accum_topic_sum += self._topic_sum + self._num_accum += 1 + + +class SLDADoc(LDADoc): + """Sentence LDA Document, inherited from LDADoc. + Add add_sentence interface. + """ + + def __init__(self): + super().__init__() + self.__sentences = None + + def init(self, num_topics): + """Initialize the SLDADoc according to num_topics. + """ + self._num_topics = num_topics + self.__sentences = [] + self._num_accum = 0 + self._topic_sum = np.zeros(self._num_topics) + self._accum_topic_sum = np.zeros(self._num_topics) + + def add_sentence(self, sent): + """Add new sentence to current SLDADoc. + Arg: + sent: Sentence class object. + """ + assert sent.topic >= 0, "Topic %d out of range!" % (sent.topic) + assert sent.topic < self._num_topics, "Topic %d out of range!" % (sent.topic) + self.__sentences.append(sent) + self._topic_sum[sent.topic] += 1 + + def set_topic(self, index, new_topic): + assert new_topic >= 0, "Topic %d out of range!" % (new_topic) + assert new_topic < self._num_topics, "Topic %d out of range!" % (new_topic) + old_topic = self.__sentences[index].topic + if new_topic == old_topic: + return + self.__sentences[index].topic = new_topic + self._topic_sum[old_topic] -= 1 + self._topic_sum[new_topic] += 1 + + def size(self): + """Return number of sentences in SLDADoc. 
+ """ + return len(self.__sentences) + + def sent(self, index): + return self.__sentences[index] diff --git a/modules/text/semantic_model/slda_novel/inference_engine.py b/modules/text/semantic_model/slda_novel/inference_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..d206dbd702e82b889ef864673d093c10094bd937 --- /dev/null +++ b/modules/text/semantic_model/slda_novel/inference_engine.py @@ -0,0 +1,91 @@ +import os + +from paddlehub.common.logger import logger + +from slda_novel.config import ModelConfig +from slda_novel.util import load_prototxt, fix_random_seed, rand_k +from slda_novel.model import TopicModel +from slda_novel.sampler import GibbsSampler, MHSampler +from slda_novel.document import LDADoc, SLDADoc, Token, Sentence +from slda_novel.vocab import OOV + + +class SamplerType: + GibbsSampling = 0 + MetropolisHastings = 1 + + +class InferenceEngine(object): + def __init__(self, model_dir, conf_file, type=SamplerType.MetropolisHastings): + # Read model configuration. + config = ModelConfig() + conf_file_path = os.path.join(model_dir, conf_file) + load_prototxt(conf_file_path, config) + self.__model = TopicModel(model_dir, config) + self.__config = config + + # Initialize the sampler according to the configuration. + if type == SamplerType.GibbsSampling: + self.__sampler = GibbsSampler(self.__model) + elif type == SamplerType.MetropolisHastings: + self.__sampler = MHSampler(self.__model) + + def infer(self, input, doc): + """Perform LDA topic inference on input, and store the results in doc. + Args: + input: a list of strings after tokenization. + doc: LDADoc type or SLDADoc type. + """ + fix_random_seed() + if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): + doc.init(self.__model.num_topics()) + doc.set_alpha(self.__model.alpha()) + for token in input: + id_ = self.__model.term_id(token) + if id_ != OOV: + init_topic = rand_k(self.__model.num_topics()) + doc.add_token(Token(init_topic, id_)) + self.lda_infer(doc, 20, 50) + elif isinstance(doc, SLDADoc): + doc.init(self.__model.num_topics()) + doc.set_alpha(self.__model.alpha()) + for sent in input: + words = [] + for token in sent: + id_ = self.__model.term_id(token) + if id_ != OOV: + words.append(id_) + init_topic = rand_k(self.__model.num_topics()) + doc.add_sentence(Sentence(init_topic, words)) + self.slda_infer(doc, 20, 50) + else: + logger.error("Wrong Doc Type!") + + def lda_infer(self, doc, burn_in_iter, total_iter): + assert burn_in_iter >= 0 + assert total_iter > 0 + assert total_iter > burn_in_iter + + for iter_ in range(total_iter): + self.__sampler.sample_doc(doc) + if iter_ >= burn_in_iter: + doc.accumulate_topic_num() + + def slda_infer(self, doc, burn_in_iter, total_iter): + assert burn_in_iter >= 0 + assert total_iter > 0 + assert total_iter > burn_in_iter + + for iter_ in range(total_iter): + self.__sampler.sample_doc(doc) + if iter_ >= burn_in_iter: + doc.accumulate_topic_num() + + def model_type(self): + return self.__model.type() + + def get_model(self): + return self.__model + + def get_config(self): + return self.__config diff --git a/modules/text/semantic_model/slda_novel/model.py b/modules/text/semantic_model/slda_novel/model.py new file mode 100644 index 0000000000000000000000000000000000000000..cd4e6bab5f4701d4481f064c9ea1bbef829b37db --- /dev/null +++ b/modules/text/semantic_model/slda_novel/model.py @@ -0,0 +1,123 @@ +import os +from collections import OrderedDict + +import numpy as np +from tqdm import tqdm +from paddlehub.common.logger import logger + +from 
slda_novel.vocab import Vocab, WordCount
+
+
+class TopicModel(object):
+    """Storage Structure of Topic model, including vocabulary and word topic count.
+    """
+
+    def __init__(self, model_dir, config):
+        """
+        Args:
+            model_dir: the path of model directory
+            config: ModelConfig class.
+        """
+        self.__word_topic = None  # Model parameter of word topic.
+        self.__vocab = Vocab()  # Vocab data structure of model.
+        self.__num_topics = config.num_topics  # Number of topics.
+        self.__alpha = config.alpha
+        self.__alpha_sum = self.__alpha * self.__num_topics
+        self.__beta = config.beta
+        self.__beta_sum = None
+        self.__type = config.type  # Model type.
+        self.__topic_sum = np.zeros(self.__num_topics, dtype="int64")  # Accum sum of each topic in word topic.
+        self.__topic_words = [[] for _ in range(self.__num_topics)]
+        word_topic_path = os.path.join(model_dir, config.word_topic_file)
+        vocab_path = os.path.join(model_dir, config.vocab_file)
+        self.load_model(word_topic_path, vocab_path)
+
+    def term_id(self, term):
+        return self.__vocab.get_id(term)
+
+    def load_model(self, word_topic_path, vocab_path):
+
+        # Loading vocabulary
+        self.__vocab.load(vocab_path)
+
+        self.__beta_sum = self.__beta * self.__vocab.size()
+        self.__word_topic = [{} for _ in range(self.__vocab.size())]  # List of per-word topic-count dicts.
+        self.__load_word_dict(word_topic_path)
+        logger.info("Model Info: #num_topics=%d #vocab_size=%d alpha=%f beta=%f" %
+                    (self.num_topics(), self.vocab_size(), self.alpha(), self.beta()))
+
+    def word_topic_value(self, word_id, topic_id):
+        """Return value of specific word under specific topic in the model.
+        """
+        word_dict = self.__word_topic[word_id]
+        if topic_id not in word_dict:
+            return 0
+        return word_dict[topic_id]
+
+    def word_topic(self, term_id):
+        """Return the topic distribution of a word.
+        """
+        return self.__word_topic[term_id]
+
+    def topic_sum_value(self, topic_id):
+        return self.__topic_sum[topic_id]
+
+    def topic_sum(self):
+        return self.__topic_sum
+
+    def num_topics(self):
+        return self.__num_topics
+
+    def vocab_size(self):
+        return self.__vocab.size()
+
+    def alpha(self):
+        return self.__alpha
+
+    def alpha_sum(self):
+        return self.__alpha_sum
+
+    def beta(self):
+        return self.__beta
+
+    def beta_sum(self):
+        return self.__beta_sum
+
+    def type(self):
+        return self.__type
+
+    def __load_word_dict(self, word_dict_path):
+        """Load the word topic parameters.
+        """
+        logger.info("Loading word topic.")
+        with open(word_dict_path, 'r', encoding='utf-8') as f:
+            for line in tqdm(f.readlines()):
+                fields = line.strip().split(" ")
+                assert len(fields) > 0, "Model file format error!"
+                term_id = int(fields[0])
+                assert term_id < self.vocab_size(), "Term id out of range!"
+                assert term_id >= 0, "Term id out of range!"
+                for i in range(1, len(fields)):
+                    topic_count = fields[i].split(":")
+                    assert len(topic_count) == 2, "Topic count format error!"
+
+                    topic_id = int(topic_count[0])
+                    assert topic_id >= 0, "Topic out of range!"
+                    assert topic_id < self.__num_topics, "Topic out of range!"
+
+                    count = int(topic_count[1])
+                    assert count >= 0, "Topic count error!"
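+                    # Each line of the word-topic file is "term_id topic:count topic:count ...";
+                    # the parsed count is recorded and folded into the per-topic totals below.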
+ + self.__word_topic[term_id][topic_id] = count + self.__topic_sum[topic_id] += count + self.__topic_words[topic_id].append(WordCount(term_id, count)) + new_dict = OrderedDict() + for key in sorted(self.__word_topic[term_id]): + new_dict[key] = self.__word_topic[term_id][key] + self.__word_topic[term_id] = new_dict + + def get_vocab(self): + return self.__vocab.vocabulary() + + def topic_words(self): + return self.__topic_words diff --git a/modules/text/semantic_model/slda_novel/module.py b/modules/text/semantic_model/slda_novel/module.py new file mode 100644 index 0000000000000000000000000000000000000000..e5a2568a23affb5f3b9c1b48576471aa9f67e26c --- /dev/null +++ b/modules/text/semantic_model/slda_novel/module.py @@ -0,0 +1,102 @@ +import os + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.common.logger import logger + +from slda_novel.inference_engine import InferenceEngine +from slda_novel.document import SLDADoc +from slda_novel.semantic_matching import SemanticMatching, WordAndDis +from slda_novel.tokenizer import LACTokenizer, SimpleTokenizer +from slda_novel.config import ModelType +from slda_novel.vocab import Vocab, WordCount + + +@moduleinfo( + name="slda_novel", + version="1.0.0", + summary= + "This is a PaddleHub Module for SLDA topic model in novel dataset, where we can infer the topic distribution of document.", + author="DesmonDay", + author_email="", + type="nlp/semantic_model") +class TopicModel(hub.Module): + def _initialize(self): + """ + Initialize with the necessary elements. + """ + self.model_dir = os.path.join(self.directory, 'novel') + self.conf_file = 'slda.conf' + self.__engine = InferenceEngine(self.model_dir, self.conf_file) + self.vocab_path = os.path.join(self.model_dir, 'vocab_info.txt') + lac = hub.Module(name="lac") + # self.__tokenizer = SimpleTokenizer(self.vocab_path) + self.__tokenizer = LACTokenizer(self.vocab_path, lac) + + self.vocabulary = self.__engine.get_model().get_vocab() + self.config = self.__engine.get_config() + self.topic_words = self.__engine.get_model().topic_words() + self.topic_sum_table = self.__engine.get_model().topic_sum() + + def take_elem(word_count): + return word_count.count + + for i in range(self.config.num_topics): + self.topic_words[i].sort(key=take_elem, reverse=True) + + logger.info("Finish Initialization.") + + def infer_doc_topic_distribution(self, document): + """ + This interface infers the topic distribution of document. + + Args: + document(str): the input document text. + + Returns: + results(list): returns the topic distribution of document. + """ + tokens = self.__tokenizer.tokenize(document) + if tokens == []: + return [] + results = [] + sentences = [] + sent = [] + for i in range(len(tokens)): + sent.append(tokens[i]) + if len(sent) % 5 == 0: + sentences.append(sent) + sent = [] + if len(sent) > 0: + sentences.append(sent) + + doc = SLDADoc() + self.__engine.infer(sentences, doc) + topics = doc.sparse_topic_dist() + for topic in topics: + results.append({"topic id": topic.tid, "distribution": topic.prob}) + return results + + def show_topic_keywords(self, topic_id, k=10): + """ + This interface returns the k keywords under specific topic. + + Args: + topic_id(int): topic information we want to know. + k(int): top k keywords. + + Returns: + results(dict): contains specific topic's keywords and corresponding + probability. 
+ """ + EPS = 1e-8 + results = {} + if 0 <= topic_id < self.config.num_topics: + k = min(k, len(self.topic_words[topic_id])) + for i in range(k): + prob = self.topic_words[topic_id][i].count / \ + (self.topic_sum_table[topic_id] + EPS) + results[self.vocabulary[self.topic_words[topic_id][i].word_id]] = prob + return results + else: + logger.error("%d is out of range!" % topic_id) diff --git a/modules/text/semantic_model/slda_novel/sampler.py b/modules/text/semantic_model/slda_novel/sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..bdf192fbbfac94d942c6483fd864ad9aad7dc6d5 --- /dev/null +++ b/modules/text/semantic_model/slda_novel/sampler.py @@ -0,0 +1,286 @@ +import os + +import numpy as np +from tqdm import tqdm +from paddlehub.common.logger import logger + +from slda_novel.document import LDADoc, SLDADoc, Token, Sentence +from slda_novel.vose_alias import VoseAlias +from slda_novel.util import rand, rand_k + + +class Sampler(object): + def __init__(self): + pass + + def sample_doc(self, doc): + """Sample LDA or SLDA topics for documents. + """ + raise NotImplementedError + + +class MHSampler(Sampler): + def __init__(self, model): + super().__init__() + self.__model = model + self.__topic_indexes = None + self.__alias_tables = None + self.__prob_sum = None + self.__beta_alias = VoseAlias() + self.__beta_prior_sum = None + self.__mh_steps = 2 + self.__construct_alias_table() + + def __construct_alias_table(self): + """Construct alias table for all words. + """ + logger.info("Construct alias table for alias sampling method.") + vocab_size = self.__model.vocab_size() + self.__topic_indexes = [[] for _ in range(vocab_size)] + self.__alias_tables = [VoseAlias() for _ in range(vocab_size)] + self.__prob_sum = np.zeros(vocab_size) + + # Construct each word's alias table (prior is not included). + for i in tqdm(range(vocab_size)): + dist = [] + prob_sum = 0 + for key in self.__model.word_topic(i): + topic_id = key + word_topic_count = self.__model.word_topic(i)[key] + topic_sum = self.__model.topic_sum_value(topic_id) + + self.__topic_indexes[i].append(topic_id) + q = word_topic_count / (topic_sum + self.__model.beta_sum()) + dist.append(q) + prob_sum += q + self.__prob_sum[i] = prob_sum + if len(dist) > 0: + dist = np.array(dist, dtype=np.float) + self.__alias_tables[i].initialize(dist) + + # Build prior parameter beta's alias table. 
+ beta_dist = self.__model.beta() / (self.__model.topic_sum() + self.__model.beta_sum()) + self.__beta_prior_sum = np.sum(beta_dist) + self.__beta_alias.initialize(beta_dist) + + def sample_doc(self, doc): + if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): + for i in range(doc.size()): + new_topic = self.__sample_token(doc, doc.token(i)) + doc.set_topic(i, new_topic) + elif isinstance(doc, SLDADoc): + for i in range(doc.size()): + new_topic = self.__sample_sentence(doc, doc.sent(i)) + doc.set_topic(i, new_topic) + + def __sample_token(self, doc, token): + new_topic = token.topic + for i in range(self.__mh_steps): + doc_proposed_topic = self.__doc_proposal(doc, token) + new_topic = self.__word_proposal(doc, token, doc_proposed_topic) + return new_topic + + def __sample_sentence(self, doc, sent): + new_topic = sent.topic + for i in range(self.__mh_steps): + doc_proposed_topic = self.__doc_proposal(doc, sent) + new_topic = self.__word_proposal(doc, sent, doc_proposed_topic) + return new_topic + + def __doc_proposal(self, doc, token): + if isinstance(doc, LDADoc) and isinstance(token, Token): + old_topic = token.topic + dart = rand() * (doc.size() + self.__model.alpha_sum()) + if dart < doc.size(): + token_index = int(dart) + new_topic = doc.token(token_index).topic + else: + new_topic = rand_k(self.__model.num_topics()) + + if new_topic != old_topic: + proposal_old = self.__doc_proposal_distribution(doc, old_topic) + proposal_new = self.__doc_proposal_distribution(doc, new_topic) + proportion_old = self.__proportional_function(doc, token, old_topic) + proportion_new = self.__proportional_function(doc, token, new_topic) + transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new)) + rejection = rand() + mask = -(rejection < transition_prob) + return (new_topic & mask) | (old_topic & ~mask) + + return new_topic + + elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): + sent = token + old_topic = sent.topic + dart = rand() * (doc.size() + self.__model.alpha_sum()) + if dart < doc.size(): + token_index = int(dart) + new_topic = doc.sent(token_index).topic + else: + new_topic = rand_k(self.__model.num_topics()) + + if new_topic != old_topic: + proportion_old = self.__proportional_function(doc, sent, old_topic) + proportion_new = self.__proportional_function(doc, sent, new_topic) + proposal_old = self.__doc_proposal_distribution(doc, old_topic) + proposal_new = self.__doc_proposal_distribution(doc, new_topic) + transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new)) + rejection = rand() + mask = -(rejection < transition_prob) + return (new_topic & mask) | (old_topic & ~mask) + + return new_topic + + def __word_proposal(self, doc, token, old_topic): + if isinstance(doc, LDADoc) and isinstance(token, Token): + new_topic = self.__propose(token.id) + if new_topic != old_topic: + proposal_old = self.__word_proposal_distribution(token.id, old_topic) + proposal_new = self.__word_proposal_distribution(token.id, new_topic) + proportion_old = self.__proportional_function(doc, token, old_topic) + proportion_new = self.__proportional_function(doc, token, new_topic) + transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new)) + rejection = rand() + mask = -(rejection < transition_prob) + return (new_topic & mask) | (old_topic & ~mask) + return new_topic + + elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): + sent = token + new_topic = old_topic + for word_id in sent.tokens: + 
new_topic = self.__propose(word_id)
+                if new_topic != old_topic:
+                    proportion_old = self.__proportional_function(doc, sent, old_topic)
+                    proportion_new = self.__proportional_function(doc, sent, new_topic)
+                    proposal_old = self.__word_proposal_distribution(word_id, old_topic)
+                    proposal_new = self.__word_proposal_distribution(word_id, new_topic)
+                    transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new))
+                    rejection = rand()
+                    # Branchless MH accept/reject: mask is all one-bits on acceptance, zero otherwise.
+                    mask = -(rejection < transition_prob)
+                    new_topic = (new_topic & mask) | (old_topic & ~mask)
+            return new_topic
+
+    def __proportional_function(self, doc, token, new_topic):
+        if isinstance(doc, LDADoc) and isinstance(token, Token):
+            old_topic = token.topic
+            dt_alpha = doc.topic_sum(new_topic) + self.__model.alpha()
+            wt_beta = self.__model.word_topic_value(token.id, new_topic) + self.__model.beta()
+            t_sum_beta_sum = self.__model.topic_sum_value(new_topic) + self.__model.beta_sum()
+            if new_topic == old_topic and wt_beta > 1:
+                if dt_alpha > 1:
+                    dt_alpha -= 1
+                wt_beta -= 1
+                t_sum_beta_sum -= 1
+            return dt_alpha * wt_beta / t_sum_beta_sum
+
+        elif isinstance(doc, SLDADoc) and isinstance(token, Sentence):
+            sent = token
+            old_topic = sent.topic
+            result = doc.topic_sum(new_topic) + self.__model.alpha()
+            if new_topic == old_topic:
+                result -= 1
+            for word_id in sent.tokens:
+                wt_beta = self.__model.word_topic_value(word_id, new_topic) + self.__model.beta()
+                t_sum_beta_sum = self.__model.topic_sum_value(new_topic) + self.__model.beta_sum()
+                if new_topic == old_topic and wt_beta > 1:
+                    wt_beta -= 1
+                    t_sum_beta_sum -= 1
+                result *= wt_beta / t_sum_beta_sum
+            return result
+        else:
+            logger.error("Wrong input argument type!")
+
+    def __word_proposal_distribution(self, word_id, topic):
+        wt_beta = self.__model.word_topic_value(word_id, topic) + self.__model.beta()
+        t_sum_beta_sum = self.__model.topic_sum_value(topic) + self.__model.beta_sum()
+        return wt_beta / t_sum_beta_sum
+
+    def __doc_proposal_distribution(self, doc, topic):
+        return doc.topic_sum(topic) + self.__model.alpha()
+
+    def __propose(self, word_id):
+        dart = rand() * (self.__prob_sum[word_id] + self.__beta_prior_sum)
+        if dart < self.__prob_sum[word_id]:
+            idx = self.__alias_tables[word_id].generate()
+            topic = self.__topic_indexes[word_id][idx]
+        else:
+            topic = self.__beta_alias.generate()
+        return topic
+
+
+class GibbsSampler(Sampler):
+    def __init__(self, model):
+        super().__init__()
+        self.__model = model
+
+    def sample_doc(self, doc):
+        if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc):
+            for i in range(doc.size()):
+                new_topic = self.__sample_token(doc, doc.token(i))
+                doc.set_topic(i, new_topic)
+        elif isinstance(doc, SLDADoc):
+            for i in range(doc.size()):
+                new_topic = self.__sample_sentence(doc, doc.sent(i))
+                doc.set_topic(i, new_topic)
+
+    def __sample_token(self, doc, token):
+        old_topic = token.topic
+        num_topics = self.__model.num_topics()
+        accum_prob = np.zeros(num_topics)
+        prob = np.zeros(num_topics)
+        sum_ = 0
+        for i in range(num_topics):
+            dt_alpha = doc.topic_sum(i) + self.__model.alpha()
+            wt_beta = self.__model.word_topic_value(token.id, i) + self.__model.beta()
+            t_sum_beta_sum = self.__model.topic_sum_value(i) + self.__model.beta_sum()
+            if i == old_topic and wt_beta > 1:
+                if dt_alpha > 1:
+                    dt_alpha -= 1
+                wt_beta -= 1
+                t_sum_beta_sum -= 1
+            prob[i] = dt_alpha * wt_beta / t_sum_beta_sum
+            sum_ += prob[i]
+            accum_prob[i] = prob[i] if i == 0 else accum_prob[i - 1] + prob[i]
+
+        dart = rand() * sum_
+        if dart <= accum_prob[0]:
+            return 0
+        for i in range(1, num_topics):
+            if accum_prob[i - 1] < dart <= accum_prob[i]:
+                return i
+        return num_topics - 1
+
+    def __sample_sentence(self, doc, sent):
+        old_topic = sent.topic
+        num_topics = self.__model.num_topics()
+        accum_prob = np.zeros(num_topics)
+        prob = np.zeros(num_topics)
+        sum_ = 0
+        for t in range(num_topics):
+            dt_alpha = doc.topic_sum(t) + self.__model.alpha()
+            t_sum_beta_sum = self.__model.topic_sum_value(t) + self.__model.beta_sum()
+            if t == old_topic:
+                if dt_alpha > 1:
+                    dt_alpha -= 1
+                if t_sum_beta_sum > 1:
+                    t_sum_beta_sum -= 1
+            prob[t] = dt_alpha
+            for i in range(len(sent.tokens)):
+                w = sent.tokens[i]
+                wt_beta = self.__model.word_topic_value(w, t) + self.__model.beta()
+                if t == old_topic and wt_beta > 1:
+                    wt_beta -= 1
+                # Note: for a long sentence this product of many factors smaller than one
+                # can underflow, so precision may be lost.
+                prob[t] *= wt_beta / t_sum_beta_sum
+            sum_ += prob[t]
+            accum_prob[t] = prob[t] if t == 0 else accum_prob[t - 1] + prob[t]
+
+        dart = rand() * sum_
+        if dart <= accum_prob[0]:
+            return 0
+        for t in range(1, num_topics):
+            if accum_prob[t - 1] < dart <= accum_prob[t]:
+                return t
+        return num_topics - 1
diff --git a/hub_module/modules/text/semantic_model/slda_novel/semantic_matching.py b/modules/text/semantic_model/slda_novel/semantic_matching.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/slda_novel/semantic_matching.py
rename to modules/text/semantic_model/slda_novel/semantic_matching.py
diff --git a/modules/text/semantic_model/slda_novel/tokenizer.py b/modules/text/semantic_model/slda_novel/tokenizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..585aed885b63b0e2a2d450b77a6d018615c86b04
--- /dev/null
+++ b/modules/text/semantic_model/slda_novel/tokenizer.py
@@ -0,0 +1,127 @@
+import os
+
+import numpy as np
+from paddlehub.common.logger import logger
+
+
+class Tokenizer(object):
+    """Base tokenizer class.
+    """
+
+    def __init__(self):
+        pass
+
+    def tokenize(self, text):
+        raise NotImplementedError
+
+
+class SimpleTokenizer(Tokenizer):
+    """Simple version of an FMM (Forward Maximum Matching) word tokenizer. This tokenizer can only
+    be used in the topic model demo, but not in real business application scenarios.
+
+    Notes: This tokenizer can only recognize the words in the corresponding vocab file.
+    """
+
+    def __init__(self, vocab_path):
+        super().__init__()
+        self.__max_word_len = 0
+        self.__vocab = set()
+        self.__load_vocab(vocab_path)
+
+    def tokenize(self, text):
+        """Tokenize the input string `text`, and return the tokenized result.
+        """
+        text_len = len(text)
+        result = []
+        i = 0
+        while i < text_len:
+            word = found_word = ""
+            # Deal with English characters.
+            if self.__is_eng_char(text[i]):
+                for j in range(i, text_len + 1):
+                    if j < text_len and self.__is_eng_char(text[j]):
+                        word += self.__tolower(text[j])
+                    else:
+                        # Forward matching by character granularity.
+                        if word in self.__vocab:
+                            result.append(word)
+                        i = j - 1
+                        break
+            else:
+                for j in range(i, min(i + self.__max_word_len, text_len)):
+                    word += text[j]
+                    if word in self.__vocab:
+                        found_word = word
+                if len(found_word) > 0:
+                    result.append(found_word)
+                    i += len(found_word) - 1
+            i += 1
+        return result
+
+    def contains(self, word):
+        """Check whether the word is in the vocabulary.
+        """
+        return word in self.__vocab
+
+    def __load_vocab(self, vocab_path):
+        """Load the word dictionary.
+ """ + with open(vocab_path, 'r', encoding='utf-8') as fin: + vocab_size = 0 + for line in fin.readlines(): + fields = line.strip().split('\t') + assert len(fields) >= 2 + word = fields[1] + self.__max_word_len = max(self.__max_word_len, len(word)) + self.__vocab.add(word) + vocab_size += 1 + + def __is_eng_char(self, c): + """Check whether char c is an English character. + """ + return (c >= 'A' and c <= 'Z') or (c >= 'a' and c <= 'z') + + def __tolower(self, c): + """Return the lowercase character of the corresponding character, or return + the original character if there is no corresponding lowercase character. + """ + return c.lower() + + +class LACTokenizer(Tokenizer): + def __init__(self, vocab_path, lac): + super().__init__() + self.__max_word_len = 0 + self.__vocab = set() + self.__lac = lac + self.__load_vocab(vocab_path) + + def __load_vocab(self, vocab_path): + """Load the word dictionary. + """ + with open(vocab_path, 'r', encoding='utf-8') as fin: + vocab_size = 0 + for line in fin.readlines(): + fields = line.strip().split('\t') + assert len(fields) >= 2 + word = fields[1] + self.__max_word_len = max(self.__max_word_len, len(word)) + self.__vocab.add(word) + vocab_size += 1 + + def tokenize(self, text): + results = self.__lac.lexical_analysis(texts=[text], use_gpu=False, batch_size=1, return_tag=True) + # Change English words to lower case. + # And just preserve the word in vocab. + words = results[0]["word"] + result = [] + for word in words: + word = word.lower() + if word in self.__vocab: + result.append(word) + return result + + def contains(self, word): + """Check whether the word is in the vocabulary. + """ + return word in self.__vocab diff --git a/hub_module/modules/text/semantic_model/slda_novel/util.py b/modules/text/semantic_model/slda_novel/util.py similarity index 100% rename from hub_module/modules/text/semantic_model/slda_novel/util.py rename to modules/text/semantic_model/slda_novel/util.py diff --git a/modules/text/semantic_model/slda_novel/vocab.py b/modules/text/semantic_model/slda_novel/vocab.py new file mode 100644 index 0000000000000000000000000000000000000000..7cc01fe71ceb63d2328ddaf15da8173c9508c118 --- /dev/null +++ b/modules/text/semantic_model/slda_novel/vocab.py @@ -0,0 +1,41 @@ +from paddlehub.common.logger import logger + +OOV = -1 + + +class WordCount(object): + def __init__(self, word_id, count): + self.word_id = word_id + self.count = count + + +class Vocab(object): + def __init__(self): + self.__term2id = {} + self.__id2term = {} + + def get_id(self, word): + if word not in self.__term2id: + return OOV + return self.__term2id[word] + + def load(self, vocab_file): + self.__term2id = {} + self.__id2term = {} + with open(vocab_file, 'r', encoding='utf-8') as fin: + for line in fin.readlines(): + fields = line.strip().split('\t') + assert len(fields) == 5, "Vocabulary file [%s] format error!" % (vocab_file) + term = fields[1] + id_ = int(fields[2]) + if term in self.__term2id: + logger.error("Duplicate word [%s] in vocab file!" 
% (term)) + continue + self.__term2id[term] = id_ + self.__id2term[id_] = term + + def size(self): + return len(self.__term2id) + + def vocabulary(self): + return self.__id2term diff --git a/hub_module/modules/text/semantic_model/slda_novel/vose_alias.py b/modules/text/semantic_model/slda_novel/vose_alias.py similarity index 100% rename from hub_module/modules/text/semantic_model/slda_novel/vose_alias.py rename to modules/text/semantic_model/slda_novel/vose_alias.py diff --git a/hub_module/modules/text/semantic_model/slda_webpage/README.md b/modules/text/semantic_model/slda_webpage/README.md similarity index 100% rename from hub_module/modules/text/semantic_model/slda_webpage/README.md rename to modules/text/semantic_model/slda_webpage/README.md diff --git a/hub_module/modules/text/semantic_model/slda_novel/__init__.py b/modules/text/semantic_model/slda_webpage/__init__.py similarity index 100% rename from hub_module/modules/text/semantic_model/slda_novel/__init__.py rename to modules/text/semantic_model/slda_webpage/__init__.py diff --git a/hub_module/modules/text/semantic_model/slda_webpage/config.py b/modules/text/semantic_model/slda_webpage/config.py similarity index 100% rename from hub_module/modules/text/semantic_model/slda_webpage/config.py rename to modules/text/semantic_model/slda_webpage/config.py diff --git a/modules/text/semantic_model/slda_webpage/document.py b/modules/text/semantic_model/slda_webpage/document.py new file mode 100644 index 0000000000000000000000000000000000000000..4476230a5c9bc8d545b52386dbf00a201e59b468 --- /dev/null +++ b/modules/text/semantic_model/slda_webpage/document.py @@ -0,0 +1,176 @@ +import numpy as np + + +class Topic(object): + """Basic data structure of topic, contains topic id and + corresponding probability. + """ + + def __init__(self, tid, prob): + self.tid = tid # topic id + self.prob = prob # topic probability + + +class Token(object): + """Basic storage unit of LDA documents, contains word id + and corresponding topic. + """ + + def __init__(self, topic, id): + self.topic = topic + self.id = id + + +class Sentence(object): + """Basic storage unit of SentenceLDA documents, contains word ids + of the sentence and its corresponding topic id. + """ + + def __init__(self, topic, tokens): + self.topic = topic + self.tokens = tokens + + +class LDADoc(object): + """The storage structure of LDA model's inference result. + """ + + def __init__(self): + self._num_topics = None # Number of topics. + self._num_accum = None # Number of accumulated sample rounds. + self._alpha = None # Document prior parameter. + self._tokens = None # Storage structure of inference results. + self._topic_sum = None # Document's topic sum in one round samples. + self._accum_topic_sum = None # Accumulated results of topic sum. + + def init(self, num_topics): + """Initialize the LDADoc according to num_topics. + """ + self._num_topics = num_topics + self._num_accum = 0 + self._tokens = [] + self._topic_sum = np.zeros(self._num_topics) + self._accum_topic_sum = np.zeros(self._num_topics) + + def add_token(self, token): + """Add new word to current LDADoc. + Arg: + token: Token class object. + """ + assert token.topic >= 0, "Topic %d out of range!" % token.topic + assert token.topic < self._num_topics, "Topic %d out of range!" 
% token.topic + self._tokens.append(token) + self._topic_sum[token.topic] += 1 + + def token(self, index): + return self._tokens[index] + + def set_topic(self, index, new_topic): + """Set the index word's topic to new_topic, and update the corresponding + topic distribution. + """ + assert new_topic >= 0, "Topic %d out of range!" % new_topic + assert new_topic < self._num_topics, "Topic %d out of range!" % new_topic + old_topic = self._tokens[index].topic + if new_topic == old_topic: + return + self._tokens[index].topic = new_topic + self._topic_sum[old_topic] -= 1 + self._topic_sum[new_topic] += 1 + + def set_alpha(self, alpha): + self._alpha = alpha + + def size(self): + """Return number of words in LDADoc. + """ + return len(self._tokens) + + def topic_sum(self, topic_id): + return self._topic_sum[topic_id] + + def sparse_topic_dist(self, sort=True): + """Return the topic distribution of documents in sparse format. + By default, it is sorted according to the topic probability + under the descending order. + """ + topic_dist = [] + sum_ = np.sum(self._accum_topic_sum) + if sum_ == 0: + return topic_dist + for i in range(0, self._num_topics): + if self._accum_topic_sum[i] == 0: + continue + topic_dist.append(Topic(i, self._accum_topic_sum[i] * 1.0 / sum_)) + if sort: + + def take_elem(topic): + return topic.prob + + topic_dist.sort(key=take_elem, reverse=True) + if topic_dist is None: + topic_dist = [] + + return topic_dist + + def dense_topic_dist(self): + """Return the distribution of document topics in dense format, + taking into account the prior parameter alpha. + """ + dense_dist = np.zeros(self._num_topics) + if self.size() == 0: + return dense_dist + dense_dist = (self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / ( + self.size() + self._alpha * self._num_topics) + return dense_dist + + def accumulate_topic_num(self): + self._accum_topic_sum += self._topic_sum + self._num_accum += 1 + + +class SLDADoc(LDADoc): + """Sentence LDA Document, inherited from LDADoc. + Add add_sentence interface. + """ + + def __init__(self): + super().__init__() + self.__sentences = None + + def init(self, num_topics): + """Initialize the SLDADoc according to num_topics. + """ + self._num_topics = num_topics + self.__sentences = [] + self._num_accum = 0 + self._topic_sum = np.zeros(self._num_topics) + self._accum_topic_sum = np.zeros(self._num_topics) + + def add_sentence(self, sent): + """Add new sentence to current SLDADoc. + Arg: + sent: Sentence class object. + """ + assert sent.topic >= 0, "Topic %d out of range!" % (sent.topic) + assert sent.topic < self._num_topics, "Topic %d out of range!" % (sent.topic) + self.__sentences.append(sent) + self._topic_sum[sent.topic] += 1 + + def set_topic(self, index, new_topic): + assert new_topic >= 0, "Topic %d out of range!" % (new_topic) + assert new_topic < self._num_topics, "Topic %d out of range!" % (new_topic) + old_topic = self.__sentences[index].topic + if new_topic == old_topic: + return + self.__sentences[index].topic = new_topic + self._topic_sum[old_topic] -= 1 + self._topic_sum[new_topic] += 1 + + def size(self): + """Return number of sentences in SLDADoc. 
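+        Unlike LDADoc.size(), the unit counted here is a sentence, not a token.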
+ """ + return len(self.__sentences) + + def sent(self, index): + return self.__sentences[index] diff --git a/modules/text/semantic_model/slda_webpage/inference_engine.py b/modules/text/semantic_model/slda_webpage/inference_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..c62c87096ff658e20cfaa4ed24589cec4ade2f2c --- /dev/null +++ b/modules/text/semantic_model/slda_webpage/inference_engine.py @@ -0,0 +1,91 @@ +import os + +from paddlehub.common.logger import logger + +from slda_webpage.config import ModelConfig +from slda_webpage.util import load_prototxt, fix_random_seed, rand_k +from slda_webpage.model import TopicModel +from slda_webpage.sampler import GibbsSampler, MHSampler +from slda_webpage.document import LDADoc, SLDADoc, Token, Sentence +from slda_webpage.vocab import OOV + + +class SamplerType: + GibbsSampling = 0 + MetropolisHastings = 1 + + +class InferenceEngine(object): + def __init__(self, model_dir, conf_file, type=SamplerType.MetropolisHastings): + # Read model configuration. + config = ModelConfig() + conf_file_path = os.path.join(model_dir, conf_file) + load_prototxt(conf_file_path, config) + self.__model = TopicModel(model_dir, config) + self.__config = config + + # Initialize the sampler according to the configuration. + if type == SamplerType.GibbsSampling: + self.__sampler = GibbsSampler(self.__model) + elif type == SamplerType.MetropolisHastings: + self.__sampler = MHSampler(self.__model) + + def infer(self, input, doc): + """Perform LDA topic inference on input, and store the results in doc. + Args: + input: a list of strings after tokenization. + doc: LDADoc type or SLDADoc type. + """ + fix_random_seed() + if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): + doc.init(self.__model.num_topics()) + doc.set_alpha(self.__model.alpha()) + for token in input: + id_ = self.__model.term_id(token) + if id_ != OOV: + init_topic = rand_k(self.__model.num_topics()) + doc.add_token(Token(init_topic, id_)) + self.lda_infer(doc, 20, 50) + elif isinstance(doc, SLDADoc): + doc.init(self.__model.num_topics()) + doc.set_alpha(self.__model.alpha()) + for sent in input: + words = [] + for token in sent: + id_ = self.__model.term_id(token) + if id_ != OOV: + words.append(id_) + init_topic = rand_k(self.__model.num_topics()) + doc.add_sentence(Sentence(init_topic, words)) + self.slda_infer(doc, 20, 50) + else: + logger.error("Wrong Doc Type!") + + def lda_infer(self, doc, burn_in_iter, total_iter): + assert burn_in_iter >= 0 + assert total_iter > 0 + assert total_iter > burn_in_iter + + for iter_ in range(total_iter): + self.__sampler.sample_doc(doc) + if iter_ >= burn_in_iter: + doc.accumulate_topic_num() + + def slda_infer(self, doc, burn_in_iter, total_iter): + assert burn_in_iter >= 0 + assert total_iter > 0 + assert total_iter > burn_in_iter + + for iter_ in range(total_iter): + self.__sampler.sample_doc(doc) + if iter_ >= burn_in_iter: + doc.accumulate_topic_num() + + def model_type(self): + return self.__model.type() + + def get_model(self): + return self.__model + + def get_config(self): + return self.__config diff --git a/modules/text/semantic_model/slda_webpage/model.py b/modules/text/semantic_model/slda_webpage/model.py new file mode 100644 index 0000000000000000000000000000000000000000..e3e78020a7e4b3f273a22e437b0ae03ea0ed23f5 --- /dev/null +++ b/modules/text/semantic_model/slda_webpage/model.py @@ -0,0 +1,123 @@ +import os +from collections import OrderedDict + +import numpy as np +from tqdm import tqdm +from paddlehub.common.logger 
import logger
+
+from slda_webpage.vocab import Vocab, WordCount
+
+
+class TopicModel(object):
+    """Storage structure of the topic model, including the vocabulary and word-topic counts.
+    """
+
+    def __init__(self, model_dir, config):
+        """
+        Args:
+            model_dir: the path of the model directory.
+            config: ModelConfig class.
+        """
+        self.__word_topic = None  # Model parameter of word topic.
+        self.__vocab = Vocab()  # Vocab data structure of model.
+        self.__num_topics = config.num_topics  # Number of topics.
+        self.__alpha = config.alpha
+        self.__alpha_sum = self.__alpha * self.__num_topics
+        self.__beta = config.beta
+        self.__beta_sum = None
+        self.__type = config.type  # Model type.
+        self.__topic_sum = np.zeros(self.__num_topics, dtype="int64")  # Accum sum of each topic in word topic.
+        self.__topic_words = [[] for _ in range(self.__num_topics)]
+        word_topic_path = os.path.join(model_dir, config.word_topic_file)
+        vocab_path = os.path.join(model_dir, config.vocab_file)
+        self.load_model(word_topic_path, vocab_path)
+
+    def term_id(self, term):
+        return self.__vocab.get_id(term)
+
+    def load_model(self, word_topic_path, vocab_path):
+
+        # Load the vocabulary.
+        self.__vocab.load(vocab_path)
+
+        self.__beta_sum = self.__beta * self.__vocab.size()
+        self.__word_topic = [{} for _ in range(self.__vocab.size())]  # A list of dicts, one topic-count dict per word.
+        self.__load_word_dict(word_topic_path)
+        logger.info("Model Info: #num_topics=%d #vocab_size=%d alpha=%f beta=%f" %
+                    (self.num_topics(), self.vocab_size(), self.alpha(), self.beta()))
+
+    def word_topic_value(self, word_id, topic_id):
+        """Return the count of the given word under the given topic in the model.
+        """
+        word_dict = self.__word_topic[word_id]
+        if topic_id not in word_dict:
+            return 0
+        return word_dict[topic_id]
+
+    def word_topic(self, term_id):
+        """Return the topic distribution of a word.
+        """
+        return self.__word_topic[term_id]
+
+    def topic_sum_value(self, topic_id):
+        return self.__topic_sum[topic_id]
+
+    def topic_sum(self):
+        return self.__topic_sum
+
+    def num_topics(self):
+        return self.__num_topics
+
+    def vocab_size(self):
+        return self.__vocab.size()
+
+    def alpha(self):
+        return self.__alpha
+
+    def alpha_sum(self):
+        return self.__alpha_sum
+
+    def beta(self):
+        return self.__beta
+
+    def beta_sum(self):
+        return self.__beta_sum
+
+    def type(self):
+        return self.__type
+
+    def __load_word_dict(self, word_dict_path):
+        """Load the word topic parameters.
+        """
+        logger.info("Loading word topic.")
+        with open(word_dict_path, 'r', encoding='utf-8') as f:
+            for line in tqdm(f.readlines()):
+                fields = line.strip().split(" ")
+                assert len(fields) > 0, "Model file format error!"
+                term_id = int(fields[0])
+                assert term_id < self.vocab_size(), "Term id out of range!"
+                assert term_id >= 0, "Term id out of range!"
+                for i in range(1, len(fields)):
+                    topic_count = fields[i].split(":")
+                    assert len(topic_count) == 2, "Topic count format error!"
+
+                    topic_id = int(topic_count[0])
+                    assert topic_id >= 0, "Topic out of range!"
+                    assert topic_id < self.__num_topics, "Topic out of range!"
+
+                    count = int(topic_count[1])
+                    assert count >= 0, "Topic count error!"
+ + self.__word_topic[term_id][topic_id] = count + self.__topic_sum[topic_id] += count + self.__topic_words[topic_id].append(WordCount(term_id, count)) + new_dict = OrderedDict() + for key in sorted(self.__word_topic[term_id]): + new_dict[key] = self.__word_topic[term_id][key] + self.__word_topic[term_id] = new_dict + + def get_vocab(self): + return self.__vocab.vocabulary() + + def topic_words(self): + return self.__topic_words diff --git a/modules/text/semantic_model/slda_webpage/module.py b/modules/text/semantic_model/slda_webpage/module.py new file mode 100644 index 0000000000000000000000000000000000000000..e66339f3e8ae6be4766c06cbd9b0317734a5601d --- /dev/null +++ b/modules/text/semantic_model/slda_webpage/module.py @@ -0,0 +1,102 @@ +import os + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.common.logger import logger + +from slda_webpage.inference_engine import InferenceEngine +from slda_webpage.document import SLDADoc +from slda_webpage.semantic_matching import SemanticMatching, WordAndDis +from slda_webpage.tokenizer import LACTokenizer, SimpleTokenizer +from slda_webpage.config import ModelType +from slda_webpage.vocab import Vocab, WordCount + + +@moduleinfo( + name="slda_webpage", + version="1.0.0", + summary= + "This is a PaddleHub Module for SLDA topic model in webpage dataset, where we can infer the topic distribution of document.", + author="DesmonDay", + author_email="", + type="nlp/semantic_model") +class TopicModel(hub.Module): + def _initialize(self): + """ + Initialize with the necessary elements. + """ + self.model_dir = os.path.join(self.directory, 'webpage') + self.conf_file = 'slda.conf' + self.__engine = InferenceEngine(self.model_dir, self.conf_file) + self.vocab_path = os.path.join(self.model_dir, 'vocab_info.txt') + lac = hub.Module(name="lac") + # self.__tokenizer = SimpleTokenizer(self.vocab_path) + self.__tokenizer = LACTokenizer(self.vocab_path, lac) + + self.vocabulary = self.__engine.get_model().get_vocab() + self.config = self.__engine.get_config() + self.topic_words = self.__engine.get_model().topic_words() + self.topic_sum_table = self.__engine.get_model().topic_sum() + + def take_elem(word_count): + return word_count.count + + for i in range(self.config.num_topics): + self.topic_words[i].sort(key=take_elem, reverse=True) + + logger.info("Finish Initialization.") + + def infer_doc_topic_distribution(self, document): + """ + This interface infers the topic distribution of document. + + Args: + document(str): the input document text. + + Returns: + results(list): returns the topic distribution of document. + """ + tokens = self.__tokenizer.tokenize(document) + if tokens == []: + return [] + results = [] + sentences = [] + sent = [] + for i in range(len(tokens)): + sent.append(tokens[i]) + if len(sent) % 5 == 0: + sentences.append(sent) + sent = [] + if len(sent) > 0: + sentences.append(sent) + + doc = SLDADoc() + self.__engine.infer(sentences, doc) + topics = doc.sparse_topic_dist() + for topic in topics: + results.append({"topic id": topic.tid, "distribution": topic.prob}) + return results + + def show_topic_keywords(self, topic_id, k=10): + """ + This interface returns the k keywords under specific topic. + + Args: + topic_id(int): topic information we want to know. + k(int): top k keywords. + + Returns: + results(dict): contains specific topic's keywords and + corresponding probability. 
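+
+        For example, a hypothetical call show_topic_keywords(0, k=3) could return
+        a dict such as {"word_a": 0.1, "word_b": 0.05, "word_c": 0.01}, where each
+        value is count / (topic_sum + EPS).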
+ """ + EPS = 1e-8 + results = {} + if 0 <= topic_id < self.config.num_topics: + k = min(k, len(self.topic_words[topic_id])) + for i in range(k): + prob = self.topic_words[topic_id][i].count / \ + (self.topic_sum_table[topic_id] + EPS) + results[self.vocabulary[self.topic_words[topic_id][i].word_id]] = prob + return results + else: + logger.error("%d is out of range!" % topic_id) diff --git a/modules/text/semantic_model/slda_webpage/sampler.py b/modules/text/semantic_model/slda_webpage/sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..80a8fada2c4088ded544cdf6b3c7f82fe191ab80 --- /dev/null +++ b/modules/text/semantic_model/slda_webpage/sampler.py @@ -0,0 +1,286 @@ +import os + +import numpy as np +from tqdm import tqdm +from paddlehub.common.logger import logger + +from slda_webpage.document import LDADoc, SLDADoc, Token, Sentence +from slda_webpage.vose_alias import VoseAlias +from slda_webpage.util import rand, rand_k + + +class Sampler(object): + def __init__(self): + pass + + def sample_doc(self, doc): + """Sample LDA or SLDA topics for documents. + """ + raise NotImplementedError + + +class MHSampler(Sampler): + def __init__(self, model): + super().__init__() + self.__model = model + self.__topic_indexes = None + self.__alias_tables = None + self.__prob_sum = None + self.__beta_alias = VoseAlias() + self.__beta_prior_sum = None + self.__mh_steps = 2 + self.__construct_alias_table() + + def __construct_alias_table(self): + """Construct alias table for all words. + """ + logger.info("Construct alias table for alias sampling method.") + vocab_size = self.__model.vocab_size() + self.__topic_indexes = [[] for _ in range(vocab_size)] + self.__alias_tables = [VoseAlias() for _ in range(vocab_size)] + self.__prob_sum = np.zeros(vocab_size) + + # Construct each word's alias table (prior is not included). + for i in tqdm(range(vocab_size)): + dist = [] + prob_sum = 0 + for key in self.__model.word_topic(i): + topic_id = key + word_topic_count = self.__model.word_topic(i)[key] + topic_sum = self.__model.topic_sum_value(topic_id) + + self.__topic_indexes[i].append(topic_id) + q = word_topic_count / (topic_sum + self.__model.beta_sum()) + dist.append(q) + prob_sum += q + self.__prob_sum[i] = prob_sum + if len(dist) > 0: + dist = np.array(dist, dtype=np.float) + self.__alias_tables[i].initialize(dist) + + # Build prior parameter beta's alias table. 
+ beta_dist = self.__model.beta() / (self.__model.topic_sum() + self.__model.beta_sum()) + self.__beta_prior_sum = np.sum(beta_dist) + self.__beta_alias.initialize(beta_dist) + + def sample_doc(self, doc): + if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): + for i in range(doc.size()): + new_topic = self.__sample_token(doc, doc.token(i)) + doc.set_topic(i, new_topic) + elif isinstance(doc, SLDADoc): + for i in range(doc.size()): + new_topic = self.__sample_sentence(doc, doc.sent(i)) + doc.set_topic(i, new_topic) + + def __sample_token(self, doc, token): + new_topic = token.topic + for i in range(self.__mh_steps): + doc_proposed_topic = self.__doc_proposal(doc, token) + new_topic = self.__word_proposal(doc, token, doc_proposed_topic) + return new_topic + + def __sample_sentence(self, doc, sent): + new_topic = sent.topic + for i in range(self.__mh_steps): + doc_proposed_topic = self.__doc_proposal(doc, sent) + new_topic = self.__word_proposal(doc, sent, doc_proposed_topic) + return new_topic + + def __doc_proposal(self, doc, token): + if isinstance(doc, LDADoc) and isinstance(token, Token): + old_topic = token.topic + dart = rand() * (doc.size() + self.__model.alpha_sum()) + if dart < doc.size(): + token_index = int(dart) + new_topic = doc.token(token_index).topic + else: + new_topic = rand_k(self.__model.num_topics()) + + if new_topic != old_topic: + proposal_old = self.__doc_proposal_distribution(doc, old_topic) + proposal_new = self.__doc_proposal_distribution(doc, new_topic) + proportion_old = self.__proportional_function(doc, token, old_topic) + proportion_new = self.__proportional_function(doc, token, new_topic) + transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new)) + rejection = rand() + mask = -(rejection < transition_prob) + return (new_topic & mask) | (old_topic & ~mask) + + return new_topic + + elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): + sent = token + old_topic = sent.topic + dart = rand() * (doc.size() + self.__model.alpha_sum()) + if dart < doc.size(): + token_index = int(dart) + new_topic = doc.sent(token_index).topic + else: + new_topic = rand_k(self.__model.num_topics()) + + if new_topic != old_topic: + proportion_old = self.__proportional_function(doc, sent, old_topic) + proportion_new = self.__proportional_function(doc, sent, new_topic) + proposal_old = self.__doc_proposal_distribution(doc, old_topic) + proposal_new = self.__doc_proposal_distribution(doc, new_topic) + transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new)) + rejection = rand() + mask = -(rejection < transition_prob) + return (new_topic & mask) | (old_topic & ~mask) + + return new_topic + + def __word_proposal(self, doc, token, old_topic): + if isinstance(doc, LDADoc) and isinstance(token, Token): + new_topic = self.__propose(token.id) + if new_topic != old_topic: + proposal_old = self.__word_proposal_distribution(token.id, old_topic) + proposal_new = self.__word_proposal_distribution(token.id, new_topic) + proportion_old = self.__proportional_function(doc, token, old_topic) + proportion_new = self.__proportional_function(doc, token, new_topic) + transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new)) + rejection = rand() + mask = -(rejection < transition_prob) + return (new_topic & mask) | (old_topic & ~mask) + return new_topic + + elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): + sent = token + new_topic = old_topic + for word_id in sent.tokens: + 
new_topic = self.__propose(word_id)
+                if new_topic != old_topic:
+                    proportion_old = self.__proportional_function(doc, sent, old_topic)
+                    proportion_new = self.__proportional_function(doc, sent, new_topic)
+                    proposal_old = self.__word_proposal_distribution(word_id, old_topic)
+                    proposal_new = self.__word_proposal_distribution(word_id, new_topic)
+                    transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new))
+                    rejection = rand()
+                    mask = -(rejection < transition_prob)
+                    new_topic = (new_topic & mask) | (old_topic & ~mask)
+            return new_topic
+
+    def __proportional_function(self, doc, token, new_topic):
+        if isinstance(doc, LDADoc) and isinstance(token, Token):
+            old_topic = token.topic
+            dt_alpha = doc.topic_sum(new_topic) + self.__model.alpha()
+            wt_beta = self.__model.word_topic_value(token.id, new_topic) + self.__model.beta()
+            t_sum_beta_sum = self.__model.topic_sum_value(new_topic) + self.__model.beta_sum()
+            if new_topic == old_topic and wt_beta > 1:
+                if dt_alpha > 1:
+                    dt_alpha -= 1
+                wt_beta -= 1
+                t_sum_beta_sum -= 1
+            return dt_alpha * wt_beta / t_sum_beta_sum
+
+        elif isinstance(doc, SLDADoc) and isinstance(token, Sentence):
+            sent = token
+            old_topic = sent.topic
+            result = doc.topic_sum(new_topic) + self.__model.alpha()
+            if new_topic == old_topic:
+                result -= 1
+            for word_id in sent.tokens:
+                wt_beta = self.__model.word_topic_value(word_id, new_topic) + self.__model.beta()
+                t_sum_beta_sum = self.__model.topic_sum_value(new_topic) + self.__model.beta_sum()
+                if new_topic == old_topic and wt_beta > 1:
+                    wt_beta -= 1
+                    t_sum_beta_sum -= 1
+                result *= wt_beta / t_sum_beta_sum
+            return result
+        else:
+            logger.error("Wrong input argument type!")
+
+    def __word_proposal_distribution(self, word_id, topic):
+        wt_beta = self.__model.word_topic_value(word_id, topic) + self.__model.beta()
+        t_sum_beta_sum = self.__model.topic_sum_value(topic) + self.__model.beta_sum()
+        return wt_beta / t_sum_beta_sum
+
+    def __doc_proposal_distribution(self, doc, topic):
+        return doc.topic_sum(topic) + self.__model.alpha()
+
+    def __propose(self, word_id):
+        dart = rand() * (self.__prob_sum[word_id] + self.__beta_prior_sum)
+        if dart < self.__prob_sum[word_id]:
+            idx = self.__alias_tables[word_id].generate()
+            topic = self.__topic_indexes[word_id][idx]
+        else:
+            topic = self.__beta_alias.generate()
+        return topic
+
+
+class GibbsSampler(Sampler):
+    def __init__(self, model):
+        super().__init__()
+        self.__model = model
+
+    def sample_doc(self, doc):
+        if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc):
+            for i in range(doc.size()):
+                new_topic = self.__sample_token(doc, doc.token(i))
+                doc.set_topic(i, new_topic)
+        elif isinstance(doc, SLDADoc):
+            for i in range(doc.size()):
+                new_topic = self.__sample_sentence(doc, doc.sent(i))
+                doc.set_topic(i, new_topic)
+
+    def __sample_token(self, doc, token):
+        old_topic = token.topic
+        num_topics = self.__model.num_topics()
+        accum_prob = np.zeros(num_topics)
+        prob = np.zeros(num_topics)
+        sum_ = 0
+        for i in range(num_topics):
+            dt_alpha = doc.topic_sum(i) + self.__model.alpha()
+            wt_beta = self.__model.word_topic_value(token.id, i) + self.__model.beta()
+            t_sum_beta_sum = self.__model.topic_sum_value(i) + self.__model.beta_sum()
+            if i == old_topic and wt_beta > 1:
+                if dt_alpha > 1:
+                    dt_alpha -= 1
+                wt_beta -= 1
+                t_sum_beta_sum -= 1
+            prob[i] = dt_alpha * wt_beta / t_sum_beta_sum
+            sum_ += prob[i]
+            accum_prob[i] = prob[i] if i == 0 else accum_prob[i - 1] + prob[i]
+
+        dart = rand() * sum_
+        if dart <= accum_prob[0]:
+            return 0
+        for i in range(1, num_topics):
+            if accum_prob[i - 1] < dart <= accum_prob[i]:
+                return i
+        return num_topics - 1
+
+    def __sample_sentence(self, doc, sent):
+        old_topic = sent.topic
+        num_topics = self.__model.num_topics()
+        accum_prob = np.zeros(num_topics)
+        prob = np.zeros(num_topics)
+        sum_ = 0
+        for t in range(num_topics):
+            dt_alpha = doc.topic_sum(t) + self.__model.alpha()
+            t_sum_beta_sum = self.__model.topic_sum_value(t) + self.__model.beta_sum()
+            if t == old_topic:
+                if dt_alpha > 1:
+                    dt_alpha -= 1
+                if t_sum_beta_sum > 1:
+                    t_sum_beta_sum -= 1
+            prob[t] = dt_alpha
+            for i in range(len(sent.tokens)):
+                w = sent.tokens[i]
+                wt_beta = self.__model.word_topic_value(w, t) + self.__model.beta()
+                if t == old_topic and wt_beta > 1:
+                    wt_beta -= 1
+                # Note: for long sentences, repeatedly multiplying many small factors
+                # can underflow, losing precision in the sampled distribution.
+                prob[t] *= wt_beta / t_sum_beta_sum
+            sum_ += prob[t]
+            accum_prob[t] = prob[t] if t == 0 else accum_prob[t - 1] + prob[t]
+
+        dart = rand() * sum_
+        if dart <= accum_prob[0]:
+            return 0
+        for t in range(1, num_topics):
+            if accum_prob[t - 1] < dart <= accum_prob[t]:
+                return t
+        return num_topics - 1
diff --git a/hub_module/modules/text/semantic_model/slda_webpage/semantic_matching.py b/modules/text/semantic_model/slda_webpage/semantic_matching.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/slda_webpage/semantic_matching.py
rename to modules/text/semantic_model/slda_webpage/semantic_matching.py
diff --git a/modules/text/semantic_model/slda_webpage/tokenizer.py b/modules/text/semantic_model/slda_webpage/tokenizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..585aed885b63b0e2a2d450b77a6d018615c86b04
--- /dev/null
+++ b/modules/text/semantic_model/slda_webpage/tokenizer.py
@@ -0,0 +1,127 @@
+import os
+
+import numpy as np
+from paddlehub.common.logger import logger
+
+
+class Tokenizer(object):
+    """Base tokenizer class.
+    """
+
+    def __init__(self):
+        pass
+
+    def tokenize(self, text):
+        raise NotImplementedError
+
+
+class SimpleTokenizer(Tokenizer):
+    """Simple FMM (Forward Maximum Matching) word tokenizer. This tokenizer can only
+    be used in the topic model demo, not in real business application scenarios.
+
+    Notes: This tokenizer can only recognize the words in the corresponding vocab file.
+    """
+
+    def __init__(self, vocab_path):
+        super().__init__()
+        self.__max_word_len = 0
+        self.__vocab = set()
+        self.__load_vocab(vocab_path)
+
+    def tokenize(self, text):
+        """Tokenize the input string `text` and return the tokenized result.
+        """
+        text_len = len(text)
+        result = []
+        i = 0
+        while i < text_len:
+            word = found_word = ""
+            # Deal with English characters.
+            if self.__is_eng_char(text[i]):
+                for j in range(i, text_len + 1):
+                    if j < text_len and self.__is_eng_char(text[j]):
+                        word += self.__tolower(text[j])
+                    else:
+                        # Forward matching by character granularity.
+                        if word in self.__vocab:
+                            result.append(word)
+                        i = j - 1
+                        break
+            else:
+                for j in range(i, min(i + self.__max_word_len, text_len)):
+                    word += text[j]
+                    if word in self.__vocab:
+                        found_word = word
+                if len(found_word) > 0:
+                    result.append(found_word)
+                    i += len(found_word) - 1
+            i += 1
+        return result
+
+    def contains(self, word):
+        """Check whether the word is in the vocabulary.
+        """
+        return word in self.__vocab
+
+    def __load_vocab(self, vocab_path):
+        """Load the word dictionary.
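+        Lines are tab-separated; the word itself is read from the second field.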
+ """ + with open(vocab_path, 'r', encoding='utf-8') as fin: + vocab_size = 0 + for line in fin.readlines(): + fields = line.strip().split('\t') + assert len(fields) >= 2 + word = fields[1] + self.__max_word_len = max(self.__max_word_len, len(word)) + self.__vocab.add(word) + vocab_size += 1 + + def __is_eng_char(self, c): + """Check whether char c is an English character. + """ + return (c >= 'A' and c <= 'Z') or (c >= 'a' and c <= 'z') + + def __tolower(self, c): + """Return the lowercase character of the corresponding character, or return + the original character if there is no corresponding lowercase character. + """ + return c.lower() + + +class LACTokenizer(Tokenizer): + def __init__(self, vocab_path, lac): + super().__init__() + self.__max_word_len = 0 + self.__vocab = set() + self.__lac = lac + self.__load_vocab(vocab_path) + + def __load_vocab(self, vocab_path): + """Load the word dictionary. + """ + with open(vocab_path, 'r', encoding='utf-8') as fin: + vocab_size = 0 + for line in fin.readlines(): + fields = line.strip().split('\t') + assert len(fields) >= 2 + word = fields[1] + self.__max_word_len = max(self.__max_word_len, len(word)) + self.__vocab.add(word) + vocab_size += 1 + + def tokenize(self, text): + results = self.__lac.lexical_analysis(texts=[text], use_gpu=False, batch_size=1, return_tag=True) + # Change English words to lower case. + # And just preserve the word in vocab. + words = results[0]["word"] + result = [] + for word in words: + word = word.lower() + if word in self.__vocab: + result.append(word) + return result + + def contains(self, word): + """Check whether the word is in the vocabulary. + """ + return word in self.__vocab diff --git a/hub_module/modules/text/semantic_model/slda_webpage/util.py b/modules/text/semantic_model/slda_webpage/util.py similarity index 100% rename from hub_module/modules/text/semantic_model/slda_webpage/util.py rename to modules/text/semantic_model/slda_webpage/util.py diff --git a/modules/text/semantic_model/slda_webpage/vocab.py b/modules/text/semantic_model/slda_webpage/vocab.py new file mode 100644 index 0000000000000000000000000000000000000000..7cc01fe71ceb63d2328ddaf15da8173c9508c118 --- /dev/null +++ b/modules/text/semantic_model/slda_webpage/vocab.py @@ -0,0 +1,41 @@ +from paddlehub.common.logger import logger + +OOV = -1 + + +class WordCount(object): + def __init__(self, word_id, count): + self.word_id = word_id + self.count = count + + +class Vocab(object): + def __init__(self): + self.__term2id = {} + self.__id2term = {} + + def get_id(self, word): + if word not in self.__term2id: + return OOV + return self.__term2id[word] + + def load(self, vocab_file): + self.__term2id = {} + self.__id2term = {} + with open(vocab_file, 'r', encoding='utf-8') as fin: + for line in fin.readlines(): + fields = line.strip().split('\t') + assert len(fields) == 5, "Vocabulary file [%s] format error!" % (vocab_file) + term = fields[1] + id_ = int(fields[2]) + if term in self.__term2id: + logger.error("Duplicate word [%s] in vocab file!" 
% (term)) + continue + self.__term2id[term] = id_ + self.__id2term[id_] = term + + def size(self): + return len(self.__term2id) + + def vocabulary(self): + return self.__id2term diff --git a/hub_module/modules/text/semantic_model/slda_webpage/vose_alias.py b/modules/text/semantic_model/slda_webpage/vose_alias.py similarity index 100% rename from hub_module/modules/text/semantic_model/slda_webpage/vose_alias.py rename to modules/text/semantic_model/slda_webpage/vose_alias.py diff --git a/hub_module/modules/text/semantic_model/slda_weibo/README.md b/modules/text/semantic_model/slda_weibo/README.md similarity index 100% rename from hub_module/modules/text/semantic_model/slda_weibo/README.md rename to modules/text/semantic_model/slda_weibo/README.md diff --git a/hub_module/modules/text/semantic_model/slda_webpage/__init__.py b/modules/text/semantic_model/slda_weibo/__init__.py similarity index 100% rename from hub_module/modules/text/semantic_model/slda_webpage/__init__.py rename to modules/text/semantic_model/slda_weibo/__init__.py diff --git a/hub_module/modules/text/semantic_model/slda_weibo/config.py b/modules/text/semantic_model/slda_weibo/config.py similarity index 100% rename from hub_module/modules/text/semantic_model/slda_weibo/config.py rename to modules/text/semantic_model/slda_weibo/config.py diff --git a/modules/text/semantic_model/slda_weibo/document.py b/modules/text/semantic_model/slda_weibo/document.py new file mode 100644 index 0000000000000000000000000000000000000000..4476230a5c9bc8d545b52386dbf00a201e59b468 --- /dev/null +++ b/modules/text/semantic_model/slda_weibo/document.py @@ -0,0 +1,176 @@ +import numpy as np + + +class Topic(object): + """Basic data structure of topic, contains topic id and + corresponding probability. + """ + + def __init__(self, tid, prob): + self.tid = tid # topic id + self.prob = prob # topic probability + + +class Token(object): + """Basic storage unit of LDA documents, contains word id + and corresponding topic. + """ + + def __init__(self, topic, id): + self.topic = topic + self.id = id + + +class Sentence(object): + """Basic storage unit of SentenceLDA documents, contains word ids + of the sentence and its corresponding topic id. + """ + + def __init__(self, topic, tokens): + self.topic = topic + self.tokens = tokens + + +class LDADoc(object): + """The storage structure of LDA model's inference result. + """ + + def __init__(self): + self._num_topics = None # Number of topics. + self._num_accum = None # Number of accumulated sample rounds. + self._alpha = None # Document prior parameter. + self._tokens = None # Storage structure of inference results. + self._topic_sum = None # Document's topic sum in one round samples. + self._accum_topic_sum = None # Accumulated results of topic sum. + + def init(self, num_topics): + """Initialize the LDADoc according to num_topics. + """ + self._num_topics = num_topics + self._num_accum = 0 + self._tokens = [] + self._topic_sum = np.zeros(self._num_topics) + self._accum_topic_sum = np.zeros(self._num_topics) + + def add_token(self, token): + """Add new word to current LDADoc. + Arg: + token: Token class object. + """ + assert token.topic >= 0, "Topic %d out of range!" % token.topic + assert token.topic < self._num_topics, "Topic %d out of range!" 
% token.topic + self._tokens.append(token) + self._topic_sum[token.topic] += 1 + + def token(self, index): + return self._tokens[index] + + def set_topic(self, index, new_topic): + """Set the index word's topic to new_topic, and update the corresponding + topic distribution. + """ + assert new_topic >= 0, "Topic %d out of range!" % new_topic + assert new_topic < self._num_topics, "Topic %d out of range!" % new_topic + old_topic = self._tokens[index].topic + if new_topic == old_topic: + return + self._tokens[index].topic = new_topic + self._topic_sum[old_topic] -= 1 + self._topic_sum[new_topic] += 1 + + def set_alpha(self, alpha): + self._alpha = alpha + + def size(self): + """Return number of words in LDADoc. + """ + return len(self._tokens) + + def topic_sum(self, topic_id): + return self._topic_sum[topic_id] + + def sparse_topic_dist(self, sort=True): + """Return the topic distribution of documents in sparse format. + By default, it is sorted according to the topic probability + under the descending order. + """ + topic_dist = [] + sum_ = np.sum(self._accum_topic_sum) + if sum_ == 0: + return topic_dist + for i in range(0, self._num_topics): + if self._accum_topic_sum[i] == 0: + continue + topic_dist.append(Topic(i, self._accum_topic_sum[i] * 1.0 / sum_)) + if sort: + + def take_elem(topic): + return topic.prob + + topic_dist.sort(key=take_elem, reverse=True) + if topic_dist is None: + topic_dist = [] + + return topic_dist + + def dense_topic_dist(self): + """Return the distribution of document topics in dense format, + taking into account the prior parameter alpha. + """ + dense_dist = np.zeros(self._num_topics) + if self.size() == 0: + return dense_dist + dense_dist = (self._accum_topic_sum * 1.0 / self._num_accum + self._alpha) / ( + self.size() + self._alpha * self._num_topics) + return dense_dist + + def accumulate_topic_num(self): + self._accum_topic_sum += self._topic_sum + self._num_accum += 1 + + +class SLDADoc(LDADoc): + """Sentence LDA Document, inherited from LDADoc. + Add add_sentence interface. + """ + + def __init__(self): + super().__init__() + self.__sentences = None + + def init(self, num_topics): + """Initialize the SLDADoc according to num_topics. + """ + self._num_topics = num_topics + self.__sentences = [] + self._num_accum = 0 + self._topic_sum = np.zeros(self._num_topics) + self._accum_topic_sum = np.zeros(self._num_topics) + + def add_sentence(self, sent): + """Add new sentence to current SLDADoc. + Arg: + sent: Sentence class object. + """ + assert sent.topic >= 0, "Topic %d out of range!" % (sent.topic) + assert sent.topic < self._num_topics, "Topic %d out of range!" % (sent.topic) + self.__sentences.append(sent) + self._topic_sum[sent.topic] += 1 + + def set_topic(self, index, new_topic): + assert new_topic >= 0, "Topic %d out of range!" % (new_topic) + assert new_topic < self._num_topics, "Topic %d out of range!" % (new_topic) + old_topic = self.__sentences[index].topic + if new_topic == old_topic: + return + self.__sentences[index].topic = new_topic + self._topic_sum[old_topic] -= 1 + self._topic_sum[new_topic] += 1 + + def size(self): + """Return number of sentences in SLDADoc. 
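+        Note that this counts sentences, not tokens as LDADoc.size() does.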
+ """ + return len(self.__sentences) + + def sent(self, index): + return self.__sentences[index] diff --git a/modules/text/semantic_model/slda_weibo/inference_engine.py b/modules/text/semantic_model/slda_weibo/inference_engine.py new file mode 100644 index 0000000000000000000000000000000000000000..f7eefa7e5e914e419a898694da162eb947f8dd2f --- /dev/null +++ b/modules/text/semantic_model/slda_weibo/inference_engine.py @@ -0,0 +1,91 @@ +import os + +from paddlehub.common.logger import logger + +from slda_weibo.config import ModelConfig +from slda_weibo.util import load_prototxt, fix_random_seed, rand_k +from slda_weibo.model import TopicModel +from slda_weibo.sampler import GibbsSampler, MHSampler +from slda_weibo.document import LDADoc, SLDADoc, Token, Sentence +from slda_weibo.vocab import OOV + + +class SamplerType: + GibbsSampling = 0 + MetropolisHastings = 1 + + +class InferenceEngine(object): + def __init__(self, model_dir, conf_file, type=SamplerType.MetropolisHastings): + # Read model configuration. + config = ModelConfig() + conf_file_path = os.path.join(model_dir, conf_file) + load_prototxt(conf_file_path, config) + self.__model = TopicModel(model_dir, config) + self.__config = config + + # Initialize the sampler according to the configuration. + if type == SamplerType.GibbsSampling: + self.__sampler = GibbsSampler(self.__model) + elif type == SamplerType.MetropolisHastings: + self.__sampler = MHSampler(self.__model) + + def infer(self, input, doc): + """Perform LDA topic inference on input, and store the results in doc. + Args: + input: a list of strings after tokenization. + doc: LDADoc type or SLDADoc type. + """ + fix_random_seed() + if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): + doc.init(self.__model.num_topics()) + doc.set_alpha(self.__model.alpha()) + for token in input: + id_ = self.__model.term_id(token) + if id_ != OOV: + init_topic = rand_k(self.__model.num_topics()) + doc.add_token(Token(init_topic, id_)) + self.lda_infer(doc, 20, 50) + elif isinstance(doc, SLDADoc): + doc.init(self.__model.num_topics()) + doc.set_alpha(self.__model.alpha()) + for sent in input: + words = [] + for token in sent: + id_ = self.__model.term_id(token) + if id_ != OOV: + words.append(id_) + init_topic = rand_k(self.__model.num_topics()) + doc.add_sentence(Sentence(init_topic, words)) + self.slda_infer(doc, 20, 50) + else: + logger.error("Wrong Doc Type!") + + def lda_infer(self, doc, burn_in_iter, total_iter): + assert burn_in_iter >= 0 + assert total_iter > 0 + assert total_iter > burn_in_iter + + for iter_ in range(total_iter): + self.__sampler.sample_doc(doc) + if iter_ >= burn_in_iter: + doc.accumulate_topic_num() + + def slda_infer(self, doc, burn_in_iter, total_iter): + assert burn_in_iter >= 0 + assert total_iter > 0 + assert total_iter > burn_in_iter + + for iter_ in range(total_iter): + self.__sampler.sample_doc(doc) + if iter_ >= burn_in_iter: + doc.accumulate_topic_num() + + def model_type(self): + return self.__model.type() + + def get_model(self): + return self.__model + + def get_config(self): + return self.__config diff --git a/modules/text/semantic_model/slda_weibo/model.py b/modules/text/semantic_model/slda_weibo/model.py new file mode 100644 index 0000000000000000000000000000000000000000..500f44b554da2ca04aa34db27bbb07967ae41670 --- /dev/null +++ b/modules/text/semantic_model/slda_weibo/model.py @@ -0,0 +1,123 @@ +import os +from collections import OrderedDict + +import numpy as np +from tqdm import tqdm +from paddlehub.common.logger import logger + +from 
slda_weibo.vocab import Vocab, WordCount
+
+
+class TopicModel(object):
+    """Storage structure of the topic model, including the vocabulary and word-topic counts.
+    """
+
+    def __init__(self, model_dir, config):
+        """
+        Args:
+            model_dir: the path of the model directory.
+            config: ModelConfig class.
+        """
+        self.__word_topic = None  # Model parameter of word topic.
+        self.__vocab = Vocab()  # Vocab data structure of model.
+        self.__num_topics = config.num_topics  # Number of topics.
+        self.__alpha = config.alpha
+        self.__alpha_sum = self.__alpha * self.__num_topics
+        self.__beta = config.beta
+        self.__beta_sum = None
+        self.__type = config.type  # Model type.
+        self.__topic_sum = np.zeros(self.__num_topics, dtype="int64")  # Accum sum of each topic in word topic.
+        self.__topic_words = [[] for _ in range(self.__num_topics)]
+        word_topic_path = os.path.join(model_dir, config.word_topic_file)
+        vocab_path = os.path.join(model_dir, config.vocab_file)
+        self.load_model(word_topic_path, vocab_path)
+
+    def term_id(self, term):
+        return self.__vocab.get_id(term)
+
+    def load_model(self, word_topic_path, vocab_path):
+
+        # Load the vocabulary.
+        self.__vocab.load(vocab_path)
+
+        self.__beta_sum = self.__beta * self.__vocab.size()
+        self.__word_topic = [{} for _ in range(self.__vocab.size())]  # A list of dicts, one topic-count dict per word.
+        self.__load_word_dict(word_topic_path)
+        logger.info("Model Info: #num_topics=%d #vocab_size=%d alpha=%f beta=%f" %
+                    (self.num_topics(), self.vocab_size(), self.alpha(), self.beta()))
+
+    def word_topic_value(self, word_id, topic_id):
+        """Return the count of the given word under the given topic in the model.
+        """
+        word_dict = self.__word_topic[word_id]
+        if topic_id not in word_dict:
+            return 0
+        return word_dict[topic_id]
+
+    def word_topic(self, term_id):
+        """Return the topic distribution of a word.
+        """
+        return self.__word_topic[term_id]
+
+    def topic_sum_value(self, topic_id):
+        return self.__topic_sum[topic_id]
+
+    def topic_sum(self):
+        return self.__topic_sum
+
+    def num_topics(self):
+        return self.__num_topics
+
+    def vocab_size(self):
+        return self.__vocab.size()
+
+    def alpha(self):
+        return self.__alpha
+
+    def alpha_sum(self):
+        return self.__alpha_sum
+
+    def beta(self):
+        return self.__beta
+
+    def beta_sum(self):
+        return self.__beta_sum
+
+    def type(self):
+        return self.__type
+
+    def __load_word_dict(self, word_dict_path):
+        """Load the word topic parameters.
+        """
+        logger.info("Loading word topic.")
+        with open(word_dict_path, 'r', encoding='utf-8') as f:
+            for line in tqdm(f.readlines()):
+                fields = line.strip().split(" ")
+                assert len(fields) > 0, "Model file format error!"
+                term_id = int(fields[0])
+                assert term_id < self.vocab_size(), "Term id out of range!"
+                assert term_id >= 0, "Term id out of range!"
+                for i in range(1, len(fields)):
+                    topic_count = fields[i].split(":")
+                    assert len(topic_count) == 2, "Topic count format error!"
+
+                    topic_id = int(topic_count[0])
+                    assert topic_id >= 0, "Topic out of range!"
+                    assert topic_id < self.__num_topics, "Topic out of range!"
+
+                    count = int(topic_count[1])
+                    assert count >= 0, "Topic count error!"
+ + self.__word_topic[term_id][topic_id] = count + self.__topic_sum[topic_id] += count + self.__topic_words[topic_id].append(WordCount(term_id, count)) + new_dict = OrderedDict() + for key in sorted(self.__word_topic[term_id]): + new_dict[key] = self.__word_topic[term_id][key] + self.__word_topic[term_id] = new_dict + + def get_vocab(self): + return self.__vocab.vocabulary() + + def topic_words(self): + return self.__topic_words diff --git a/modules/text/semantic_model/slda_weibo/module.py b/modules/text/semantic_model/slda_weibo/module.py new file mode 100644 index 0000000000000000000000000000000000000000..d6587c7ed6fafe7c7bd2061afb7d9df4796b7e39 --- /dev/null +++ b/modules/text/semantic_model/slda_weibo/module.py @@ -0,0 +1,102 @@ +import os + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.common.logger import logger + +from slda_weibo.inference_engine import InferenceEngine +from slda_weibo.document import SLDADoc +from slda_weibo.semantic_matching import SemanticMatching, WordAndDis +from slda_weibo.tokenizer import LACTokenizer, SimpleTokenizer +from slda_weibo.config import ModelType +from slda_weibo.vocab import Vocab, WordCount + + +@moduleinfo( + name="slda_weibo", + version="1.0.0", + summary= + "This is a PaddleHub Module for SLDA topic model in weibo dataset, where we can infer the topic distribution of document.", + author="DesmonDay", + author_email="", + type="nlp/semantic_model") +class TopicModel(hub.Module): + def _initialize(self): + """ + Initialize with the necessary elements. + """ + self.model_dir = os.path.join(self.directory, 'weibo') + self.conf_file = 'slda.conf' + self.__engine = InferenceEngine(self.model_dir, self.conf_file) + self.vocab_path = os.path.join(self.model_dir, 'vocab_info.txt') + lac = hub.Module(name="lac") + # self.__tokenizer = SimpleTokenizer(self.vocab_path) + self.__tokenizer = LACTokenizer(self.vocab_path, lac) + + self.vocabulary = self.__engine.get_model().get_vocab() + self.config = self.__engine.get_config() + self.topic_words = self.__engine.get_model().topic_words() + self.topic_sum_table = self.__engine.get_model().topic_sum() + + def take_elem(word_count): + return word_count.count + + for i in range(self.config.num_topics): + self.topic_words[i].sort(key=take_elem, reverse=True) + + logger.info("Finish initialization.") + + def infer_doc_topic_distribution(self, document): + """ + This interface infers the topic distribution of document. + + Args: + document(str): the input document text. + + Returns: + results(list): returns the topic distribution of document. + """ + tokens = self.__tokenizer.tokenize(document) + if tokens == []: + return [] + results = [] + sentences = [] + sent = [] + for i in range(len(tokens)): + sent.append(tokens[i]) + if len(sent) % 5 == 0: + sentences.append(sent) + sent = [] + if len(sent) > 0: + sentences.append(sent) + + doc = SLDADoc() + self.__engine.infer(sentences, doc) + topics = doc.sparse_topic_dist() + for topic in topics: + results.append({"topic id": topic.tid, "distribution": topic.prob}) + return results + + def show_topic_keywords(self, topic_id, k=10): + """ + This interface returns the k keywords under specific topic. + + Args: + topic_id(int): topic information we want to know. + k(int): top k keywords. + + Returns: + results(dict): contains specific topic's keywords and corresponding + probability. 
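+
+        The values are computed as count / (topic_sum + EPS), so the top-k
+        probabilities need not sum to 1.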
+ """ + EPS = 1e-8 + results = {} + if 0 <= topic_id < self.config.num_topics: + k = min(k, len(self.topic_words[topic_id])) + for i in range(k): + prob = self.topic_words[topic_id][i].count / \ + (self.topic_sum_table[topic_id] + EPS) + results[self.vocabulary[self.topic_words[topic_id][i].word_id]] = prob + return results + else: + logger.error("%d is out of range!" % topic_id) diff --git a/modules/text/semantic_model/slda_weibo/sampler.py b/modules/text/semantic_model/slda_weibo/sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..ab9850d52c43bc792d2152798c51a61624ce16d3 --- /dev/null +++ b/modules/text/semantic_model/slda_weibo/sampler.py @@ -0,0 +1,286 @@ +import os + +import numpy as np +from tqdm import tqdm +from paddlehub.common.logger import logger + +from slda_weibo.document import LDADoc, SLDADoc, Token, Sentence +from slda_weibo.vose_alias import VoseAlias +from slda_weibo.util import rand, rand_k + + +class Sampler(object): + def __init__(self): + pass + + def sample_doc(self, doc): + """Sample LDA or SLDA topics for documents. + """ + raise NotImplementedError + + +class MHSampler(Sampler): + def __init__(self, model): + super().__init__() + self.__model = model + self.__topic_indexes = None + self.__alias_tables = None + self.__prob_sum = None + self.__beta_alias = VoseAlias() + self.__beta_prior_sum = None + self.__mh_steps = 2 + self.__construct_alias_table() + + def __construct_alias_table(self): + """Construct alias table for all words. + """ + logger.info("Construct alias table for alias sampling method.") + vocab_size = self.__model.vocab_size() + self.__topic_indexes = [[] for _ in range(vocab_size)] + self.__alias_tables = [VoseAlias() for _ in range(vocab_size)] + self.__prob_sum = np.zeros(vocab_size) + + # Construct each word's alias table (prior is not included). + for i in tqdm(range(vocab_size)): + dist = [] + prob_sum = 0 + for key in self.__model.word_topic(i): + topic_id = key + word_topic_count = self.__model.word_topic(i)[key] + topic_sum = self.__model.topic_sum_value(topic_id) + + self.__topic_indexes[i].append(topic_id) + q = word_topic_count / (topic_sum + self.__model.beta_sum()) + dist.append(q) + prob_sum += q + self.__prob_sum[i] = prob_sum + if len(dist) > 0: + dist = np.array(dist, dtype=np.float) + self.__alias_tables[i].initialize(dist) + + # Build prior parameter beta's alias table. 
+ beta_dist = self.__model.beta() / (self.__model.topic_sum() + self.__model.beta_sum()) + self.__beta_prior_sum = np.sum(beta_dist) + self.__beta_alias.initialize(beta_dist) + + def sample_doc(self, doc): + if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc): + for i in range(doc.size()): + new_topic = self.__sample_token(doc, doc.token(i)) + doc.set_topic(i, new_topic) + elif isinstance(doc, SLDADoc): + for i in range(doc.size()): + new_topic = self.__sample_sentence(doc, doc.sent(i)) + doc.set_topic(i, new_topic) + + def __sample_token(self, doc, token): + new_topic = token.topic + for i in range(self.__mh_steps): + doc_proposed_topic = self.__doc_proposal(doc, token) + new_topic = self.__word_proposal(doc, token, doc_proposed_topic) + return new_topic + + def __sample_sentence(self, doc, sent): + new_topic = sent.topic + for i in range(self.__mh_steps): + doc_proposed_topic = self.__doc_proposal(doc, sent) + new_topic = self.__word_proposal(doc, sent, doc_proposed_topic) + return new_topic + + def __doc_proposal(self, doc, token): + if isinstance(doc, LDADoc) and isinstance(token, Token): + old_topic = token.topic + dart = rand() * (doc.size() + self.__model.alpha_sum()) + if dart < doc.size(): + token_index = int(dart) + new_topic = doc.token(token_index).topic + else: + new_topic = rand_k(self.__model.num_topics()) + + if new_topic != old_topic: + proposal_old = self.__doc_proposal_distribution(doc, old_topic) + proposal_new = self.__doc_proposal_distribution(doc, new_topic) + proportion_old = self.__proportional_function(doc, token, old_topic) + proportion_new = self.__proportional_function(doc, token, new_topic) + transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new)) + rejection = rand() + mask = -(rejection < transition_prob) + return (new_topic & mask) | (old_topic & ~mask) + + return new_topic + + elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): + sent = token + old_topic = sent.topic + dart = rand() * (doc.size() + self.__model.alpha_sum()) + if dart < doc.size(): + token_index = int(dart) + new_topic = doc.sent(token_index).topic + else: + new_topic = rand_k(self.__model.num_topics()) + + if new_topic != old_topic: + proportion_old = self.__proportional_function(doc, sent, old_topic) + proportion_new = self.__proportional_function(doc, sent, new_topic) + proposal_old = self.__doc_proposal_distribution(doc, old_topic) + proposal_new = self.__doc_proposal_distribution(doc, new_topic) + transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new)) + rejection = rand() + mask = -(rejection < transition_prob) + return (new_topic & mask) | (old_topic & ~mask) + + return new_topic + + def __word_proposal(self, doc, token, old_topic): + if isinstance(doc, LDADoc) and isinstance(token, Token): + new_topic = self.__propose(token.id) + if new_topic != old_topic: + proposal_old = self.__word_proposal_distribution(token.id, old_topic) + proposal_new = self.__word_proposal_distribution(token.id, new_topic) + proportion_old = self.__proportional_function(doc, token, old_topic) + proportion_new = self.__proportional_function(doc, token, new_topic) + transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new)) + rejection = rand() + mask = -(rejection < transition_prob) + return (new_topic & mask) | (old_topic & ~mask) + return new_topic + + elif isinstance(doc, SLDADoc) and isinstance(token, Sentence): + sent = token + new_topic = old_topic + for word_id in sent.tokens: + 
new_topic = self.__propose(word_id)
+                if new_topic != old_topic:
+                    proportion_old = self.__proportional_function(doc, sent, old_topic)
+                    proportion_new = self.__proportional_function(doc, sent, new_topic)
+                    proposal_old = self.__word_proposal_distribution(word_id, old_topic)
+                    proposal_new = self.__word_proposal_distribution(word_id, new_topic)
+                    transition_prob = float((proportion_new * proposal_old) / (proportion_old * proposal_new))
+                    rejection = rand()
+                    mask = -(rejection < transition_prob)
+                    new_topic = (new_topic & mask) | (old_topic & ~mask)
+            return new_topic
+
+    def __proportional_function(self, doc, token, new_topic):
+        if isinstance(doc, LDADoc) and isinstance(token, Token):
+            old_topic = token.topic
+            dt_alpha = doc.topic_sum(new_topic) + self.__model.alpha()
+            wt_beta = self.__model.word_topic_value(token.id, new_topic) + self.__model.beta()
+            t_sum_beta_sum = self.__model.topic_sum_value(new_topic) + self.__model.beta_sum()
+            if new_topic == old_topic and wt_beta > 1:
+                if dt_alpha > 1:
+                    dt_alpha -= 1
+                wt_beta -= 1
+                t_sum_beta_sum -= 1
+            return dt_alpha * wt_beta / t_sum_beta_sum
+
+        elif isinstance(doc, SLDADoc) and isinstance(token, Sentence):
+            sent = token
+            old_topic = sent.topic
+            result = doc.topic_sum(new_topic) + self.__model.alpha()
+            if new_topic == old_topic:
+                result -= 1
+            for word_id in sent.tokens:
+                wt_beta = self.__model.word_topic_value(word_id, new_topic) + self.__model.beta()
+                t_sum_beta_sum = self.__model.topic_sum_value(new_topic) + self.__model.beta_sum()
+                if new_topic == old_topic and wt_beta > 1:
+                    wt_beta -= 1
+                    t_sum_beta_sum -= 1
+                result *= wt_beta / t_sum_beta_sum
+            return result
+        else:
+            logger.error("Wrong input argument type!")
+
+    def __word_proposal_distribution(self, word_id, topic):
+        wt_beta = self.__model.word_topic_value(word_id, topic) + self.__model.beta()
+        t_sum_beta_sum = self.__model.topic_sum_value(topic) + self.__model.beta_sum()
+        return wt_beta / t_sum_beta_sum
+
+    def __doc_proposal_distribution(self, doc, topic):
+        return doc.topic_sum(topic) + self.__model.alpha()
+
+    def __propose(self, word_id):
+        dart = rand() * (self.__prob_sum[word_id] + self.__beta_prior_sum)
+        if dart < self.__prob_sum[word_id]:
+            idx = self.__alias_tables[word_id].generate()
+            topic = self.__topic_indexes[word_id][idx]
+        else:
+            topic = self.__beta_alias.generate()
+        return topic
+
+
+class GibbsSampler(Sampler):
+    def __init__(self, model):
+        super().__init__()
+        self.__model = model
+
+    def sample_doc(self, doc):
+        if isinstance(doc, LDADoc) and not isinstance(doc, SLDADoc):
+            for i in range(doc.size()):
+                new_topic = self.__sample_token(doc, doc.token(i))
+                doc.set_topic(i, new_topic)
+        elif isinstance(doc, SLDADoc):
+            for i in range(doc.size()):
+                new_topic = self.__sample_sentence(doc, doc.sent(i))
+                doc.set_topic(i, new_topic)
+
+    def __sample_token(self, doc, token):
+        old_topic = token.topic
+        num_topics = self.__model.num_topics()
+        accum_prob = np.zeros(num_topics)
+        prob = np.zeros(num_topics)
+        sum_ = 0
+        for i in range(num_topics):
+            dt_alpha = doc.topic_sum(i) + self.__model.alpha()
+            wt_beta = self.__model.word_topic_value(token.id, i) + self.__model.beta()
+            t_sum_beta_sum = self.__model.topic_sum_value(i) + self.__model.beta_sum()
+            if i == old_topic and wt_beta > 1:
+                if dt_alpha > 1:
+                    dt_alpha -= 1
+                wt_beta -= 1
+                t_sum_beta_sum -= 1
+            prob[i] = dt_alpha * wt_beta / t_sum_beta_sum
+            sum_ += prob[i]
+            accum_prob[i] = prob[i] if i == 0 else accum_prob[i - 1] + prob[i]
+
+        dart = rand() * sum_
+        if dart <= accum_prob[0]:
+            return 0
0
+        for i in range(1, num_topics):
+            if accum_prob[i - 1] < dart <= accum_prob[i]:
+                return i
+        return num_topics - 1
+
+    def __sample_sentence(self, doc, sent):
+        old_topic = sent.topic
+        num_topics = self.__model.num_topics()
+        accum_prob = np.zeros(num_topics)
+        prob = np.zeros(num_topics)
+        sum_ = 0
+        for t in range(num_topics):
+            dt_alpha = doc.topic_sum(t) + self.__model.alpha()
+            t_sum_beta_sum = self.__model.topic_sum(t) + self.__model.beta_sum()
+            if t == old_topic:
+                if dt_alpha > 1:
+                    dt_alpha -= 1
+                if t_sum_beta_sum > 1:
+                    t_sum_beta_sum -= 1
+            prob[t] = dt_alpha
+            for i in range(len(sent.tokens)):
+                w = sent.tokens[i]
+                wt_beta = self.__model.word_topic_value(w, t) + self.__model.beta()
+                if t == old_topic and wt_beta > 1:
+                    wt_beta -= 1
+                # Note: for a very long sentence, multiplying many per-word factors
+                # makes the probability extremely small and loses numerical precision.
+                prob[t] *= wt_beta / t_sum_beta_sum
+            sum_ += prob[t]
+            accum_prob[t] = prob[t] if t == 0 else accum_prob[t - 1] + prob[t]
+
+        dart = rand() * sum_
+        if dart <= accum_prob[0]:
+            return 0
+        for t in range(1, num_topics):
+            if accum_prob[t - 1] < dart <= accum_prob[t]:
+                return t
+        return num_topics - 1
diff --git a/hub_module/modules/text/semantic_model/slda_weibo/semantic_matching.py b/modules/text/semantic_model/slda_weibo/semantic_matching.py
similarity index 100%
rename from hub_module/modules/text/semantic_model/slda_weibo/semantic_matching.py
rename to modules/text/semantic_model/slda_weibo/semantic_matching.py
diff --git a/modules/text/semantic_model/slda_weibo/tokenizer.py b/modules/text/semantic_model/slda_weibo/tokenizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..585aed885b63b0e2a2d450b77a6d018615c86b04
--- /dev/null
+++ b/modules/text/semantic_model/slda_weibo/tokenizer.py
@@ -0,0 +1,127 @@
+import os
+
+import numpy as np
+from paddlehub.common.logger import logger
+
+
+class Tokenizer(object):
+    """Base tokenizer class.
+    """
+
+    def __init__(self):
+        pass
+
+    def tokenize(self, text):
+        raise NotImplementedError
+
+
+class SimpleTokenizer(Tokenizer):
+    """A simple FMM (Forward Maximum Matching) word tokenizer. It is only meant
+    for the topic model demo, not for real business application scenarios.
+
+    Notes: This tokenizer can only recognize the words in the corresponding vocab file.
+    """
+
+    def __init__(self, vocab_path):
+        super().__init__()
+        self.__max_word_len = 0
+        self.__vocab = set()
+        self.__load_vocab(vocab_path)
+
+    def tokenize(self, text):
+        """Tokenize the input string `text` and return the tokenized result.
+        """
+        text_len = len(text)
+        result = []
+        i = 0
+        while i < text_len:
+            word = found_word = ""
+            # Deal with English characters.
+            if self.__is_eng_char(text[i]):
+                for j in range(i, text_len + 1):
+                    if j < text_len and self.__is_eng_char(text[j]):
+                        word += self.__tolower(text[j])
+                    else:
+                        # Forward matching by character granularity.
+                        if word in self.__vocab:
+                            result.append(word)
+                        i = j - 1
+                        break
+            else:
+                for j in range(i, min(i + self.__max_word_len, text_len)):
+                    word += text[j]
+                    if word in self.__vocab:
+                        found_word = word
+                if len(found_word) > 0:
+                    result.append(found_word)
+                    i += len(found_word) - 1
+            i += 1
+        return result
+
+    def contains(self, word):
+        """Check whether the word is in the vocabulary.
+        """
+        return word in self.__vocab
+
+    def __load_vocab(self, vocab_path):
+        """Load the word dictionary.
+ """ + with open(vocab_path, 'r', encoding='utf-8') as fin: + vocab_size = 0 + for line in fin.readlines(): + fields = line.strip().split('\t') + assert len(fields) >= 2 + word = fields[1] + self.__max_word_len = max(self.__max_word_len, len(word)) + self.__vocab.add(word) + vocab_size += 1 + + def __is_eng_char(self, c): + """Check whether char c is an English character. + """ + return (c >= 'A' and c <= 'Z') or (c >= 'a' and c <= 'z') + + def __tolower(self, c): + """Return the lowercase character of the corresponding character, or return + the original character if there is no corresponding lowercase character. + """ + return c.lower() + + +class LACTokenizer(Tokenizer): + def __init__(self, vocab_path, lac): + super().__init__() + self.__max_word_len = 0 + self.__vocab = set() + self.__lac = lac + self.__load_vocab(vocab_path) + + def __load_vocab(self, vocab_path): + """Load the word dictionary. + """ + with open(vocab_path, 'r', encoding='utf-8') as fin: + vocab_size = 0 + for line in fin.readlines(): + fields = line.strip().split('\t') + assert len(fields) >= 2 + word = fields[1] + self.__max_word_len = max(self.__max_word_len, len(word)) + self.__vocab.add(word) + vocab_size += 1 + + def tokenize(self, text): + results = self.__lac.lexical_analysis(texts=[text], use_gpu=False, batch_size=1, return_tag=True) + # Change English words to lower case. + # And just preserve the word in vocab. + words = results[0]["word"] + result = [] + for word in words: + word = word.lower() + if word in self.__vocab: + result.append(word) + return result + + def contains(self, word): + """Check whether the word is in the vocabulary. + """ + return word in self.__vocab diff --git a/hub_module/modules/text/semantic_model/slda_weibo/util.py b/modules/text/semantic_model/slda_weibo/util.py similarity index 100% rename from hub_module/modules/text/semantic_model/slda_weibo/util.py rename to modules/text/semantic_model/slda_weibo/util.py diff --git a/modules/text/semantic_model/slda_weibo/vocab.py b/modules/text/semantic_model/slda_weibo/vocab.py new file mode 100644 index 0000000000000000000000000000000000000000..7cc01fe71ceb63d2328ddaf15da8173c9508c118 --- /dev/null +++ b/modules/text/semantic_model/slda_weibo/vocab.py @@ -0,0 +1,41 @@ +from paddlehub.common.logger import logger + +OOV = -1 + + +class WordCount(object): + def __init__(self, word_id, count): + self.word_id = word_id + self.count = count + + +class Vocab(object): + def __init__(self): + self.__term2id = {} + self.__id2term = {} + + def get_id(self, word): + if word not in self.__term2id: + return OOV + return self.__term2id[word] + + def load(self, vocab_file): + self.__term2id = {} + self.__id2term = {} + with open(vocab_file, 'r', encoding='utf-8') as fin: + for line in fin.readlines(): + fields = line.strip().split('\t') + assert len(fields) == 5, "Vocabulary file [%s] format error!" % (vocab_file) + term = fields[1] + id_ = int(fields[2]) + if term in self.__term2id: + logger.error("Duplicate word [%s] in vocab file!" 
% (term)) + continue + self.__term2id[term] = id_ + self.__id2term[id_] = term + + def size(self): + return len(self.__term2id) + + def vocabulary(self): + return self.__id2term diff --git a/hub_module/modules/text/semantic_model/slda_weibo/vose_alias.py b/modules/text/semantic_model/slda_weibo/vose_alias.py similarity index 100% rename from hub_module/modules/text/semantic_model/slda_weibo/vose_alias.py rename to modules/text/semantic_model/slda_weibo/vose_alias.py diff --git a/hub_module/modules/text/sentiment_analysis/README.md b/modules/text/sentiment_analysis/README.md similarity index 100% rename from hub_module/modules/text/sentiment_analysis/README.md rename to modules/text/sentiment_analysis/README.md diff --git a/hub_module/modules/text/sentiment_analysis/emotion_detection_textcnn/README.md b/modules/text/sentiment_analysis/emotion_detection_textcnn/README.md similarity index 100% rename from hub_module/modules/text/sentiment_analysis/emotion_detection_textcnn/README.md rename to modules/text/sentiment_analysis/emotion_detection_textcnn/README.md diff --git a/hub_module/modules/text/semantic_model/slda_weibo/__init__.py b/modules/text/sentiment_analysis/emotion_detection_textcnn/__init__.py similarity index 100% rename from hub_module/modules/text/semantic_model/slda_weibo/__init__.py rename to modules/text/sentiment_analysis/emotion_detection_textcnn/__init__.py diff --git a/hub_module/modules/text/sentiment_analysis/emotion_detection_textcnn/assets/vocab.txt b/modules/text/sentiment_analysis/emotion_detection_textcnn/assets/vocab.txt similarity index 100% rename from hub_module/modules/text/sentiment_analysis/emotion_detection_textcnn/assets/vocab.txt rename to modules/text/sentiment_analysis/emotion_detection_textcnn/assets/vocab.txt diff --git a/modules/text/sentiment_analysis/emotion_detection_textcnn/module.py b/modules/text/sentiment_analysis/emotion_detection_textcnn/module.py new file mode 100644 index 0000000000000000000000000000000000000000..bfe7d54f85963f6b054e3c89e564a8fdee511969 --- /dev/null +++ b/modules/text/sentiment_analysis/emotion_detection_textcnn/module.py @@ -0,0 +1,229 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import os + +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.common.paddle_helper import add_vars_prefix +from paddlehub.module.module import moduleinfo, serving + +from emotion_detection_textcnn.net import textcnn_net +from emotion_detection_textcnn.processor import load_vocab, preprocess, postprocess + + +@moduleinfo( + name="emotion_detection_textcnn", + version="1.2.0", + summary="Baidu's open-source Emotion Detection Model(TextCNN).", + author="baidu-nlp", + author_email="", + type="nlp/sentiment_analysis") +class EmotionDetectionTextCNN(hub.NLPPredictionModule): + def _initialize(self): + """ + initialize with the necessary elements + """ + self.pretrained_model_path = os.path.join(self.directory, "assets", "infer_model") + self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt") + self.vocab = load_vocab(self.vocab_path) + self._word_seg_module = None + + self.predict = self.emotion_classify + + self._set_config() + + @property + def word_seg_module(self): + """ + lac module + """ + if not self._word_seg_module: + self._word_seg_module = hub.Module(name="lac") + return self._word_seg_module + + def context(self, trainable=False, max_seq_len=128, num_slots=1): + """ + Get the input ,output and 
program of the pretrained emotion_detection_textcnn
+
+        Args:
+            trainable(bool): Whether to fine-tune the pretrained parameters of emotion_detection_textcnn or not.
+            max_seq_len (int): It will limit the total sequence returned so that it has a maximum length.
+            num_slots(int): The number of data inputs fed to the model, selected from the following options:
+
+                - 1(default): There is only one data input to be fed to the model, e.g. the module is used for a text classification task.
+                - 2: There are two data inputs to be fed to the model, e.g. the module is used for a text matching task (point-wise).
+                - 3: There are three data inputs to be fed to the model, e.g. the module is used for a text matching task (pair-wise).
+
+        Returns:
+            inputs(dict): the input variables of emotion_detection_textcnn (words)
+            outputs(dict): the output variables of the input words (word embeddings and label probabilities);
+                the sentence embedding and sequence length of the first input text.
+            main_program(Program): the main_program of emotion_detection_textcnn with pretrained parameters
+        """
+        assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots
+        main_program = fluid.Program()
+        startup_program = fluid.Program()
+        with fluid.program_guard(main_program, startup_program):
+            text_1 = fluid.layers.data(name="text", shape=[-1, max_seq_len, 1], dtype="int64", lod_level=0)
+            seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64', lod_level=0)
+            seq_len_used = fluid.layers.squeeze(seq_len, axes=[1])
+
+            # Add embedding layer.
+            w_param_attrs = fluid.ParamAttr(
+                name="embedding_0.w_0", initializer=fluid.initializer.TruncatedNormal(scale=0.02), trainable=trainable)
+            dict_dim = 240466
+            emb_1 = fluid.layers.embedding(
+                input=text_1,
+                size=[dict_dim, 128],
+                is_sparse=True,
+                padding_idx=dict_dim - 1,
+                dtype='float32',
+                param_attr=w_param_attrs)
+            emb_1_name = emb_1.name
+            data_list = [text_1]
+            emb_name_list = [emb_1_name]
+
+            # Add textcnn layer.
+            pred, fc = textcnn_net(emb_1, seq_len_used)
+            pred_name = pred.name
+            fc_name = fc.name
+
+            if num_slots > 1:
+                text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
+                emb_2 = fluid.embedding(
+                    input=text_2,
+                    size=[dict_dim, 128],
+                    is_sparse=True,
+                    padding_idx=dict_dim - 1,
+                    dtype='float32',
+                    param_attr=w_param_attrs)
+                emb_2_name = emb_2.name
+                data_list.append(text_2)
+                emb_name_list.append(emb_2_name)
+
+            if num_slots > 2:
+                text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
+                emb_3 = fluid.embedding(
+                    input=text_3,
+                    size=[dict_dim, 128],
+                    is_sparse=True,
+                    padding_idx=dict_dim - 1,
+                    dtype='float32',
+                    param_attr=w_param_attrs)
+                emb_3_name = emb_3.name
+                data_list.append(text_3)
+                emb_name_list.append(emb_3_name)
+
+            variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"],
+                                    list(main_program.global_block().vars.keys()))
+            prefix_name = "@HUB_{}@".format(self.name)
+            add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names)
+
+            for param in main_program.global_block().iter_parameters():
+                param.trainable = trainable
+
+            place = fluid.CPUPlace()
+            exe = fluid.Executor(place)
+
+            # Load the emotion_detection_textcnn pretrained model.
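+            # `fluid.io.load_vars` restores only the variables for which the
+            # `if_exist` predicate finds a saved file under `pretrained_model_path`.
+            # The extra matching-task slots created for num_slots > 1 reuse the
+            # same `embedding_0.w_0` ParamAttr, so the shared embedding weight is
+            # loaded just once.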
+            def if_exist(var):
+                return os.path.exists(os.path.join(self.pretrained_model_path, var.name))
+
+            fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist)
+
+            inputs = {'seq_len': seq_len}
+            outputs = {
+                "class_probs": main_program.global_block().vars[prefix_name + pred_name],
+                "sentence_feature": main_program.global_block().vars[prefix_name + fc_name]
+            }
+            for index, data in enumerate(data_list):
+                if index == 0:
+                    inputs['text'] = data
+                    outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]]
+                else:
+                    inputs['text_%s' % (index + 1)] = data
+                    outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name +
+                                                                                       emb_name_list[index]]
+            return inputs, outputs, main_program
+
+    @serving
+    def emotion_classify(self, texts=[], data={}, use_gpu=False, batch_size=1):
+        """
+        Get the emotion prediction results with the texts as input
+        Args:
+            texts(list): the input texts to be predicted; used when `data` is not set
+            data(dict): the key must be 'text' and the value is the list of texts to be predicted; used when `texts` is not set
+            use_gpu(bool): whether to use GPU for prediction or not
+            batch_size(int): the number of texts processed in one batch
+        Returns:
+            results(list): the emotion prediction results
+        """
+        if use_gpu:
+            try:
+                _places = os.environ["CUDA_VISIBLE_DEVICES"]
+                int(_places[0])
+            except Exception:
+                raise RuntimeError(
+                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
+                )
+
+        if texts != [] and isinstance(texts, list) and data == {}:
+            predicted_data = texts
+        elif texts == [] and isinstance(data, dict) and isinstance(data.get('text', None), list) and data['text']:
+            predicted_data = data["text"]
+        else:
+            raise ValueError("The input data is inconsistent with expectations.")
+
+        predicted_data = self.to_unicode(predicted_data)
+
+        start_idx = 0
+        iteration = int(math.ceil(len(predicted_data) / batch_size))
+        results = []
+        for i in range(iteration):
+            if i < (iteration - 1):
+                batch_data = predicted_data[start_idx:(start_idx + batch_size)]
+            else:
+                batch_data = predicted_data[start_idx:]
+            start_idx = start_idx + batch_size
+            processed_results = preprocess(self.word_seg_module, batch_data, self.vocab, use_gpu, batch_size)
+            tensor_words = self.texts2tensor(processed_results)
+
+            if use_gpu:
+                batch_out = self.gpu_predictor.run([tensor_words])
+            else:
+                batch_out = self.cpu_predictor.run([tensor_words])
+            batch_result = postprocess(batch_out[0], processed_results)
+            results += batch_result
+        return results
+
+    def get_labels(self):
+        """
+        Get the labels which were used when pretraining
+        Returns:
+            self.labels(dict)
+        """
+        self.labels = {"positive": 2, "negative": 0, "neutral": 1}
+        return self.labels
+
+
+if __name__ == "__main__":
+    emotion_detection_textcnn = EmotionDetectionTextCNN()
+    inputs, outputs, main_program = emotion_detection_textcnn.context(num_slots=3)
+    print(inputs)
+    print(outputs)
+    # Data to be predicted
+    test_text = ["今天天气真好", "湿纸巾是干垃圾", "别来吵我"]
+
+    input_dict = {"text": test_text}
+    results = emotion_detection_textcnn.emotion_classify(data=input_dict, batch_size=2)
+    for result in results:
+        print(result['text'])
+        print(result['emotion_label'])
+        print(result['emotion_key'])
+        print(result['positive_probs'])
+        print(result['negative_probs'])
+        print(result['neutral_probs'])
diff --git a/modules/text/sentiment_analysis/emotion_detection_textcnn/net.py b/modules/text/sentiment_analysis/emotion_detection_textcnn/net.py
new file mode 100644
index
0000000000000000000000000000000000000000..5be0d9ea759f93eeefc9a87eef283546422f1026 --- /dev/null +++ b/modules/text/sentiment_analysis/emotion_detection_textcnn/net.py @@ -0,0 +1,28 @@ +# -*- coding:utf-8 -*- +import paddle.fluid as fluid + + +def textcnn_net(emb, seq_len, emb_dim=128, hid_dim=128, hid_dim2=96, class_dim=3, win_sizes=None): + """ + Textcnn_net + """ + if win_sizes is None: + win_sizes = [1, 2, 3] + + # unpad the token_feature + unpad_feature = fluid.layers.sequence_unpad(emb, length=seq_len) + + # convolution layer + convs = [] + for win_size in win_sizes: + conv_h = fluid.nets.sequence_conv_pool( + input=unpad_feature, num_filters=hid_dim, filter_size=win_size, act="tanh", pool_type="max") + convs.append(conv_h) + convs_out = fluid.layers.concat(input=convs, axis=1) + + # full connect layer + fc_1 = fluid.layers.fc(input=[convs_out], size=hid_dim2, act="tanh") + # softmax layer + prediction = fluid.layers.fc(input=[fc_1], size=class_dim, act="softmax") + + return prediction, fc_1 diff --git a/modules/text/sentiment_analysis/emotion_detection_textcnn/processor.py b/modules/text/sentiment_analysis/emotion_detection_textcnn/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..10bd655af5cac809b51b2e10ad8325f9272a90e5 --- /dev/null +++ b/modules/text/sentiment_analysis/emotion_detection_textcnn/processor.py @@ -0,0 +1,68 @@ +# -*- coding:utf-8 -*- +import io +import numpy as np + + +def load_vocab(file_path): + """ + load the given vocabulary + """ + vocab = {} + with io.open(file_path, 'r', encoding='utf8') as fin: + wid = 0 + for line in fin: + data = line.strip().split("\t") + if len(data) == 1: + wstr = '' + vocab[wstr] = int(data[0]) + continue + else: + wstr = data[0] + vocab[wstr] = int(data[1]) + vocab[""] = len(vocab) + return vocab + + +def get_predict_label(probs): + label = int(np.argmax(probs)) + if label == 0: + key = "negative" + elif label == 2: + key = "positive" + else: + key = "neutral" + return label, key + + +def preprocess(lac, predicted_data, word_dict, use_gpu=False, batch_size=1): + result = [] + data_dict = {"text": predicted_data} + processed = lac.lexical_analysis(data=data_dict, use_gpu=use_gpu, batch_size=batch_size) + unk_id = word_dict[""] + for index, data in enumerate(processed): + result_i = {'processed': []} + result_i['origin'] = predicted_data[index] + for word in data['word']: + if word in word_dict: + _index = word_dict[word] + else: + _index = unk_id + result_i['processed'].append(_index) + result.append(result_i) + return result + + +def postprocess(prediction, texts): + result = [] + pred = prediction.as_ndarray() + for index in range(len(texts)): + result_i = {} + result_i['text'] = texts[index]['origin'] + label, key = get_predict_label(pred[index]) + result_i['emotion_label'] = label + result_i['emotion_key'] = key + result_i['positive_probs'] = float('%.4f' % pred[index, 2]) + result_i['negative_probs'] = float('%.4f' % (pred[index, 0])) + result_i['neutral_probs'] = float('%.4f' % (pred[index, 1])) + result.append(result_i) + return result diff --git a/hub_module/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/README.md b/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/README.md similarity index 100% rename from hub_module/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/README.md rename to modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/README.md diff --git 
a/hub_module/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/assets/ernie_1.0_large_ch.config.json b/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/assets/ernie_1.0_large_ch.config.json similarity index 100% rename from hub_module/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/assets/ernie_1.0_large_ch.config.json rename to modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/assets/ernie_1.0_large_ch.config.json diff --git a/hub_module/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/assets/ernie_1.0_large_ch.vocab.txt b/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/assets/ernie_1.0_large_ch.vocab.txt similarity index 100% rename from hub_module/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/assets/ernie_1.0_large_ch.vocab.txt rename to modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/assets/ernie_1.0_large_ch.vocab.txt diff --git a/hub_module/modules/text/sentiment_analysis/emotion_detection_textcnn/__init__.py b/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/__init__.py similarity index 100% rename from hub_module/modules/text/sentiment_analysis/emotion_detection_textcnn/__init__.py rename to modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/__init__.py diff --git a/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/ernie.py b/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/ernie.py new file mode 100644 index 0000000000000000000000000000000000000000..cc643efc526fe884448d6ca2d6ae3c4a780955ef --- /dev/null +++ b/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/ernie.py @@ -0,0 +1,335 @@ +# -*- coding:utf-8 -** +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""ERNIE""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import json +import logging + +import paddle.fluid as fluid +import six + +from .transformer_encoder import encoder, pre_process_layer +from .transformer_encoder import gelu + + +class ErnieModel(object): + """ + ErnieModel + """ + + def __init__(self, src_ids, position_ids, sentence_ids, input_mask, config, weight_sharing=True, use_fp16=False): + """ + :param src_ids: + :param position_ids: + :param sentence_ids: + :param input_mask: + :param config: + :param weight_sharing: + :param use_fp16: + """ + self._hidden_size = config.get('hidden_size', 768) + self._emb_size = config.get('emb_size', self._hidden_size) + self._n_layer = config.get('num_hidden_layers', 12) + self._n_head = config.get('num_attention_heads', 12) + self._voc_size = config.get('vocab_size', 30522) + self._max_position_seq_len = config.get('max_position_embeddings', 512) + self._param_share = config.get('param_share', "normal") + self._pre_encoder_cmd = config.get('pre_encoder_cmd', "nd") + self._preprocess_cmd = config.get('preprocess_cmd', "") + self._postprocess_cmd = config.get('postprocess_cmd', "dan") + self._epsilon = config.get('epsilon', 1e-05) + self._emb_mapping_in = config.get('emb_mapping_in', False) + self._n_layer_per_block = config.get('n_layer_per_block', 1) + + if config.has('sent_type_vocab_size'): + self._sent_types = config['sent_type_vocab_size'] + else: + self._sent_types = config.get('type_vocab_size', 2) + + self._use_sentence_id = config.get('use_sentence_id', True) + self._use_task_id = config.get('use_task_id', False) + if self._use_task_id: + self._task_types = config.get('task_type_vocab_size', 3) + self._hidden_act = config.get('hidden_act', 'gelu') + self._prepostprocess_dropout = config.get('hidden_dropout_prob', 0.1) + self._attention_dropout = config.get('attention_probs_dropout_prob', 0.1) + self._weight_sharing = weight_sharing + + self._word_emb_name = "word_embedding" + self._pos_emb_name = "pos_embedding" + self._sent_emb_name = "sent_embedding" + self._task_emb_name = "task_embedding" + self._dtype = "float16" if use_fp16 else "float32" + self._emb_dtype = "float32" + # Initialize all weigths by truncated normal initializer, and all biases + # will be initialized by constant zero by default. 
+ self._param_initializer = fluid.initializer.TruncatedNormal(scale=config.get('initializer_range', 0.02)) + + self._build_model(src_ids, position_ids, sentence_ids, input_mask) + + def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): + """ + :param src_ids: + :param position_ids: + :param sentence_ids: + :param input_mask: + :return: + """ + # padding id in vocabulary must be set to 0 + emb_out = fluid.layers.embedding( + input=src_ids, + dtype=self._emb_dtype, + size=[self._voc_size, self._emb_size], + param_attr=fluid.ParamAttr(name=self._word_emb_name, initializer=self._param_initializer), + is_sparse=False) + + position_emb_out = fluid.layers.embedding( + input=position_ids, + dtype=self._emb_dtype, + size=[self._max_position_seq_len, self._emb_size], + param_attr=fluid.ParamAttr(name=self._pos_emb_name, initializer=self._param_initializer)) + + emb_out = emb_out + position_emb_out + + if self._use_sentence_id: + sent_emb_out = fluid.layers.embedding( + sentence_ids, + dtype=self._emb_dtype, + size=[self._sent_types, self._emb_size], + param_attr=fluid.ParamAttr(name=self._sent_emb_name, initializer=self._param_initializer)) + + emb_out = emb_out + sent_emb_out + + emb_out = pre_process_layer( + emb_out, self._pre_encoder_cmd, self._prepostprocess_dropout, name='pre_encoder', epsilon=self._epsilon) + + if self._emb_mapping_in: + emb_out = fluid.layers.fc( + input=emb_out, + num_flatten_dims=2, + size=self._hidden_size, + param_attr=fluid.ParamAttr(name='emb_hidden_mapping', initializer=self._param_initializer), + bias_attr='emb_hidden_mapping_bias') + + if self._dtype == "float16": + emb_out = fluid.layers.cast(x=emb_out, dtype=self._dtype) + input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) + self_attn_mask = fluid.layers.matmul(x=input_mask, y=input_mask, transpose_y=True) + + self_attn_mask = fluid.layers.scale(x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) + n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] * self._n_head, axis=1) + n_head_self_attn_mask.stop_gradient = True + + self._enc_out, self._checkpoints = encoder( + enc_input=emb_out, + attn_bias=n_head_self_attn_mask, + n_layer=self._n_layer, + n_head=self._n_head, + d_key=self._hidden_size // self._n_head, + d_value=self._hidden_size // self._n_head, + d_model=self._hidden_size, + d_inner_hid=self._hidden_size * 4, + prepostprocess_dropout=self._prepostprocess_dropout, + attention_dropout=self._attention_dropout, + relu_dropout=0, + hidden_act=self._hidden_act, + preprocess_cmd=self._preprocess_cmd, + postprocess_cmd=self._postprocess_cmd, + param_initializer=self._param_initializer, + name='encoder', + param_share=self._param_share, + epsilon=self._epsilon, + n_layer_per_block=self._n_layer_per_block) + if self._dtype == "float16": + self._enc_out = fluid.layers.cast(x=self._enc_out, dtype=self._emb_dtype) + + def get_sequence_output(self): + """ + :return: + """ + return self._enc_out + + def get_pooled_output(self): + """Get the first feature of each sequence for classification""" + next_sent_feat = fluid.layers.slice(input=self._enc_out, axes=[1], starts=[0], ends=[1]) + """ + if self._dtype == "float16": + next_sent_feat = fluid.layers.cast( + x=next_sent_feat, dtype=self._emb_dtype) + + next_sent_feat = fluid.layers.fc( + input=next_sent_feat, + size=self._emb_size, + param_attr=fluid.ParamAttr( + name="mask_lm_trans_fc.w_0", initializer=self._param_initializer), + bias_attr="mask_lm_trans_fc.b_0") + """ + """ + next_sent_feat = fluid.layers.fc( + 
input=next_sent_feat, + size=self._emb_size, + param_attr=fluid.ParamAttr( + name="mask_lm_trans_fc.w_0", initializer=self._param_initializer), + bias_attr="mask_lm_trans_fc.b_0") + + """ + next_sent_feat = fluid.layers.fc( + input=next_sent_feat, + size=self._hidden_size, + act="tanh", + param_attr=fluid.ParamAttr(name="pooled_fc.w_0", initializer=self._param_initializer), + bias_attr="pooled_fc.b_0") + return next_sent_feat + + def get_lm_output(self, mask_label, mask_pos): + """Get the loss & accuracy for pretraining""" + mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') + # extract the first token feature in each sentence + self.next_sent_feat = self.get_pooled_output() + reshaped_emb_out = fluid.layers.reshape(x=self._enc_out, shape=[-1, self._hidden_size]) + # extract masked tokens' feature + mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) + + if self._dtype == "float16": + mask_feat = fluid.layers.cast(x=mask_feat, dtype=self._emb_dtype) + + # transform: fc + if self._hidden_act == 'gelu' or self._hidden_act == 'gelu.precise': + _hidden_act = 'gelu' + elif self._hidden_act == 'gelu.approximate': + _hidden_act = None + else: + _hidden_act = self._hidden_act + mask_trans_feat = fluid.layers.fc( + input=mask_feat, + size=self._emb_size, + act=_hidden_act, + param_attr=fluid.ParamAttr(name='mask_lm_trans_fc.w_0', initializer=self._param_initializer), + bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) + if self._hidden_act == 'gelu.approximate': + mask_trans_feat = gelu(mask_trans_feat) + else: + pass + # transform: layer norm + mask_trans_feat = fluid.layers.layer_norm( + mask_trans_feat, + begin_norm_axis=len(mask_trans_feat.shape) - 1, + param_attr=fluid.ParamAttr( + name='mask_lm_trans_layer_norm_scale', initializer=fluid.initializer.Constant(1.)), + bias_attr=fluid.ParamAttr(name='mask_lm_trans_layer_norm_bias', initializer=fluid.initializer.Constant(1.))) + # transform: layer norm + # mask_trans_feat = pre_process_layer( + # mask_trans_feat, 'n', name='mask_lm_trans') + + mask_lm_out_bias_attr = fluid.ParamAttr( + name="mask_lm_out_fc.b_0", initializer=fluid.initializer.Constant(value=0.0)) + if self._weight_sharing: + fc_out = fluid.layers.matmul( + x=mask_trans_feat, + y=fluid.default_main_program().global_block().var(self._word_emb_name), + transpose_y=True) + fc_out += fluid.layers.create_parameter( + shape=[self._voc_size], dtype=self._emb_dtype, attr=mask_lm_out_bias_attr, is_bias=True) + + else: + fc_out = fluid.layers.fc( + input=mask_trans_feat, + size=self._voc_size, + param_attr=fluid.ParamAttr(name="mask_lm_out_fc.w_0", initializer=self._param_initializer), + bias_attr=mask_lm_out_bias_attr) + + mask_lm_loss = fluid.layers.softmax_with_cross_entropy(logits=fc_out, label=mask_label) + mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) + + return mean_mask_lm_loss + + def get_task_output(self, task, task_labels): + """ + :param task: + :param task_labels: + :return: + """ + task_fc_out = fluid.layers.fc( + input=self.next_sent_feat, + size=task["num_labels"], + param_attr=fluid.ParamAttr(name=task["task_name"] + "_fc.w_0", initializer=self._param_initializer), + bias_attr=task["task_name"] + "_fc.b_0") + task_loss, task_softmax = fluid.layers.softmax_with_cross_entropy( + logits=task_fc_out, label=task_labels, return_softmax=True) + task_acc = fluid.layers.accuracy(input=task_softmax, label=task_labels) + mean_task_loss = fluid.layers.mean(task_loss) + return mean_task_loss, task_acc + + +class ErnieConfig(object): + """parse ernie 
config""" + + def __init__(self, config_path): + """ + :param config_path: + """ + self._config_dict = self._parse(config_path) + + def _parse(self, config_path): + """ + :param config_path: + :return: + """ + try: + with open(config_path, 'r') as json_file: + config_dict = json.load(json_file) + except Exception: + raise IOError("Error in parsing Ernie model config file '%s'" % config_path) + else: + return config_dict + + def __getitem__(self, key): + """ + :param key: + :return: + """ + return self._config_dict.get(key, None) + + def has(self, key): + """ + :param key: + :return: + """ + if key in self._config_dict: + return True + return False + + def get(self, key, default_value): + """ + :param key,default_value: + :retrun: + """ + if key in self._config_dict: + return self._config_dict[key] + else: + return default_value + + def print_config(self): + """ + :return: + """ + for arg, value in sorted(six.iteritems(self._config_dict)): + logging.info('%s: %s' % (arg, value)) + logging.info('------------------------------------------------') diff --git a/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/transformer_encoder.py b/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/transformer_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..6c562795ef9d7f5fa04aa48f8b7a4cd79fec02c6 --- /dev/null +++ b/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/transformer_encoder.py @@ -0,0 +1,450 @@ +# -*- coding:utf-8 -** +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Transformer encoder.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from functools import partial + +import paddle.fluid as fluid +import paddle.fluid.layers as layers +import numpy as np + + +def gelu(x): + """Gaussian Error Linear Unit. + + This is a smoother version of the RELU. + Original paper: https://arxiv.org/abs/1606.08415 + Args: + x: float Tensor to perform activation. + + Returns: + `x` with the GELU activation applied. + """ + cdf = 0.5 * (1.0 + fluid.layers.tanh((np.sqrt(2.0 / np.pi) * (x + 0.044715 * fluid.layers.pow(x, 3.0))))) + return x * cdf + + +def multi_head_attention(queries, + keys, + values, + attn_bias, + d_key, + d_value, + d_model, + n_head=1, + dropout_rate=0., + cache=None, + param_initializer=None, + name='multi_head_att'): + """ + Multi-Head Attention. Note that attn_bias is added to the logit before + computing softmax activiation to mask certain selected positions so that + they will not considered in attention weights. + """ + keys = queries if keys is None else keys + values = keys if values is None else values + if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): + raise ValueError( + "Inputs: quries, keys and values should all be 3-D tensors. but {} v.s. {} v.s. 
{}"\ + .format(queries.shape, keys.shape, values.shape)) + + def __compute_qkv(queries, keys, values, n_head, d_key, d_value): + """ + Add linear projection to queries, keys, and values. + """ + q = layers.fc( + input=queries, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_query_fc.w_0', initializer=param_initializer), + bias_attr=name + '_query_fc.b_0') + k = layers.fc( + input=keys, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_key_fc.w_0', initializer=param_initializer), + bias_attr=name + '_key_fc.b_0') + v = layers.fc( + input=values, + size=d_value * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + '_value_fc.w_0', initializer=param_initializer), + bias_attr=name + '_value_fc.b_0') + return q, k, v + + def __split_heads(x, n_head): + """ + Reshape the last dimension of inpunt tensor x so that it becomes two + dimensions and then transpose. Specifically, input a tensor with shape + [bs, max_sequence_length, n_head * hidden_dim] then output a tensor + with shape [bs, n_head, max_sequence_length, hidden_dim]. + """ + hidden_size = x.shape[-1] + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + reshaped = layers.reshape(x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) + + # permuate the dimensions into: + # [batch_size, n_head, max_sequence_len, hidden_size_per_head] + return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + + def __combine_heads(x): + """ + Transpose and then reshape the last two dimensions of inpunt tensor x + so that it becomes one dimension, which is reverse to __split_heads. + """ + if len(x.shape) == 3: return x + if len(x.shape) != 4: + raise ValueError("Input(x) should be a 4-D Tensor.") + + trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + return layers.reshape(x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], inplace=True) + + def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): + """ + Scaled Dot-Product Attention + """ + scaled_q = layers.scale(x=q, scale=d_key**-0.5) + product = layers.matmul(x=scaled_q, y=k, transpose_y=True) + if attn_bias: + product += attn_bias + weights = layers.softmax(product) + if dropout_rate: + weights = layers.dropout( + weights, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + out = layers.matmul(weights, v) + return out + + q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) + + if cache is not None: # use cache and concat time steps + # Since the inplace reshape in __split_heads changes the shape of k and + # v, which is the cache input for next time step, reshape the cache + # input from the previous time step first. + k = cache["k"] = layers.concat([layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) + v = cache["v"] = layers.concat([layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) + + q = __split_heads(q, n_head) + k = __split_heads(k, n_head) + v = __split_heads(v, n_head) + + ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate) + + out = __combine_heads(ctx_multiheads) + + # Project back to the model size. 
+    proj_out = layers.fc(
+        input=out,
+        size=d_model,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_output_fc.w_0', initializer=param_initializer),
+        bias_attr=name + '_output_fc.b_0')
+    return proj_out
+
+
+def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate, hidden_act, param_initializer=None, name='ffn'):
+    """
+    Position-wise Feed-Forward Networks.
+    This module consists of two linear transformations with an activation
+    in between, which is applied to each position separately and identically.
+    """
+    if hidden_act == 'gelu' or hidden_act == 'gelu.precise':
+        _hidden_act = 'gelu'
+    elif hidden_act == 'gelu.approximate':
+        _hidden_act = None
+    else:
+        _hidden_act = hidden_act
+    hidden = layers.fc(
+        input=x,
+        size=d_inner_hid,
+        num_flatten_dims=2,
+        act=_hidden_act,
+        param_attr=fluid.ParamAttr(name=name + '_fc_0.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_0.b_0')
+    if hidden_act == 'gelu.approximate':
+        hidden = gelu(hidden)
+
+    if dropout_rate:
+        hidden = layers.dropout(
+            hidden, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    out = layers.fc(
+        input=hidden,
+        size=d_hid,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + '_fc_1.w_0', initializer=param_initializer),
+        bias_attr=name + '_fc_1.b_0')
+    return out
+
+
+def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., epsilon=1e-12, name=''):
+    """
+    Add residual connection, layer normalization and dropout to the out tensor
+    optionally according to the value of process_cmd.
+    This will be used before or after multi-head attention and position-wise
+    feed-forward networks.
+    """
+    for cmd in process_cmd:
+        if cmd == "a":  # add residual connection
+            out = out + prev_out if prev_out else out
+        elif cmd == "n":  # add layer normalization
+            out_dtype = out.dtype
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float32")
+            out = layers.layer_norm(
+                out,
+                begin_norm_axis=len(out.shape) - 1,
+                param_attr=fluid.ParamAttr(name=name + '_layer_norm_scale', initializer=fluid.initializer.Constant(1.)),
+                bias_attr=fluid.ParamAttr(name=name + '_layer_norm_bias', initializer=fluid.initializer.Constant(0.)),
+                epsilon=epsilon)
+            if out_dtype == fluid.core.VarDesc.VarType.FP16:
+                out = layers.cast(x=out, dtype="float16")
+        elif cmd == "d":  # add dropout
+            if dropout_rate:
+                out = layers.dropout(
+                    out, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    return out
+
+
+pre_process_layer = partial(pre_post_process_layer, None)
+post_process_layer = pre_post_process_layer
+
+
+def encoder_layer(
+        enc_input,
+        attn_bias,
+        n_head,
+        d_key,
+        d_value,
+        d_model,
+        d_inner_hid,
+        prepostprocess_dropout,
+        attention_dropout,
+        relu_dropout,
+        hidden_act,
+        preprocess_cmd="n",
+        postprocess_cmd="da",
+        param_initializer=None,
+        name='',
+        epsilon=1e-12,
+):
+    """The encoder layers that can be stacked to form a deep encoder.
+    This module consists of a multi-head (self) attention sublayer followed by
+    position-wise feed-forward networks; both components are wrapped with
+    post_process_layer to add residual connection, layer normalization and
+    dropout.
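+    For example, with postprocess_cmd="dan" each sublayer output becomes
+    LayerNorm(x + Dropout(Sublayer(x))), i.e. the post-LN transformer variant.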
+ """ + + attn_output = multi_head_attention( + enc_input, + None, + None, + attn_bias, + d_key, + d_value, + d_model, + n_head, + attention_dropout, + param_initializer=param_initializer, + name=name + '_multi_head_att') + + attn_output = post_process_layer( + enc_input, attn_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_att', epsilon=epsilon) + + ffd_output = positionwise_feed_forward( + attn_output, + d_inner_hid, + d_model, + relu_dropout, + hidden_act, + param_initializer=param_initializer, + name=name + '_ffn') + + return post_process_layer( + attn_output, ffd_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_ffn', + epsilon=epsilon), ffd_output + + +def encoder_inner_share(enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + epsilon, + param_initializer=None, + name='', + n_layer_per_block=1): + """ + The encoder_inner_share is composed of n_layer_per_block layers returned by calling + encoder_layer. + """ + _checkpoints = [] + for i in range(n_layer_per_block): + enc_output, cp = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + name=name + '_layer_' + str(i), + epsilon=epsilon, + ) + _checkpoints.append(cp) + enc_input = enc_output + + return enc_output, _checkpoints + + +def encoder_outer_share(enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + epsilon, + param_initializer=None, + name='', + n_layer_per_block=1): + """ + The encoder_outer_share is composed of n_layer_per_block layers returned by calling + encoder_layer. + """ + _checkpoints = [] + for i in range(n_layer_per_block): + enc_output, cp = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + name=name, + epsilon=epsilon) + _checkpoints.append(cp) + enc_input = enc_output + + return enc_output, _checkpoints + + +def encoder(enc_input, + attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + epsilon, + n_layer_per_block, + param_initializer=None, + name='', + param_share=None): + """ + The encoder is composed of a stack of identical layers returned by calling + encoder_layer . 
+ """ + checkpoints = [] + # for outer_share it will share same param in one block, + # and for inner_share it will share param across blocks, rather than in one same block + # + # outer-share inner-share + # [1] [1] ----\ 1st block + # [1] [2] ----/ + # [2] [1] ----\ 2nd block + # [2] [2] ----/ + + if param_share == "normal" or param_share == 'outer_share': + #n_layer_per_block=1, n_layer=24 for bert-large + #n_layer_per_block=1, n_layer=12 for bert-base + #n_layer_per_block=12, n_layer=12 for albert-xxlarge + #n_layer_per_block=6, n_layer=12 for albert-xxlarge-outershare + enc_fn = encoder_outer_share + name_fn = lambda i: name + '_layer_' + str(i) + elif param_share == "inner_share": + #n_layer_per_block = 2 + enc_fn = encoder_inner_share + name_fn = lambda i: name + else: + raise ValueError('unsupported param share mode') + + for i in range(n_layer // n_layer_per_block): + enc_output, cp = enc_fn( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + name=name_fn(i), + n_layer_per_block=n_layer_per_block, + epsilon=epsilon, + ) + checkpoints.extend(cp) + enc_input = enc_output + enc_output = pre_process_layer( + enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder", epsilon=epsilon) + + return enc_output, checkpoints diff --git a/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/module.py b/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/module.py new file mode 100644 index 0000000000000000000000000000000000000000..e7021284cd76b2d4639b4ef8481ab32e16ea91df --- /dev/null +++ b/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/module.py @@ -0,0 +1,238 @@ +# coding:utf-8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import ast +import os + +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub import TransformerModule +from paddlehub.module.module import moduleinfo, runnable, serving +from paddlehub.reader.tokenization import convert_to_unicode, FullTokenizer +from paddlehub.reader.batching import pad_batch_data +import numpy as np + +from ernie_skep_sentiment_analysis.model.ernie import ErnieModel, ErnieConfig + + +@moduleinfo( + name="ernie_skep_sentiment_analysis", + version="1.0.0", + summary= + "SKEP: Sentiment Knowledge Enhanced Pre-training for Sentiment Analysis. Ernie_skep_sentiment_analysis module is initialize with enie_1.0_chn_large when pretraining. This module is finetuned on ChnSentiCorp dataset to do sentiment claasification. 
It can do sentiment analysis prediction directly, label as positive or negative.", + author="baidu-nlp", + author_email="", + type="nlp/sentiment_analysis", +) +class ErnieSkepSentimentAnalysis(TransformerModule): + """ + Ernie_skep_sentiment_analysis module is initialize with enie_1.0_chn_large when pretraining. + This module is finetuned on ChnSentiCorp dataset to do sentiment claasification. + It can do sentiment analysis prediction directly, label as positive or negative. + """ + + def _initialize(self): + ernie_config_path = os.path.join(self.directory, "assets", "ernie_1.0_large_ch.config.json") + self.ernie_config = ErnieConfig(ernie_config_path) + self.MAX_SEQ_LEN = 512 + self.vocab_path = os.path.join(self.directory, "assets", "ernie_1.0_large_ch.vocab.txt") + self.params_path = os.path.join(self.directory, "assets", "params") + + self.infer_model_path = os.path.join(self.directory, "assets", "inference_step_601") + self.tokenizer = FullTokenizer(vocab_file=self.vocab_path) + + self.vocab = self.tokenizer.vocab + self.pad_id = self.vocab["[PAD]"] + self.label_map = {0: 'negative', 1: 'positive'} + + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + model_file_path = os.path.join(self.infer_model_path, 'model') + params_file_path = os.path.join(self.infer_model_path, 'params') + + config = AnalysisConfig(model_file_path, params_file_path) + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + + if use_gpu: + config.enable_use_gpu(8000, 0) + else: + config.disable_gpu() + + config.disable_glog_info() + + self.predictor = create_paddle_predictor(config) + + def net(self, input_ids, position_ids, segment_ids, input_mask): + """ + create neural network. + Args: + input_ids (tensor): the word ids. + position_ids (tensor): the position ids. + segment_ids (tensor): the segment ids. + input_mask (tensor): the padding mask. + + Returns: + pooled_output (tensor): sentence-level output for classification task. + sequence_output (tensor): token-level output for sequence task. + """ + ernie = ErnieModel( + src_ids=input_ids, + position_ids=position_ids, + sentence_ids=segment_ids, + input_mask=input_mask, + config=self.ernie_config, + use_fp16=False) + + pooled_output = ernie.get_pooled_output() + sequence_output = ernie.get_sequence_output() + return pooled_output, sequence_output + + def array2tensor(self, arr_data): + """ + convert numpy array to PaddleTensor + """ + tensor_data = PaddleTensor(arr_data) + return tensor_data + + @serving + def predict_sentiment(self, texts=[], use_gpu=False): + """ + Get the sentiment label for the predicted texts. It will be classified as positive and negative. + Args: + texts (list(str)): the data to be predicted. + use_gpu (bool): Whether to use gpu or not. + Returns: + res (list): The result of sentiment label and probabilties. + """ + + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + results = [] + for text in texts: + feature = self._convert_text_to_feature(text) + inputs = [self.array2tensor(ndarray) for ndarray in feature] + output = self.predictor.run(inputs) + probilities = np.array(output[0].data.float_data()) + label = self.label_map[np.argmax(probilities)] + result = { + 'text': text, + 'sentiment_label': label, + 'positive_probs': probilities[1], + 'negative_probs': probilities[0] + } + results.append(result) + + return results + + def _convert_text_to_feature(self, text): + """ + Convert the raw text to feature which is needed to run program (feed_vars). + """ + text_a = convert_to_unicode(text) + tokens_a = self.tokenizer.tokenize(text_a) + max_seq_len = 512 + + # Account for [CLS] and [SEP] with "- 2" + if len(tokens_a) > max_seq_len - 2: + tokens_a = tokens_a[0:(max_seq_len - 2)] + + tokens = [] + text_type_ids = [] + tokens.append("[CLS]") + text_type_ids.append(0) + for token in tokens_a: + tokens.append(token) + text_type_ids.append(0) + tokens.append("[SEP]") + text_type_ids.append(0) + + token_ids = self.tokenizer.convert_tokens_to_ids(tokens) + position_ids = list(range(len(token_ids))) + task_ids = [0] * len(token_ids) + + padded_token_ids, input_mask = pad_batch_data([token_ids], + max_seq_len=max_seq_len, + pad_idx=self.pad_id, + return_input_mask=True) + padded_text_type_ids = pad_batch_data([text_type_ids], max_seq_len=max_seq_len, pad_idx=self.pad_id) + padded_position_ids = pad_batch_data([position_ids], max_seq_len=max_seq_len, pad_idx=self.pad_id) + padded_task_ids = pad_batch_data([task_ids], max_seq_len=max_seq_len, pad_idx=self.pad_id) + + feature = [padded_token_ids, padded_position_ids, padded_text_type_ids, input_mask, padded_task_ids] + return feature + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + self.parser = argparse.ArgumentParser( + description="Run the %s module." % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + + self.add_module_config_arg() + self.add_module_input_arg() + + args = self.parser.parse_args(argvs) + results = self.predict_sentiment(texts=[args.input_text], use_gpu=args.use_gpu) + return results + + def add_module_config_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + + def add_module_input_arg(self): + """ + Add the command input options + """ + self.arg_input_group.add_argument('--input_text', type=str, default=None, help="data to be predicted") + + +if __name__ == '__main__': + test_module = ErnieSkepSentimentAnalysis() + test_texts = ['你不是不聪明,而是不认真', '虽然小明很努力,但是他还是没有考100分'] + results = test_module.predict_sentiment(test_texts, use_gpu=False) + print(results) + test_module.context(max_seq_len=128) + print(test_module.get_embedding(texts=[['你不是不聪明,而是不认真']])) + print(test_module.get_params_layer()) diff --git a/hub_module/modules/text/sentiment_analysis/senta_bilstm/README.md b/modules/text/sentiment_analysis/senta_bilstm/README.md similarity index 100% rename from hub_module/modules/text/sentiment_analysis/senta_bilstm/README.md rename to modules/text/sentiment_analysis/senta_bilstm/README.md diff --git a/hub_module/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/__init__.py b/modules/text/sentiment_analysis/senta_bilstm/__init__.py similarity index 100% rename from hub_module/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/__init__.py rename to modules/text/sentiment_analysis/senta_bilstm/__init__.py diff --git a/hub_module/modules/text/sentiment_analysis/senta_bilstm/assets/vocab.txt b/modules/text/sentiment_analysis/senta_bilstm/assets/vocab.txt similarity index 100% rename from hub_module/modules/text/sentiment_analysis/senta_bilstm/assets/vocab.txt rename to modules/text/sentiment_analysis/senta_bilstm/assets/vocab.txt diff --git a/modules/text/sentiment_analysis/senta_bilstm/module.py b/modules/text/sentiment_analysis/senta_bilstm/module.py new file mode 100644 index 0000000000000000000000000000000000000000..0ee5ca73e0e2e2134802096c856870a276f0a6e3 --- /dev/null +++ b/modules/text/sentiment_analysis/senta_bilstm/module.py @@ -0,0 +1,238 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import math +import os +import six + +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.common.paddle_helper import add_vars_prefix +from paddlehub.module.module import moduleinfo, serving + +from senta_bilstm.net import bilstm_net +from senta_bilstm.processor import load_vocab, preprocess, postprocess + + +@moduleinfo( + name="senta_bilstm", + version="1.2.0", + summary="Baidu's open-source Sentiment Classification System.", + author="baidu-nlp", + author_email="", + type="nlp/sentiment_analysis") +class SentaBiLSTM(hub.NLPPredictionModule): + def _initialize(self): + """ + initialize with the necessary elements + """ + self.pretrained_model_path = os.path.join(self.directory, "assets", "infer_model") + self.vocab_path = os.path.join(self.directory, "assets/vocab.txt") + self.word_dict = load_vocab(self.vocab_path) + self._word_seg_module = None + + self.predict = self.sentiment_classify + + self._set_config() + + @property + 
def word_seg_module(self):
+        """
+        lac module
+        """
+        if not self._word_seg_module:
+            self._word_seg_module = hub.Module(name="lac")
+        return self._word_seg_module
+
+    def context(self, trainable=False, max_seq_len=128, num_slots=1):
+        """
+        Get the input, output and program of the pretrained senta_bilstm.
+
+        Args:
+            trainable(bool): whether to fine-tune the pretrained parameters of senta_bilstm or not.
+            max_seq_len (int): It will limit the total sequence returned so that it has a maximum length.
+            num_slots(int): The number of data inputs fed to the model, selected from the following options:
+
+                - 1(default): There is only one data input, e.g. the module is used for a text classification task.
+                - 2: There are two data inputs, e.g. the module is used for a text matching task (point-wise).
+                - 3: There are three data inputs, e.g. the module is used for a text matching task (pair-wise).
+
+        Returns:
+            inputs(dict): the input variables of senta_bilstm (words)
+            outputs(dict): the output variables of the input words (word embeddings and label probabilities);
+                the sentence embedding and sequence length of the first input text.
+            main_program(Program): the main_program of Senta with pretrained parameters
+        """
+        assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots
+        main_program = fluid.Program()
+        startup_program = fluid.Program()
+        with fluid.program_guard(main_program, startup_program):
+            text_1 = fluid.layers.data(name="text", shape=[-1, max_seq_len, 1], dtype="int64", lod_level=0)
+            seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64', lod_level=0)
+            seq_len_used = fluid.layers.squeeze(seq_len, axes=[1])
+
+            # Add the embedding layer.
+            w_param_attrs = fluid.ParamAttr(
+                name="embedding_0.w_0", initializer=fluid.initializer.TruncatedNormal(scale=0.02), trainable=trainable)
+            dict_dim = 1256607
+            emb_1 = fluid.layers.embedding(
+                input=text_1,
+                size=[dict_dim, 128],
+                is_sparse=True,
+                padding_idx=dict_dim - 1,
+                dtype='float32',
+                param_attr=w_param_attrs)
+            emb_1_name = emb_1.name
+            data_list = [text_1]
+            emb_name_list = [emb_1_name]
+
+            # Add the Bi-LSTM layer.
+            pred, fc = bilstm_net(emb_1, seq_len_used)
+            pred_name = pred.name
+            fc_name = fc.name
+
+            if num_slots > 1:
+                text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
+                emb_2 = fluid.embedding(
+                    input=text_2,
+                    size=[dict_dim, 128],
+                    is_sparse=True,
+                    padding_idx=dict_dim - 1,
+                    dtype='float32',
+                    param_attr=w_param_attrs)
+                emb_2_name = emb_2.name
+                data_list.append(text_2)
+                emb_name_list.append(emb_2_name)
+
+            if num_slots > 2:
+                text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
+                emb_3 = fluid.embedding(
+                    input=text_3,
+                    size=[dict_dim, 128],
+                    is_sparse=True,
+                    padding_idx=dict_dim - 1,
+                    dtype='float32',
+                    param_attr=w_param_attrs)
+                emb_3_name = emb_3.name
+                data_list.append(text_3)
+                emb_name_list.append(emb_3_name)
+
+        variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"],
+                                list(main_program.global_block().vars.keys()))
+        prefix_name = "@HUB_{}@".format(self.name)
+        add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names)
+
+        for param in main_program.global_block().iter_parameters():
+            param.trainable = trainable
+
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+
+        # Load the senta_bilstm pretrained model. 
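+        # Only variables with a matching parameter file under
+        # pretrained_model_path are restored by fluid.io.load_vars below;
+        # anything else keeps the values produced by its initializer.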
+ def if_exist(var): + return os.path.exists(os.path.join(self.pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist) + + inputs = {'seq_len': seq_len} + outputs = { + "class_probs": main_program.global_block().vars[prefix_name + pred_name], + "sentence_feature": main_program.global_block().vars[prefix_name + fc_name] + } + for index, data in enumerate(data_list): + if index == 0: + inputs['text'] = data + outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]] + else: + inputs['text_%s' % (index + 1)] = data + outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name + + emb_name_list[index]] + return inputs, outputs, main_program + + @serving + def sentiment_classify(self, texts=[], data={}, use_gpu=False, batch_size=1): + """ + Get the sentiment prediction results results with the texts as input + + Args: + texts(list): the input texts to be predicted, if texts not data + data(dict): key must be 'text', value is the texts to be predicted, if data not texts + use_gpu(bool): whether use gpu to predict or not + batch_size(int): the program deals once with one batch + + Returns: + results(list): the word segmentation results + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." + ) + + if texts != [] and isinstance(texts, list) and data == {}: + predicted_data = texts + elif texts == [] and isinstance(data, dict) and isinstance(data.get('text', None), list) and data['text']: + predicted_data = data["text"] + else: + raise ValueError("The input data is inconsistent with expectations.") + + predicted_data = self.to_unicode(predicted_data) + start_idx = 0 + iteration = int(math.ceil(len(predicted_data) / batch_size)) + results = [] + for i in range(iteration): + if i < (iteration - 1): + batch_data = predicted_data[start_idx:(start_idx + batch_size)] + else: + batch_data = predicted_data[start_idx:] + + start_idx = start_idx + batch_size + processed_results = preprocess(self.word_seg_module, batch_data, self.word_dict, use_gpu, batch_size) + tensor_words = self.texts2tensor(processed_results) + + if use_gpu: + batch_out = self.gpu_predictor.run([tensor_words]) + else: + batch_out = self.cpu_predictor.run([tensor_words]) + batch_result = postprocess(batch_out[0], processed_results) + results += batch_result + return results + + def get_labels(self): + """ + Get the labels which was used when pretraining + Returns: + self.labels(dict) + """ + self.labels = {"positive": 1, "negative": 0} + return self.labels + + +if __name__ == "__main__": + senta = SentaBiLSTM() + inputs, outputs, main_program = senta.context(num_slots=3) + print(inputs) + print(outputs) + # Data to be predicted + test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] + + # execute predict and print the result + input_dict = {"text": test_text} + results = senta.sentiment_classify(data=input_dict, batch_size=3) + for index, result in enumerate(results): + if six.PY2: + print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) + else: + print(results[index]) + results = senta.sentiment_classify(texts=test_text) + for index, result in enumerate(results): + if six.PY2: + print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) + else: + print(results[index]) diff --git 
a/modules/text/sentiment_analysis/senta_bilstm/net.py b/modules/text/sentiment_analysis/senta_bilstm/net.py new file mode 100755 index 0000000000000000000000000000000000000000..e7694c4beea3090fb902b0ad6d2aa97f29116004 --- /dev/null +++ b/modules/text/sentiment_analysis/senta_bilstm/net.py @@ -0,0 +1,31 @@ +# -*- coding:utf-8 -*- +import paddle.fluid as fluid + + +def bilstm_net(emb, seq_len, emb_dim=128, hid_dim=128, hid_dim2=96, class_dim=2, emb_lr=30.0): + """ + Bi-Lstm net + """ + # unpad the token_feature + unpad_feature = fluid.layers.sequence_unpad(emb, length=seq_len) + + # bi-lstm layer + fc0 = fluid.layers.fc(input=unpad_feature, size=hid_dim * 4) + rfc0 = fluid.layers.fc(input=unpad_feature, size=hid_dim * 4) + lstm_h, c = fluid.layers.dynamic_lstm(input=fc0, size=hid_dim * 4, is_reverse=False) + rlstm_h, c = fluid.layers.dynamic_lstm(input=rfc0, size=hid_dim * 4, is_reverse=True) + + # extract last layer + lstm_last = fluid.layers.sequence_last_step(input=lstm_h) + rlstm_last = fluid.layers.sequence_last_step(input=rlstm_h) + lstm_last_tanh = fluid.layers.tanh(lstm_last) + rlstm_last_tanh = fluid.layers.tanh(rlstm_last) + + # concat layer + lstm_concat = fluid.layers.concat(input=[lstm_last, rlstm_last], axis=1) + # full connect layer + fc1 = fluid.layers.fc(input=lstm_concat, size=hid_dim2, act='tanh') + # softmax layer + prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax') + + return prediction, fc1 diff --git a/modules/text/sentiment_analysis/senta_bilstm/processor.py b/modules/text/sentiment_analysis/senta_bilstm/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..39190cf3a7c02a5e7974f32329a584f40db81832 --- /dev/null +++ b/modules/text/sentiment_analysis/senta_bilstm/processor.py @@ -0,0 +1,62 @@ +# -*- coding:utf-8 -*- +import io +import numpy as np + + +def load_vocab(file_path): + """ + load the given vocabulary + """ + vocab = {} + with io.open(file_path, 'r', encoding='utf8') as f: + wid = 0 + for line in f: + parts = line.rstrip().split('\t') + vocab[parts[0]] = int(parts[1]) + vocab[""] = len(vocab) + return vocab + + +def preprocess(lac, texts, word_dict, use_gpu=False, batch_size=1): + """ + firstly, the predicted texts are segmented by lac module + then, the word segmention results input into senta + """ + result = [] + input_dict = {'text': texts} + processed = lac.lexical_analysis(data=input_dict, use_gpu=use_gpu, batch_size=batch_size) + unk_id = word_dict[""] + for index, data in enumerate(processed): + result_i = {'processed': []} + result_i['origin'] = texts[index] + for word in data['word']: + if word in word_dict: + _index = word_dict[word] + else: + _index = unk_id + result_i['processed'].append(_index) + result.append(result_i) + return result + + +def postprocess(predict_out, texts): + """ + Convert model's output tensor to sentiment label + """ + predict_out = predict_out.as_ndarray() + batch_size = len(texts) + result = [] + for index in range(batch_size): + result_i = {} + result_i['text'] = texts[index]['origin'] + label = int(np.argmax(predict_out[index])) + if label == 0: + key = 'negative' + else: + key = 'positive' + result_i['sentiment_label'] = label + result_i['sentiment_key'] = key + result_i['positive_probs'] = float('%.4f' % predict_out[index, 1]) + result_i['negative_probs'] = float('%.4f' % (1 - predict_out[index, 1])) + result.append(result_i) + return result diff --git a/hub_module/modules/text/sentiment_analysis/senta_bow/README.md b/modules/text/sentiment_analysis/senta_bow/README.md 
similarity index 100% rename from hub_module/modules/text/sentiment_analysis/senta_bow/README.md rename to modules/text/sentiment_analysis/senta_bow/README.md diff --git a/hub_module/modules/text/sentiment_analysis/senta_bilstm/__init__.py b/modules/text/sentiment_analysis/senta_bow/__init__.py similarity index 100% rename from hub_module/modules/text/sentiment_analysis/senta_bilstm/__init__.py rename to modules/text/sentiment_analysis/senta_bow/__init__.py diff --git a/hub_module/modules/text/sentiment_analysis/senta_bow/assets/vocab.txt b/modules/text/sentiment_analysis/senta_bow/assets/vocab.txt similarity index 100% rename from hub_module/modules/text/sentiment_analysis/senta_bow/assets/vocab.txt rename to modules/text/sentiment_analysis/senta_bow/assets/vocab.txt diff --git a/modules/text/sentiment_analysis/senta_bow/module.py b/modules/text/sentiment_analysis/senta_bow/module.py new file mode 100644 index 0000000000000000000000000000000000000000..04f4d275f55d738c2d928b130ebe4fc7d55468a4 --- /dev/null +++ b/modules/text/sentiment_analysis/senta_bow/module.py @@ -0,0 +1,239 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import math +import os +import six + +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.common.paddle_helper import add_vars_prefix +from paddlehub.module.module import serving, moduleinfo + +from senta_bow.net import bow_net +from senta_bow.processor import load_vocab, preprocess, postprocess + + +@moduleinfo( + name="senta_bow", + version="1.2.0", + summary="Baidu's open-source Sentiment Classification System.", + author="baidu-nlp", + author_email="", + type="nlp/sentiment_analysis") +class SentaBow(hub.NLPPredictionModule): + def _initialize(self): + """ + initialize with the necessary elements + """ + self.pretrained_model_path = os.path.join(self.directory, "assets", "infer_model") + self.vocab_path = os.path.join(self.directory, "assets/vocab.txt") + self.word_dict = load_vocab(self.vocab_path) + self._word_seg_module = None + + self.predict = self.sentiment_classify + + self._set_config() + + @property + def word_seg_module(self): + """ + lac module + """ + if not self._word_seg_module: + self._word_seg_module = hub.Module(name="lac") + return self._word_seg_module + + def context(self, trainable=False, max_seq_len=128, num_slots=1): + """ + Get the input ,output and program of the pretrained senta_bow + + Args: + trainable(bool): Whether fine-tune the pretrained parameters of senta_bow or not. + max_seq_len (int): It will limit the total sequence returned so that it has a maximum length. + num_slots(int): It's number of data inputted to the model, selectted as following options: + + - 1(default): There's only one data to be feeded in the model, e.g. the module is used for text classification task. + - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise). + - 3: There are three data to be feeded in the model, e.g. the module is used for text matching task (pair-wise). + + Returns: + inputs(dict): the input variables of senta_bow (words) + outputs(dict): the output variables of input words (word embeddings and label probilities); + the sentence embedding and sequence length of the first input text. 
+            main_program(Program): the main_program of Senta with pretrained parameters
+        """
+        assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots
+        main_program = fluid.Program()
+        startup_program = fluid.Program()
+        with fluid.program_guard(main_program, startup_program):
+            text_1 = fluid.layers.data(name="text", shape=[-1, max_seq_len, 1], dtype="int64", lod_level=0)
+            seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64', lod_level=0)
+            seq_len_used = fluid.layers.squeeze(seq_len, axes=[1])
+
+            # Add the embedding layer.
+            w_param_attrs = fluid.ParamAttr(
+                name="embedding_0.w_0", initializer=fluid.initializer.TruncatedNormal(scale=0.02), trainable=trainable)
+            dict_dim = 1256607
+            emb_1 = fluid.layers.embedding(
+                input=text_1,
+                size=[dict_dim, 128],
+                is_sparse=True,
+                padding_idx=dict_dim - 1,
+                dtype='float32',
+                param_attr=w_param_attrs)
+            emb_1_name = emb_1.name
+            data_list = [text_1]
+            emb_name_list = [emb_1_name]
+
+            # Add the BOW layer.
+            pred, fc = bow_net(emb_1, seq_len_used)
+            pred_name = pred.name
+            fc_name = fc.name
+
+            if num_slots > 1:
+                text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
+                emb_2 = fluid.embedding(
+                    input=text_2,
+                    size=[dict_dim, 128],
+                    is_sparse=True,
+                    padding_idx=dict_dim - 1,
+                    dtype='float32',
+                    param_attr=w_param_attrs)
+                emb_2_name = emb_2.name
+                data_list.append(text_2)
+                emb_name_list.append(emb_2_name)
+
+            if num_slots > 2:
+                text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
+                emb_3 = fluid.embedding(
+                    input=text_3,
+                    size=[dict_dim, 128],
+                    is_sparse=True,
+                    padding_idx=dict_dim - 1,
+                    dtype='float32',
+                    param_attr=w_param_attrs)
+                emb_3_name = emb_3.name
+                data_list.append(text_3)
+                emb_name_list.append(emb_3_name)
+
+        variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"],
+                                list(main_program.global_block().vars.keys()))
+        prefix_name = "@HUB_{}@".format(self.name)
+        add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names)
+
+        for param in main_program.global_block().iter_parameters():
+            param.trainable = trainable
+
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+
+        # Load the senta_bow pretrained model. 
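+        # Minimal fine-tuning sketch (PaddleHub 1.x style; the exact dict keys
+        # follow the docstring above and should be treated as illustrative):
+        #     senta = hub.Module(name="senta_bow")
+        #     inputs, outputs, program = senta.context(trainable=True, max_seq_len=128)
+        #     # feed inputs['text'] / inputs['seq_len'], read outputs['class_probs']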
+ def if_exist(var): + return os.path.exists(os.path.join(self.pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist) + + inputs = {'seq_len': seq_len} + outputs = { + "class_probs": main_program.global_block().vars[prefix_name + pred_name], + "sentence_feature": main_program.global_block().vars[prefix_name + fc_name] + } + for index, data in enumerate(data_list): + if index == 0: + inputs['text'] = data + outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]] + else: + inputs['text_%s' % (index + 1)] = data + outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name + + emb_name_list[index]] + return inputs, outputs, main_program + + @serving + def sentiment_classify(self, texts=[], data={}, use_gpu=False, batch_size=1): + """ + Get the sentiment prediction results results with the texts as input + + Args: + texts(list): the input texts to be predicted, if texts not data + data(dict): key must be 'text', value is the texts to be predicted, if data not texts + use_gpu(bool): whether use gpu to predict or not + batch_size(int): the program deals once with one batch + + Returns: + results(list): the word segmentation results + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." + ) + + if texts != [] and isinstance(texts, list) and data == {}: + predicted_data = texts + elif texts == [] and isinstance(data, dict) and isinstance(data.get('text', None), list) and data['text']: + predicted_data = data["text"] + else: + raise ValueError("The input data is inconsistent with expectations.") + + predicted_data = self.to_unicode(predicted_data) + start_idx = 0 + iteration = int(math.ceil(len(predicted_data) / batch_size)) + results = [] + for i in range(iteration): + if i < (iteration - 1): + batch_data = predicted_data[start_idx:(start_idx + batch_size)] + else: + batch_data = predicted_data[start_idx:] + + start_idx = start_idx + batch_size + processed_results = preprocess(self.word_seg_module, batch_data, self.word_dict, use_gpu, batch_size) + tensor_words = self.texts2tensor(processed_results) + + if use_gpu: + batch_out = self.gpu_predictor.run([tensor_words]) + else: + batch_out = self.cpu_predictor.run([tensor_words]) + batch_result = postprocess(batch_out[0], processed_results) + results += batch_result + return results + + def get_labels(self): + """ + Get the labels which was used when pretraining + Returns: + self.labels(dict) + """ + self.labels = {"positive": 1, "negative": 0} + return self.labels + + +if __name__ == "__main__": + senta = SentaBow() + inputs, outputs, main_program = senta.context(num_slots=3) + print(inputs) + print('*' * 20) + print(outputs) + # Data to be predicted + test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] + + # execute predict and print the result + input_dict = {"text": test_text} + results = senta.sentiment_classify(data=input_dict) + for index, result in enumerate(results): + if six.PY2: + print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) + else: + print(results[index]) + results = senta.sentiment_classify(texts=test_text) + for index, result in enumerate(results): + if six.PY2: + print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) + else: + print(results[index]) diff --git 
a/hub_module/modules/text/sentiment_analysis/senta_bow/net.py b/modules/text/sentiment_analysis/senta_bow/net.py similarity index 100% rename from hub_module/modules/text/sentiment_analysis/senta_bow/net.py rename to modules/text/sentiment_analysis/senta_bow/net.py diff --git a/modules/text/sentiment_analysis/senta_bow/processor.py b/modules/text/sentiment_analysis/senta_bow/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..b0c33cecc2ada48708b7864bef958da28205e26d --- /dev/null +++ b/modules/text/sentiment_analysis/senta_bow/processor.py @@ -0,0 +1,62 @@ +# -*- coding:utf-8 -*- +import io +import numpy as np + + +def load_vocab(file_path): + """ + load the given vocabulary + """ + vocab = {} + with io.open(file_path, 'r', encoding='utf8') as f: + wid = 0 + for line in f: + parts = line.rstrip().split('\t') + vocab[parts[0]] = int(parts[1]) + vocab[""] = len(vocab) + return vocab + + +def preprocess(lac, texts, word_dict, use_gpu=False, batch_size=1): + """ + firstly, the predicted texts are segmented by lac module + then, the word segmention results input into senta + """ + result = [] + input_dict = {"text": texts} + processed = lac.lexical_analysis(data=input_dict, use_gpu=use_gpu, batch_size=batch_size) + unk_id = word_dict[""] + for index, data in enumerate(processed): + result_i = {'processed': []} + result_i['origin'] = texts[index] + for word in data['word']: + if word in word_dict: + _index = word_dict[word] + else: + _index = unk_id + result_i['processed'].append(_index) + result.append(result_i) + return result + + +def postprocess(predict_out, texts): + """ + Convert model's output tensor to sentiment label + """ + predict_out = predict_out.as_ndarray() + batch_size = len(texts) + result = [] + for index in range(batch_size): + result_i = {} + result_i['text'] = texts[index]['origin'] + label = int(np.argmax(predict_out[index])) + if label == 0: + key = 'negative' + else: + key = 'positive' + result_i['sentiment_label'] = label + result_i['sentiment_key'] = key + result_i['positive_probs'] = float('%.4f' % predict_out[index, 1]) + result_i['negative_probs'] = float('%.4f' % (1 - predict_out[index, 1])) + result.append(result_i) + return result diff --git a/hub_module/modules/text/sentiment_analysis/senta_cnn/README.md b/modules/text/sentiment_analysis/senta_cnn/README.md similarity index 100% rename from hub_module/modules/text/sentiment_analysis/senta_cnn/README.md rename to modules/text/sentiment_analysis/senta_cnn/README.md diff --git a/hub_module/modules/text/sentiment_analysis/senta_bow/__init__.py b/modules/text/sentiment_analysis/senta_cnn/__init__.py similarity index 100% rename from hub_module/modules/text/sentiment_analysis/senta_bow/__init__.py rename to modules/text/sentiment_analysis/senta_cnn/__init__.py diff --git a/hub_module/modules/text/sentiment_analysis/senta_cnn/assets/vocab.txt b/modules/text/sentiment_analysis/senta_cnn/assets/vocab.txt similarity index 100% rename from hub_module/modules/text/sentiment_analysis/senta_cnn/assets/vocab.txt rename to modules/text/sentiment_analysis/senta_cnn/assets/vocab.txt diff --git a/modules/text/sentiment_analysis/senta_cnn/module.py b/modules/text/sentiment_analysis/senta_cnn/module.py new file mode 100644 index 0000000000000000000000000000000000000000..6aa7c61fdf5c33403b7d56f8e045d594cd6bc416 --- /dev/null +++ b/modules/text/sentiment_analysis/senta_cnn/module.py @@ -0,0 +1,229 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from 
__future__ import print_function
+
+import json
+import math
+import os
+import six
+
+import paddle.fluid as fluid
+import paddlehub as hub
+from paddlehub.common.paddle_helper import add_vars_prefix
+from paddlehub.module.module import moduleinfo, serving
+
+from senta_cnn.net import cnn_net
+from senta_cnn.processor import load_vocab, preprocess, postprocess
+
+
+@moduleinfo(
+    name="senta_cnn",
+    version="1.2.0",
+    summary="Baidu's open-source Sentiment Classification System.",
+    author="baidu-nlp",
+    author_email="",
+    type="nlp/sentiment_analysis")
+class SentaCNN(hub.NLPPredictionModule):
+    def _initialize(self, user_dict=None):
+        """
+        Initialize with the necessary elements.
+        """
+        self.pretrained_model_path = os.path.join(self.directory, "assets", "infer_model")
+        self.vocab_path = os.path.join(self.directory, "assets/vocab.txt")
+        self.word_dict = load_vocab(self.vocab_path)
+        self._word_seg_module = None
+
+        self.predict = self.sentiment_classify
+
+        self._set_config()
+
+    @property
+    def word_seg_module(self):
+        """
+        lac module
+        """
+        if not self._word_seg_module:
+            self._word_seg_module = hub.Module(name="lac")
+        return self._word_seg_module
+
+    def context(self, trainable=False, max_seq_len=128, num_slots=1):
+        """
+        Get the input, output and program of the pretrained senta_cnn.
+
+        Args:
+            trainable(bool): Whether to fine-tune the pretrained parameters of senta_cnn or not.
+            max_seq_len (int): It will limit the total sequence returned so that it has a maximum length.
+            num_slots(int): The number of data inputs fed to the model, selected from the following options:
+
+                - 1(default): There is only one data input, e.g. the module is used for a text classification task.
+                - 2: There are two data inputs, e.g. the module is used for a text matching task (point-wise).
+                - 3: There are three data inputs, e.g. the module is used for a text matching task (pair-wise).
+
+        Returns:
+            inputs(dict): the input variables of senta_cnn (words)
+            outputs(dict): the output variables of the input words (word embeddings and label probabilities);
+                the sentence embedding and sequence length of the first input text.
+            main_program(Program): the main_program of Senta with pretrained parameters
+        """
+        assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots
+        main_program = fluid.Program()
+        startup_program = fluid.Program()
+        with fluid.program_guard(main_program, startup_program):
+            text_1 = fluid.layers.data(name="text", shape=[-1, max_seq_len, 1], dtype="int64", lod_level=0)
+            seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64', lod_level=0)
+            seq_len_used = fluid.layers.squeeze(seq_len, axes=[1])
+
+            # Add the embedding layer.
+            w_param_attrs = fluid.ParamAttr(
+                name="embedding_0.w_0", initializer=fluid.initializer.TruncatedNormal(scale=0.02), trainable=trainable)
+            dict_dim = 1256607
+            emb_1 = fluid.layers.embedding(
+                input=text_1, size=[dict_dim, 128], padding_idx=dict_dim - 1, dtype='float32', param_attr=w_param_attrs)
+            emb_1_name = emb_1.name
+            data_list = [text_1]
+            emb_name_list = [emb_1_name]
+
+            # Add the CNN layer. 
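+            # cnn_net returns (softmax prediction, penultimate fc feature); the
+            # tensor names are saved so the same variables can be found again
+            # after add_vars_prefix renames everything in main_program.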
+ pred, fc = cnn_net(emb_1, seq_len_used) + pred_name = pred.name + fc_name = fc.name + + if num_slots > 1: + text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0) + emb_2 = fluid.embedding( + input=text_2, + size=[dict_dim, 128], + padding_idx=dict_dim - 1, + dtype='float32', + param_attr=w_param_attrs) + emb_2_name = emb_2.name + data_list.append(text_2) + emb_name_list.append(emb_2_name) + + if num_slots > 2: + text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0) + emb_3 = fluid.embedding( + input=text_3, + size=[dict_dim, 128], + padding_idx=dict_dim - 1, + dtype='float32', + param_attr=w_param_attrs) + emb_3_name = emb_3.name + data_list.append(text_3) + emb_name_list.append(emb_3_name) + + variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"], + list(main_program.global_block().vars.keys())) + prefix_name = "@HUB_{}@".format(self.name) + add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names) + + for param in main_program.global_block().iter_parameters(): + param.trainable = trainable + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + # Load the senta_lstm pretrained model. + def if_exist(var): + return os.path.exists(os.path.join(self.pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist) + + inputs = {'seq_len': seq_len} + outputs = { + "class_probs": main_program.global_block().vars[prefix_name + pred_name], + "sentence_feature": main_program.global_block().vars[prefix_name + fc_name] + } + for index, data in enumerate(data_list): + if index == 0: + inputs['text'] = data + outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]] + else: + inputs['text_%s' % (index + 1)] = data + outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name + + emb_name_list[index]] + return inputs, outputs, main_program + + @serving + def sentiment_classify(self, texts=[], data={}, use_gpu=False, batch_size=1): + """ + Get the sentiment prediction results results with the texts as input + + Args: + texts(list): the input texts to be predicted, if texts not data + data(dict): key must be 'text', value is the texts to be predicted, if data not texts + use_gpu(bool): whether use gpu to predict or not + batch_size(int): the program deals once with one batch + + Returns: + results(list): the word segmentation results + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + if texts != [] and isinstance(texts, list) and data == {}: + predicted_data = texts + elif texts == [] and isinstance(data, dict) and isinstance(data.get('text', None), list) and data['text']: + predicted_data = data["text"] + else: + raise ValueError("The input data is inconsistent with expectations.") + + predicted_data = self.to_unicode(predicted_data) + start_idx = 0 + iteration = int(math.ceil(len(predicted_data) / batch_size)) + results = [] + for i in range(iteration): + if i < (iteration - 1): + batch_data = predicted_data[start_idx:(start_idx + batch_size)] + else: + batch_data = predicted_data[start_idx:] + + start_idx = start_idx + batch_size + processed_results = preprocess(self.word_seg_module, batch_data, self.word_dict, use_gpu, batch_size) + tensor_words = self.texts2tensor(processed_results) + + if use_gpu: + batch_out = self.gpu_predictor.run([tensor_words]) + else: + batch_out = self.cpu_predictor.run([tensor_words]) + batch_result = postprocess(batch_out[0], processed_results) + results += batch_result + return results + + def get_labels(self): + """ + Get the labels which was used when pretraining + Returns: + self.labels(dict) + """ + self.labels = {"positive": 1, "negative": 0} + return self.labels + + +if __name__ == "__main__": + senta = SentaCNN() + inputs, outputs, program = senta.context(num_slots=3) + # Data to be predicted + test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] + + # execute predict and print the result + input_dict = {"text": test_text} + results = senta.sentiment_classify(data=input_dict) + for index, result in enumerate(results): + if six.PY2: + print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) + else: + print(results[index]) + results = senta.sentiment_classify(texts=test_text) + for index, result in enumerate(results): + if six.PY2: + print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) + else: + print(results[index]) diff --git a/modules/text/sentiment_analysis/senta_cnn/net.py b/modules/text/sentiment_analysis/senta_cnn/net.py new file mode 100755 index 0000000000000000000000000000000000000000..339471dc7e7706c83ec5416145b61601b1dc809f --- /dev/null +++ b/modules/text/sentiment_analysis/senta_cnn/net.py @@ -0,0 +1,21 @@ +# -*- coding:utf-8 -*- +import paddle.fluid as fluid + + +def cnn_net(emb, seq_len, hid_dim=128, hid_dim2=96, class_dim=2, win_size=3): + """ + Conv net + """ + # unpad the token_feature + unpad_feature = fluid.layers.sequence_unpad(emb, length=seq_len) + + # convolution layer + conv_3 = fluid.nets.sequence_conv_pool( + input=unpad_feature, num_filters=hid_dim, filter_size=win_size, act="tanh", pool_type="max") + # full connect layer + fc_1 = fluid.layers.fc(input=[conv_3], size=hid_dim2) + + # softmax layer + prediction = fluid.layers.fc(input=[fc_1], size=class_dim, act="softmax") + + return prediction, fc_1 diff --git a/modules/text/sentiment_analysis/senta_cnn/processor.py b/modules/text/sentiment_analysis/senta_cnn/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..39190cf3a7c02a5e7974f32329a584f40db81832 --- /dev/null +++ b/modules/text/sentiment_analysis/senta_cnn/processor.py @@ -0,0 +1,62 @@ +# -*- coding:utf-8 -*- +import io +import numpy as np + + +def load_vocab(file_path): + """ + load the given vocabulary + """ + vocab = {} + with io.open(file_path, 'r', encoding='utf8') as f: + wid = 0 + for line in f: + parts = line.rstrip().split('\t') + vocab[parts[0]] = int(parts[1]) + vocab[""] = len(vocab) + return vocab + + +def preprocess(lac, texts, 
word_dict, use_gpu=False, batch_size=1): + """ + firstly, the predicted texts are segmented by lac module + then, the word segmention results input into senta + """ + result = [] + input_dict = {'text': texts} + processed = lac.lexical_analysis(data=input_dict, use_gpu=use_gpu, batch_size=batch_size) + unk_id = word_dict[""] + for index, data in enumerate(processed): + result_i = {'processed': []} + result_i['origin'] = texts[index] + for word in data['word']: + if word in word_dict: + _index = word_dict[word] + else: + _index = unk_id + result_i['processed'].append(_index) + result.append(result_i) + return result + + +def postprocess(predict_out, texts): + """ + Convert model's output tensor to sentiment label + """ + predict_out = predict_out.as_ndarray() + batch_size = len(texts) + result = [] + for index in range(batch_size): + result_i = {} + result_i['text'] = texts[index]['origin'] + label = int(np.argmax(predict_out[index])) + if label == 0: + key = 'negative' + else: + key = 'positive' + result_i['sentiment_label'] = label + result_i['sentiment_key'] = key + result_i['positive_probs'] = float('%.4f' % predict_out[index, 1]) + result_i['negative_probs'] = float('%.4f' % (1 - predict_out[index, 1])) + result.append(result_i) + return result diff --git a/hub_module/modules/text/sentiment_analysis/senta_gru/README.md b/modules/text/sentiment_analysis/senta_gru/README.md similarity index 100% rename from hub_module/modules/text/sentiment_analysis/senta_gru/README.md rename to modules/text/sentiment_analysis/senta_gru/README.md diff --git a/hub_module/modules/text/sentiment_analysis/senta_cnn/__init__.py b/modules/text/sentiment_analysis/senta_gru/__init__.py similarity index 100% rename from hub_module/modules/text/sentiment_analysis/senta_cnn/__init__.py rename to modules/text/sentiment_analysis/senta_gru/__init__.py diff --git a/hub_module/modules/text/sentiment_analysis/senta_gru/assets/vocab.txt b/modules/text/sentiment_analysis/senta_gru/assets/vocab.txt similarity index 100% rename from hub_module/modules/text/sentiment_analysis/senta_gru/assets/vocab.txt rename to modules/text/sentiment_analysis/senta_gru/assets/vocab.txt diff --git a/modules/text/sentiment_analysis/senta_gru/module.py b/modules/text/sentiment_analysis/senta_gru/module.py new file mode 100644 index 0000000000000000000000000000000000000000..7d578947fff4b292012c7052d05a4a2dde7aee6c --- /dev/null +++ b/modules/text/sentiment_analysis/senta_gru/module.py @@ -0,0 +1,229 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import math +import os +import six + +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.common.paddle_helper import add_vars_prefix +from paddlehub.module.module import moduleinfo, serving + +from senta_gru.net import gru_net +from senta_gru.processor import load_vocab, preprocess, postprocess + + +@moduleinfo( + name="senta_gru", + version="1.2.0", + summary="Baidu's open-source Sentiment Classification System.", + author="baidu-nlp", + author_email="", + type="nlp/sentiment_analysis") +class SentaGRU(hub.NLPPredictionModule): + def _initialize(self, user_dict=None): + """ + initialize with the necessary elements + """ + self.pretrained_model_path = os.path.join(self.directory, "assets", "infer_model") + self.vocab_path = os.path.join(self.directory, "assets/vocab.txt") + self.word_dict = load_vocab(self.vocab_path) + self._word_seg_module = None + + self.predict = 
self.sentiment_classify
+
+        self._set_config()
+
+    @property
+    def word_seg_module(self):
+        """
+        lac module
+        """
+        if not self._word_seg_module:
+            self._word_seg_module = hub.Module(name="lac")
+        return self._word_seg_module
+
+    def context(self, trainable=False, max_seq_len=128, num_slots=1):
+        """
+        Get the input, output and program of the pretrained senta_gru.
+
+        Args:
+            trainable(bool): Whether to fine-tune the pretrained parameters of senta_gru or not.
+            max_seq_len (int): It will limit the total sequence returned so that it has a maximum length.
+            num_slots(int): The number of data inputs fed to the model, selected from the following options:
+
+                - 1(default): There is only one data input, e.g. the module is used for a text classification task.
+                - 2: There are two data inputs, e.g. the module is used for a text matching task (point-wise).
+                - 3: There are three data inputs, e.g. the module is used for a text matching task (pair-wise).
+
+        Returns:
+            inputs(dict): the input variables of senta_gru (words)
+            outputs(dict): the output variables of the input words (word embeddings and label probabilities);
+                the sentence embedding and sequence length of the first input text.
+            main_program(Program): the main_program of Senta with pretrained parameters
+        """
+        assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots
+        main_program = fluid.Program()
+        startup_program = fluid.Program()
+        with fluid.program_guard(main_program, startup_program):
+            text_1 = fluid.layers.data(name="text", shape=[-1, max_seq_len, 1], dtype="int64", lod_level=0)
+            seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64', lod_level=0)
+            seq_len_used = fluid.layers.squeeze(seq_len, axes=[1])
+
+            # Add the embedding layer.
+            w_param_attrs = fluid.ParamAttr(
+                name="embedding_0.w_0", initializer=fluid.initializer.TruncatedNormal(scale=0.02), trainable=trainable)
+            dict_dim = 1256607
+            emb_1 = fluid.layers.embedding(
+                input=text_1, size=[dict_dim, 128], padding_idx=dict_dim - 1, dtype='float32', param_attr=w_param_attrs)
+            emb_1_name = emb_1.name
+            data_list = [text_1]
+            emb_name_list = [emb_1_name]
+
+            # Add the GRU layer.
+            pred, fc = gru_net(emb_1, seq_len_used)
+            pred_name = pred.name
+            fc_name = fc.name
+
+            if num_slots > 1:
+                text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
+                emb_2 = fluid.embedding(
+                    input=text_2,
+                    size=[dict_dim, 128],
+                    padding_idx=dict_dim - 1,
+                    dtype='float32',
+                    param_attr=w_param_attrs)
+                emb_2_name = emb_2.name
+                data_list.append(text_2)
+                emb_name_list.append(emb_2_name)
+
+            if num_slots > 2:
+                text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
+                emb_3 = fluid.embedding(
+                    input=text_3,
+                    size=[dict_dim, 128],
+                    padding_idx=dict_dim - 1,
+                    dtype='float32',
+                    param_attr=w_param_attrs)
+                emb_3_name = emb_3.name
+                data_list.append(text_3)
+                emb_name_list.append(emb_3_name)
+
+        variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"],
+                                list(main_program.global_block().vars.keys()))
+        prefix_name = "@HUB_{}@".format(self.name)
+        add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names)
+
+        for param in main_program.global_block().iter_parameters():
+            param.trainable = trainable
+
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+
+        # Load the senta_gru pretrained model. 
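+        # fluid.io.load_vars below runs on the CPU executor created above;
+        # GPU inference, when requested, goes through gpu_predictor inside
+        # sentiment_classify rather than through this program.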
+ def if_exist(var): + return os.path.exists(os.path.join(self.pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist) + + inputs = {'seq_len': seq_len} + outputs = { + "class_probs": main_program.global_block().vars[prefix_name + pred_name], + "sentence_feature": main_program.global_block().vars[prefix_name + fc_name] + } + for index, data in enumerate(data_list): + if index == 0: + inputs['text'] = data + outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]] + else: + inputs['text_%s' % (index + 1)] = data + outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name + + emb_name_list[index]] + return inputs, outputs, main_program + + @serving + def sentiment_classify(self, texts=[], data={}, use_gpu=False, batch_size=1): + """ + Get the sentiment prediction results results with the texts as input + + Args: + texts(list): the input texts to be predicted, if texts not data + data(dict): key must be 'text', value is the texts to be predicted, if data not texts + use_gpu(bool): whether use gpu to predict or not + batch_size(int): the program deals once with one batch + + Returns: + results(list): the word segmentation results + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." + ) + + if texts != [] and isinstance(texts, list) and data == {}: + predicted_data = texts + elif texts == [] and isinstance(data, dict) and isinstance(data.get('text', None), list) and data['text']: + predicted_data = data["text"] + else: + raise ValueError("The input data is inconsistent with expectations.") + + predicted_data = self.to_unicode(predicted_data) + start_idx = 0 + iteration = int(math.ceil(len(predicted_data) / batch_size)) + results = [] + for i in range(iteration): + if i < (iteration - 1): + batch_data = predicted_data[start_idx:(start_idx + batch_size)] + else: + batch_data = predicted_data[start_idx:] + + start_idx = start_idx + batch_size + processed_results = preprocess(self.word_seg_module, batch_data, self.word_dict, use_gpu, batch_size) + tensor_words = self.texts2tensor(processed_results) + + if use_gpu: + batch_out = self.gpu_predictor.run([tensor_words]) + else: + batch_out = self.cpu_predictor.run([tensor_words]) + batch_result = postprocess(batch_out[0], processed_results) + results += batch_result + return results + + def get_labels(self): + """ + Get the labels which was used when pretraining + Returns: + self.labels(dict) + """ + self.labels = {"positive": 1, "negative": 0} + return self.labels + + +if __name__ == "__main__": + senta = SentaGRU() + inputs, outputs, main_program = senta.context(num_slots=3) + # Data to be predicted + test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] + + # execute predict and print the result + input_dict = {"text": test_text} + results = senta.sentiment_classify(data=input_dict) + for index, result in enumerate(results): + if six.PY2: + print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) + else: + print(results[index]) + results = senta.sentiment_classify(texts=test_text) + for index, result in enumerate(results): + if six.PY2: + print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) + else: + print(results[index]) diff --git a/modules/text/sentiment_analysis/senta_gru/net.py 
b/modules/text/sentiment_analysis/senta_gru/net.py new file mode 100755 index 0000000000000000000000000000000000000000..a32f64e877a2eb304905566be6bf63f51d2d92a4 --- /dev/null +++ b/modules/text/sentiment_analysis/senta_gru/net.py @@ -0,0 +1,23 @@ +# -*- coding:utf-8 -*- +import paddle.fluid as fluid + + +def gru_net(emb, seq_len, emb_dim=128, hid_dim=128, hid_dim2=96, class_dim=2, emb_lr=30.0): + """ + gru net + """ + # unpad the token_feature + unpad_feature = fluid.layers.sequence_unpad(emb, length=seq_len) + + fc0 = fluid.layers.fc(input=unpad_feature, size=hid_dim * 3) + + # GRU layer + gru_h = fluid.layers.dynamic_gru(input=fc0, size=hid_dim, is_reverse=False) + gru_max = fluid.layers.sequence_pool(input=gru_h, pool_type='max') + gru_max_tanh = fluid.layers.tanh(gru_max) + + # full connect layer + fc1 = fluid.layers.fc(input=gru_max_tanh, size=hid_dim2, act='tanh') + # softmax layer + prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax') + return prediction, fc1 diff --git a/modules/text/sentiment_analysis/senta_gru/processor.py b/modules/text/sentiment_analysis/senta_gru/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..39190cf3a7c02a5e7974f32329a584f40db81832 --- /dev/null +++ b/modules/text/sentiment_analysis/senta_gru/processor.py @@ -0,0 +1,62 @@ +# -*- coding:utf-8 -*- +import io +import numpy as np + + +def load_vocab(file_path): + """ + load the given vocabulary + """ + vocab = {} + with io.open(file_path, 'r', encoding='utf8') as f: + wid = 0 + for line in f: + parts = line.rstrip().split('\t') + vocab[parts[0]] = int(parts[1]) + vocab[""] = len(vocab) + return vocab + + +def preprocess(lac, texts, word_dict, use_gpu=False, batch_size=1): + """ + firstly, the predicted texts are segmented by lac module + then, the word segmention results input into senta + """ + result = [] + input_dict = {'text': texts} + processed = lac.lexical_analysis(data=input_dict, use_gpu=use_gpu, batch_size=batch_size) + unk_id = word_dict[""] + for index, data in enumerate(processed): + result_i = {'processed': []} + result_i['origin'] = texts[index] + for word in data['word']: + if word in word_dict: + _index = word_dict[word] + else: + _index = unk_id + result_i['processed'].append(_index) + result.append(result_i) + return result + + +def postprocess(predict_out, texts): + """ + Convert model's output tensor to sentiment label + """ + predict_out = predict_out.as_ndarray() + batch_size = len(texts) + result = [] + for index in range(batch_size): + result_i = {} + result_i['text'] = texts[index]['origin'] + label = int(np.argmax(predict_out[index])) + if label == 0: + key = 'negative' + else: + key = 'positive' + result_i['sentiment_label'] = label + result_i['sentiment_key'] = key + result_i['positive_probs'] = float('%.4f' % predict_out[index, 1]) + result_i['negative_probs'] = float('%.4f' % (1 - predict_out[index, 1])) + result.append(result_i) + return result diff --git a/hub_module/modules/text/sentiment_analysis/senta_lstm/README.md b/modules/text/sentiment_analysis/senta_lstm/README.md similarity index 100% rename from hub_module/modules/text/sentiment_analysis/senta_lstm/README.md rename to modules/text/sentiment_analysis/senta_lstm/README.md diff --git a/hub_module/modules/text/sentiment_analysis/senta_gru/__init__.py b/modules/text/sentiment_analysis/senta_lstm/__init__.py similarity index 100% rename from hub_module/modules/text/sentiment_analysis/senta_gru/__init__.py rename to modules/text/sentiment_analysis/senta_lstm/__init__.py diff 
--git a/hub_module/modules/text/sentiment_analysis/senta_lstm/assets/vocab.txt b/modules/text/sentiment_analysis/senta_lstm/assets/vocab.txt similarity index 100% rename from hub_module/modules/text/sentiment_analysis/senta_lstm/assets/vocab.txt rename to modules/text/sentiment_analysis/senta_lstm/assets/vocab.txt diff --git a/modules/text/sentiment_analysis/senta_lstm/module.py b/modules/text/sentiment_analysis/senta_lstm/module.py new file mode 100644 index 0000000000000000000000000000000000000000..cc38f0b52312f34381e3e51ee40ddd123ba5cc88 --- /dev/null +++ b/modules/text/sentiment_analysis/senta_lstm/module.py @@ -0,0 +1,224 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import math +import os +import six + +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.common.paddle_helper import add_vars_prefix +from paddlehub.module.module import moduleinfo, serving + +from senta_lstm.net import lstm_net +from senta_lstm.processor import load_vocab, preprocess, postprocess + + +@moduleinfo( + name="senta_lstm", + version="1.2.0", + summary="Baidu's open-source Sentiment Classification System.", + author="baidu-nlp", + author_email="", + type="nlp/sentiment_analysis") +class SentaLSTM(hub.NLPPredictionModule): + def _initialize(self, user_dict=None): + """ + initialize with the necessary elements + """ + self.pretrained_model_path = os.path.join(self.directory, "assets", "infer_model") + self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt") + self.word_dict = load_vocab(self.vocab_path) + self._word_seg_module = None + + self.predict = self.sentiment_classify + + self._set_config() + + @property + def word_seg_module(self): + """ + lac module + """ + if not self._word_seg_module: + self._word_seg_module = hub.Module(name="lac") + return self._word_seg_module + + def context(self, trainable=False, max_seq_len=128, num_slots=1): + """ + Get the input ,output and program of the pretrained senta_lstm + + Args: + trainable(bool): Whether fine-tune the pretrained parameters of senta_lstm or not. + max_seq_len (int): It will limit the total sequence returned so that it has a maximum length. + num_slots(int): It's number of data inputted to the model, selectted as following options: + + - 1(default): There's only one data to be feeded in the model, e.g. the module is used for text classification task. + - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise). + - 3: There are three data to be feeded in the model, e.g. the module is used for text matching task (pair-wise). + + Returns: + inputs(dict): the input variables of senta_lstm (words) + outputs(dict): the output variables of input words (word embeddings and label probilities); + the sentence embedding and sequence length of the first input text. + main_program(Program): the main_program of Senta with pretrained prameters + """ + assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots + main_program = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(main_program, startup_program): + text_1 = fluid.layers.data(name="text", shape=[-1, max_seq_len, 1], dtype="int64", lod_level=0) + seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64', lod_level=0) + seq_len_used = fluid.layers.squeeze(seq_len, axes=[1]) + + # Add embedding layer. 
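+            # The shared Senta vocabulary has 1,256,607 entries; the last id is
+            # used as padding_idx, so padded positions embed to zero vectors.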
+ w_param_attrs = fluid.ParamAttr( + name="embedding_0.w_0", initializer=fluid.initializer.TruncatedNormal(scale=0.02), trainable=trainable) + dict_dim = 1256607 + emb_1 = fluid.layers.embedding( + input=text_1, size=[dict_dim, 128], padding_idx=dict_dim - 1, dtype='float32', param_attr=w_param_attrs) + emb_1_name = emb_1.name + data_list = [text_1] + emb_name_list = [emb_1_name] + + # Add lstm layer. + pred, fc = lstm_net(emb_1, seq_len_used) + pred_name = pred.name + fc_name = fc.name + + if num_slots > 1: + text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0) + emb_2 = fluid.embedding( + input=text_2, + size=[dict_dim, 128], + padding_idx=dict_dim - 1, + dtype='float32', + param_attr=w_param_attrs) + emb_2_name = emb_2.name + data_list.append(text_2) + emb_name_list.append(emb_2_name) + + if num_slots > 2: + text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0) + emb_3 = fluid.embedding( + input=text_3, + size=[dict_dim, 128], + padding_idx=dict_dim - 1, + dtype='float32', + param_attr=w_param_attrs) + emb_3_name = emb_3.name + data_list.append(text_3) + emb_name_list.append(emb_3_name) + + variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"], + list(main_program.global_block().vars.keys())) + prefix_name = "@HUB_{}@".format(self.name) + add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names) + + for param in main_program.global_block().iter_parameters(): + param.trainable = trainable + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + # Load the senta_lstm pretrained model. + def if_exist(var): + return os.path.exists(os.path.join(self.pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist) + + inputs = {'seq_len': seq_len} + outputs = { + "class_probs": main_program.global_block().vars[prefix_name + pred_name], + "sentence_feature": main_program.global_block().vars[prefix_name + fc_name] + } + for index, data in enumerate(data_list): + if index == 0: + inputs['text'] = data + outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]] + else: + inputs['text_%s' % (index + 1)] = data + outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name + + emb_name_list[index]] + return inputs, outputs, main_program + + @serving + def sentiment_classify(self, texts=[], data={}, use_gpu=False, batch_size=1): + """ + Get the sentiment prediction results results with the texts as input + + Args: + texts(list): the input texts to be predicted, if texts not data + data(dict): key must be 'text', value is the texts to be predicted, if data not texts + use_gpu(bool): whether use gpu to predict or not + batch_size(int): the program deals once with one batch + + Returns: + results(list): the word segmentation results + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + if texts != [] and isinstance(texts, list) and data == {}: + predicted_data = texts + elif texts == [] and isinstance(data, dict) and isinstance(data.get('text', None), list) and data['text']: + predicted_data = data["text"] + else: + raise ValueError("The input data is inconsistent with expectations.") + + predicted_data = self.to_unicode(predicted_data) + start_idx = 0 + iteration = int(math.ceil(len(predicted_data) / batch_size)) + results = [] + for i in range(iteration): + if i < (iteration - 1): + batch_data = predicted_data[start_idx:(start_idx + batch_size)] + else: + batch_data = predicted_data[start_idx:] + + start_idx = start_idx + batch_size + processed_results = preprocess(self.word_seg_module, batch_data, self.word_dict, use_gpu, batch_size) + tensor_words = self.texts2tensor(processed_results) + + if use_gpu: + batch_out = self.gpu_predictor.run([tensor_words]) + else: + batch_out = self.cpu_predictor.run([tensor_words]) + batch_result = postprocess(batch_out[0], processed_results) + results += batch_result + return results + + def get_labels(self): + """ + Get the labels which was used when pretraining + Returns: + self.labels(dict) + """ + self.labels = {"positive": 1, "negative": 0} + return self.labels + + +if __name__ == "__main__": + senta = SentaLSTM() + senta.context(num_slots=3) + # Data to be predicted + test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] + + # execute predict and print the result + input_dict = {"text": test_text} + results = senta.sentiment_classify(data=input_dict) + + for index, result in enumerate(results): + if six.PY2: + print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) + else: + print(results[index]) diff --git a/modules/text/sentiment_analysis/senta_lstm/net.py b/modules/text/sentiment_analysis/senta_lstm/net.py new file mode 100755 index 0000000000000000000000000000000000000000..3b5a83870bc6edee6c9785aa941f9181a5690c12 --- /dev/null +++ b/modules/text/sentiment_analysis/senta_lstm/net.py @@ -0,0 +1,22 @@ +# -*- coding:utf-8 -*- +import paddle.fluid as fluid + + +def lstm_net(emb, seq_len, hid_dim=128, hid_dim2=96, class_dim=2, emb_lr=30.0): + """ + Lstm net + """ + # unpad the token_feature + unpad_feature = fluid.layers.sequence_unpad(emb, length=seq_len) + # Lstm layer + fc0 = fluid.layers.fc(input=unpad_feature, size=hid_dim * 4) + lstm_h, c = fluid.layers.dynamic_lstm(input=fc0, size=hid_dim * 4, is_reverse=False) + # max pooling layer + lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max') + lstm_max_tanh = fluid.layers.tanh(lstm_max) + # full connect layer + fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh') + # softmax layer + prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax') + + return prediction, fc1 diff --git a/modules/text/sentiment_analysis/senta_lstm/processor.py b/modules/text/sentiment_analysis/senta_lstm/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..39190cf3a7c02a5e7974f32329a584f40db81832 --- /dev/null +++ b/modules/text/sentiment_analysis/senta_lstm/processor.py @@ -0,0 +1,62 @@ +# -*- coding:utf-8 -*- +import io +import numpy as np + + +def load_vocab(file_path): + """ + load the given vocabulary + """ + vocab = {} + with io.open(file_path, 'r', encoding='utf8') as f: + wid = 0 + for line in f: + parts = line.rstrip().split('\t') + vocab[parts[0]] = int(parts[1]) + vocab[""] = len(vocab) + return vocab + + +def preprocess(lac, texts, word_dict, use_gpu=False, batch_size=1): + """ + firstly, the predicted texts are 
segmented by the lac module;
+    then, the word segmentation results are fed into senta
+    """
+    result = []
+    input_dict = {'text': texts}
+    processed = lac.lexical_analysis(data=input_dict, use_gpu=use_gpu, batch_size=batch_size)
+    unk_id = word_dict[""]
+    for index, data in enumerate(processed):
+        result_i = {'processed': []}
+        result_i['origin'] = texts[index]
+        for word in data['word']:
+            if word in word_dict:
+                _index = word_dict[word]
+            else:
+                _index = unk_id
+            result_i['processed'].append(_index)
+        result.append(result_i)
+    return result
+
+
+def postprocess(predict_out, texts):
+    """
+    Convert the model's output tensor to sentiment labels
+    """
+    predict_out = predict_out.as_ndarray()
+    batch_size = len(texts)
+    result = []
+    for index in range(batch_size):
+        result_i = {}
+        result_i['text'] = texts[index]['origin']
+        label = int(np.argmax(predict_out[index]))
+        if label == 0:
+            key = 'negative'
+        else:
+            key = 'positive'
+        result_i['sentiment_label'] = label
+        result_i['sentiment_key'] = key
+        result_i['positive_probs'] = float('%.4f' % predict_out[index, 1])
+        result_i['negative_probs'] = float('%.4f' % (1 - predict_out[index, 1]))
+        result.append(result_i)
+    return result
diff --git a/hub_module/modules/text/syntactic_analysis/DDParser/README.md b/modules/text/syntactic_analysis/DDParser/README.md
similarity index 100%
rename from hub_module/modules/text/syntactic_analysis/DDParser/README.md
rename to modules/text/syntactic_analysis/DDParser/README.md
diff --git a/hub_module/modules/text/sentiment_analysis/senta_lstm/__init__.py b/modules/text/syntactic_analysis/DDParser/__init__.py
similarity index 100%
rename from hub_module/modules/text/sentiment_analysis/senta_lstm/__init__.py
rename to modules/text/syntactic_analysis/DDParser/__init__.py
diff --git a/modules/text/syntactic_analysis/DDParser/module.py b/modules/text/syntactic_analysis/DDParser/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..a63e4a6e4a6befc9bddf42bdd64bd0f87db17666
--- /dev/null
+++ b/modules/text/syntactic_analysis/DDParser/module.py
@@ -0,0 +1,191 @@
+# -*- coding:utf-8 -*-
+import os
+import argparse
+
+import numpy as np
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import matplotlib.font_manager as font_manager
+from paddle import fluid
+import paddlehub as hub
+from paddlehub.module.module import serving, moduleinfo, runnable
+
+try:
+    from ddparser import DDParser as DDParserModel
+except:
+    raise ImportError(
+        "The module requires additional dependencies: ddparser. Please run 'pip install ddparser' to install it.")
+
+
+@moduleinfo(
+    name="ddparser",
+    version="1.0.0",
+    summary="Baidu's open-source DDParser model.",
+    author="baidu-nlp",
+    author_email="",
+    type="nlp/syntactic_analysis")
+class ddparser(hub.NLPPredictionModule):
+    def _initialize(self):
+        """
+        initialize with the necessary elements
+        """
+        self.ddp = DDParserModel(prob=True, use_pos=True)
+        self.font = font_manager.FontProperties(fname=os.path.join(self.directory, "SourceHanSans-Regular.ttf"))
+
+    @serving
+    def serving_parse(self, texts=[], return_visual=False):
+        results = self.parse(texts, return_visual)
+        if return_visual:
+            for i, result in enumerate(results):
+                result['visual'] = result['visual'].tolist()
+
+        return results
+
+    def parse(self, texts=[], return_visual=False):
+        """
+        parse the dependency.
+
+        Args:
+            texts(list[str] or list[list[str]]): the input texts to be parsed. Each element is either an untokenized string or a list of tokens.
+            return_visual(bool): if set True, the result will contain the dependency visualization.
+
+        Returns:
+            results(list[dict]): a list with one element per element of texts. Each element is a dictionary of the form:
+                {
+                    'word': list[str], the tokenized words.
+                    'head': list[int], the head ids.
+                    'deprel': list[str], the dependency relations.
+                    'prob': list[float], the prediction probabilities of the dependency relations.
+                    'postag': list[str], the POS tags. If an element of texts is a token list, the key 'postag' is not returned.
+                    'visual': numpy.array, the dependency visualization. Use cv2.imshow to show it or cv2.imwrite to save it. Not returned if return_visual=False.
+                }
+        """
+
+        if not texts:
+            return
+        if all([isinstance(i, str) and i for i in texts]):
+            do_parse = self.ddp.parse
+        elif all([isinstance(i, list) and i for i in texts]):
+            do_parse = self.ddp.parse_seg
+        else:
+            raise ValueError("All elements should be strings or token lists")
+        results = do_parse(texts)
+        if return_visual:
+            for result in results:
+                result['visual'] = self.visualize(result['word'], result['head'], result['deprel'])
+        return results
+
+    @runnable
+    def run_cmd(self, argvs):
+        """
+        Run as a command
+        """
+        self.parser = argparse.ArgumentParser(
+            description='Run the %s module.' % self.name,
+            prog='hub run %s' % self.name,
+            usage='%(prog)s',
+            add_help=True)
+
+        self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required")
+
+        self.add_module_input_arg()
+
+        args = self.parser.parse_args(argvs)
+
+        input_data = self.check_input_data(args)
+
+        results = self.parse(texts=input_data)
+
+        return results
+
+    def visualize(self, word, head, deprel):
+        """
+        Visualize the dependency.
+
+        Args:
+            word: list[str], the tokenized words.
+            head: list[int], the head ids.
+            deprel: list[str], the dependency relations.
+
+        Returns:
+            data: a numpy array; use cv2.imshow to show it or cv2.imwrite to save it.
+        """
+        nodes = ['ROOT'] + word
+        x = list(range(len(nodes)))
+        y = [0] * (len(nodes))
+        fig, ax = plt.subplots()
+        # control the picture size
+        max_span = max([abs(i + 1 - j) for i, j in enumerate(head)])
+        fig.set_size_inches((len(nodes), max_span / 2))
+        # set the points
+        plt.scatter(x, y, c='w')
+
+        for i in range(len(nodes)):
+            txt = nodes[i]
+            xytext = (i, 0)
+            if i == 0:
+                # set 'ROOT'
+                ax.annotate(
+                    txt,
+                    xy=xytext,
+                    xycoords='data',
+                    xytext=xytext,
+                    textcoords='data',
+                )
+            else:
+                xy = (head[i - 1], 0)
+                rad = 0.5 if head[i - 1] < i else -0.5
+                # set the word
+                ax.annotate(
+                    txt,
+                    xy=xy,
+                    xycoords='data',
+                    xytext=(xytext[0] - 0.1, xytext[1]),
+                    textcoords='data',
+                    fontproperties=self.font)
+                # draw the curve
+                ax.annotate(
+                    "",
+                    xy=xy,
+                    xycoords='data',
+                    xytext=xytext,
+                    textcoords='data',
+                    arrowprops=dict(
+                        arrowstyle="<-",
+                        shrinkA=12,
+                        shrinkB=12,
+                        color='blue',
+                        connectionstyle="arc3,rad=%s" % rad,
+                    ),
+                )
+                # set the deprel label.
Calculate its position by the radius + text_x = min(i, head[i - 1]) + abs((i - head[i - 1])) / 2 - 0.2 + text_y = abs((i - head[i - 1])) / 4 + ax.annotate(deprel[i - 1], xy=xy, xycoords='data', xytext=[text_x, text_y], textcoords='data') + + # control the axis + plt.axis('equal') + plt.axis('off') + + # save to numpy array + fig.canvas.draw() + data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8) + data = data.reshape(fig.canvas.get_width_height()[::-1] + (3, ))[:, :, ::-1] + return data + + +if __name__ == "__main__": + module = ddparser() + # Data to be predicted + test_text = ["百度是一家高科技公司"] + results = module.parse(texts=test_text) + print(results) + test_tokens = [['百度', '是', '一家', '高科技', '公司']] + results = module.parse(texts=test_text, return_visual=True) + print(results) + result = results[0] + data = module.visualize(result['word'], result['head'], result['deprel']) + import cv2 + import numpy as np + cv2.imwrite('test1.jpg', data) + cv2.imwrite('test2.jpg', result['visual']) diff --git a/hub_module/modules/text/syntactic_analysis/README.md b/modules/text/syntactic_analysis/README.md similarity index 100% rename from hub_module/modules/text/syntactic_analysis/README.md rename to modules/text/syntactic_analysis/README.md diff --git a/hub_module/modules/text/text_generation/ernie_gen_acrostic_poetry/README.md b/modules/text/text_generation/ernie_gen_acrostic_poetry/README.md similarity index 100% rename from hub_module/modules/text/text_generation/ernie_gen_acrostic_poetry/README.md rename to modules/text/text_generation/ernie_gen_acrostic_poetry/README.md diff --git a/hub_module/modules/text/syntactic_analysis/DDParser/__init__.py b/modules/text/text_generation/ernie_gen_acrostic_poetry/__init__.py similarity index 100% rename from hub_module/modules/text/syntactic_analysis/DDParser/__init__.py rename to modules/text/text_generation/ernie_gen_acrostic_poetry/__init__.py diff --git a/modules/text/text_generation/ernie_gen_acrostic_poetry/model/decode.py b/modules/text/text_generation/ernie_gen_acrostic_poetry/model/decode.py new file mode 100644 index 0000000000000000000000000000000000000000..d07a58b559796b0331946561ed2dcbdc85ffadae --- /dev/null +++ b/modules/text/text_generation/ernie_gen_acrostic_poetry/model/decode.py @@ -0,0 +1,259 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +import numpy as np +from collections import namedtuple + +import paddle.fluid as F +import paddle.fluid.layers as L +import paddle.fluid.dygraph as D + + +def gen_bias(encoder_inputs, decoder_inputs, step): + decoder_bsz, decoder_seqlen = decoder_inputs.shape[:2] + attn_bias = L.reshape(L.range(0, decoder_seqlen, 1, dtype='float32') + 1, [1, -1, 1]) + decoder_bias = L.cast((L.matmul(attn_bias, 1. 
/ attn_bias, transpose_y=True) >= 1.), + 'float32') #[1, 1, decoderlen, decoderlen] + encoder_bias = L.unsqueeze(L.cast(L.ones_like(encoder_inputs), 'float32'), [1]) #[bsz, 1, encoderlen] + encoder_bias = L.expand(encoder_bias, [1, decoder_seqlen, 1]) #[bsz,decoderlen, encoderlen] + decoder_bias = L.expand(decoder_bias, [decoder_bsz, 1, 1]) #[bsz, decoderlen, decoderlen] + if step > 0: + bias = L.concat([encoder_bias, L.ones([decoder_bsz, decoder_seqlen, step], 'float32'), decoder_bias], -1) + else: + bias = L.concat([encoder_bias, decoder_bias], -1) + return bias + + +@D.no_grad +def greedy_search_infilling(model, + q_ids, + q_sids, + sos_id, + eos_id, + attn_id, + max_encode_len=640, + max_decode_len=100, + tgt_type_id=3): + model.eval() + _, logits, info = model(q_ids, q_sids) + gen_ids = L.argmax(logits, -1) + d_batch, d_seqlen = q_ids.shape + seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) + has_stopped = np.zeros([d_batch], dtype=np.bool) + gen_seq_len = np.zeros([d_batch], dtype=np.int64) + output_ids = [] + + past_cache = info['caches'] + + cls_ids = L.ones([d_batch], dtype='int64') * sos_id + attn_ids = L.ones([d_batch], dtype='int64') * attn_id + ids = L.stack([cls_ids, attn_ids], -1) + for step in range(max_decode_len): + bias = gen_bias(q_ids, ids, step) + pos_ids = D.to_variable(np.tile(np.array([[step, step + 1]], dtype=np.int64), [d_batch, 1])) + pos_ids += seqlen + _, logits, info = model( + ids, L.ones_like(ids) * tgt_type_id, pos_ids=pos_ids, attn_bias=bias, past_cache=past_cache) + gen_ids = L.argmax(logits, -1) + + past_cached_k, past_cached_v = past_cache + cached_k, cached_v = info['caches'] + cached_k = [L.concat([pk, k[:, :1, :]], 1) for pk, k in zip(past_cached_k, cached_k)] # concat cached + cached_v = [L.concat([pv, v[:, :1, :]], 1) for pv, v in zip(past_cached_v, cached_v)] + past_cache = (cached_k, cached_v) + + gen_ids = gen_ids[:, 1] + ids = L.stack([gen_ids, attn_ids], 1) + + gen_ids = gen_ids.numpy() + has_stopped |= (gen_ids == eos_id).astype(np.bool) + gen_seq_len += (1 - has_stopped.astype(np.int64)) + output_ids.append(gen_ids.tolist()) + if has_stopped.all(): + break + output_ids = np.array(output_ids).transpose([1, 0]) + return output_ids + + +BeamSearchState = namedtuple('BeamSearchState', ['log_probs', 'lengths', 'finished']) +BeamSearchOutput = namedtuple('BeamSearchOutput', ['scores', 'predicted_ids', 'beam_parent_ids']) + + +def log_softmax(x): + e_x = np.exp(x - np.max(x)) + return np.log(e_x / e_x.sum()) + + +def mask_prob(p, onehot_eos, finished): + is_finished = L.cast(L.reshape(finished, [-1, 1]) != 0, 'float32') + p = is_finished * (1. - L.cast(onehot_eos, 'float32')) * -9999. + (1. - is_finished) * p + return p + + +def hyp_score(log_probs, length, length_penalty): + lp = L.pow((5. + L.cast(length, 'float32')) / 6., length_penalty) + return log_probs / lp + + +def beam_search_step(state, logits, eos_id, beam_width, is_first_step, length_penalty): + """logits.shape == [B*W, V]""" + beam_size, vocab_size = logits.shape # as batch size=1 in this hub module. 
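In `greedy_search_infilling` above, each step feeds the model only the two freshly generated positions and then appends position 0 of the new key/value tensors to the running cache, so attention still covers the full history. A toy numpy sketch of that cache-growth pattern (shapes are illustrative; no real model is involved):

```python
import numpy as np

# toy shapes: batch=2, hidden=4; the cache grows by one position per decode step
cached_k = np.zeros((2, 5, 4))   # keys for the 5 positions seen so far
new_k = np.random.rand(2, 2, 4)  # a step emits a [token, ATTN] pair; only slot 0 is kept
cached_k = np.concatenate([cached_k, new_k[:, :1, :]], axis=1)  # mirrors L.concat([pk, k[:, :1, :]], 1)
print(cached_k.shape)  # (2, 6, 4)
```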
the first dim means bsz * beam_size equals beam_size + logits_np = logits.numpy() + for i in range(beam_size): + logits_np[i][17963] = 0 # make [UNK] prob = 0 + logits = D.to_variable(logits_np) + + bsz, beam_width = state.log_probs.shape + onehot_eos = L.cast(F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size), 'int64') #[1, V] + + probs = L.log(L.softmax(logits)) #[B*W, V] + probs = mask_prob(probs, onehot_eos, state.finished) #[B*W, V] + allprobs = L.reshape(state.log_probs, [-1, 1]) + probs #[B*W, V] + + not_finished = 1 - L.reshape(state.finished, [-1, 1]) #[B*W,1] + not_eos = 1 - onehot_eos + length_to_add = not_finished * not_eos #[B*W,V] + alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add + + allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size]) + alllen = L.reshape(alllen, [-1, beam_width * vocab_size]) + allscore = hyp_score(allprobs, alllen, length_penalty) + if is_first_step: + allscore = L.reshape(allscore, [bsz, beam_width, -1])[:, 0, :] # first step only consiter beam 0 + scores, idx = L.topk(allscore, k=beam_width) #[B, W] + next_beam_id = idx // vocab_size #[B, W] + next_word_id = idx % vocab_size + + gather_idx = L.concat([L.where(idx != -1)[:, :1], L.reshape(idx, [-1, 1])], 1) + next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape) + next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape) + + gather_idx = L.concat([L.where(next_beam_id != -1)[:, :1], L.reshape(next_beam_id, [-1, 1])], 1) + next_finished = L.reshape(L.gather_nd(state.finished, gather_idx), + state.finished.shape) #[gather new beam state according to new beam id] + + next_finished += L.cast(next_word_id == eos_id, 'int64') + next_finished = L.cast(next_finished > 0, 'int64') + + next_state = BeamSearchState(log_probs=next_probs, lengths=next_len, finished=next_finished) + output = BeamSearchOutput(scores=scores, predicted_ids=next_word_id, beam_parent_ids=next_beam_id) + + return output, next_state + + +@D.no_grad +def beam_search_infilling(model, + q_ids, + q_sids, + sos_id, + eos_id, + attn_id, + max_encode_len=640, + max_decode_len=100, + beam_width=5, + tgt_type_id=3, + length_penalty=1.0): + model.eval() + _, __, info = model(q_ids, q_sids) + d_batch, d_seqlen = q_ids.shape + + state = BeamSearchState( + log_probs=L.zeros([d_batch, beam_width], 'float32'), + lengths=L.zeros([d_batch, beam_width], 'int64'), + finished=L.zeros([d_batch, beam_width], 'int64')) + outputs = [] + + def reorder_(t, parent_id): + """reorder cache according to parent beam id""" + gather_idx = L.where(parent_id != -1)[:, 0] * beam_width + L.reshape(parent_id, [-1]) + t = L.gather(t, gather_idx) + return t + + def tile_(t, times): + _shapes = list(t.shape[1:]) + ret = L.reshape(L.expand(L.unsqueeze(t, [1]), [ + 1, + times, + ] + [ + 1, + ] * len(_shapes)), [ + -1, + ] + _shapes) + return ret + + cached_k, cached_v = info['caches'] + cached_k = [tile_(k, beam_width) for k in cached_k] + cached_v = [tile_(v, beam_width) for v in cached_v] + past_cache = (cached_k, cached_v) + + q_ids = tile_(q_ids, beam_width) + seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) + + cls_ids = L.ones([d_batch * beam_width], dtype='int64') * sos_id + attn_ids = L.ones([d_batch * beam_width], dtype='int64') * attn_id # SOS + ids = L.stack([cls_ids, attn_ids], -1) + for step in range(max_decode_len): + bias = gen_bias(q_ids, ids, step) + pos_ids = D.to_variable(np.tile(np.array([[step, step + 1]], dtype=np.int64), [d_batch * beam_width, 1])) + pos_ids += seqlen + + _, logits, info = model( + 
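`hyp_score` above implements GNMT-style length normalization: a hypothesis's accumulated log-probability is divided by `((5 + length) / 6) ** length_penalty`, so longer hypotheses are not rejected merely for summing more negative log-probabilities. A small numeric check with made-up values:

```python
def hyp_score(log_prob_sum, length, length_penalty=1.0):
    # same formula as above: lp = ((5 + len) / 6) ** penalty
    lp = ((5.0 + length) / 6.0) ** length_penalty
    return log_prob_sum / lp

# the longer hypothesis wins after normalization despite a worse raw sum
print(hyp_score(-6.0, 10))  # -6.0 / 2.5 = -2.4
print(hyp_score(-4.0, 4))   # -4.0 / 1.5 = -2.666...
```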
ids, L.ones_like(ids) * tgt_type_id, pos_ids=pos_ids, attn_bias=bias, past_cache=past_cache) + + output, state = beam_search_step( + state, + logits[:, 1], + eos_id=eos_id, + beam_width=beam_width, + is_first_step=(step == 0), + length_penalty=length_penalty) + outputs.append(output) + + past_cached_k, past_cached_v = past_cache + cached_k, cached_v = info['caches'] + cached_k = [ + reorder_(L.concat([pk, k[:, :1, :]], 1), output.beam_parent_ids) for pk, k in zip(past_cached_k, cached_k) + ] # concat cached + cached_v = [ + reorder_(L.concat([pv, v[:, :1, :]], 1), output.beam_parent_ids) for pv, v in zip(past_cached_v, cached_v) + ] + past_cache = (cached_k, cached_v) + + pred_ids_flatten = L.reshape(output.predicted_ids, [d_batch * beam_width]) + ids = L.stack([pred_ids_flatten, attn_ids], 1) + + if state.finished.numpy().all(): + break + + final_ids = L.stack([o.predicted_ids for o in outputs], 0) + final_parent_ids = L.stack([o.beam_parent_ids for o in outputs], 0) + final_ids = L.gather_tree(final_ids, final_parent_ids) #[:, :, + #0] #pick best beam + final_ids = L.transpose(L.reshape(final_ids, [-1, d_batch * 1, beam_width]), [1, 2, 0]) + return final_ids + + +en_patten = re.compile(r'^[a-zA-Z0-9]*$') + + +def post_process(token): + if token.startswith('##'): + ret = token[2:] + else: + if en_patten.match(token): + ret = ' ' + token + else: + ret = token + return ret diff --git a/modules/text/text_generation/ernie_gen_acrostic_poetry/model/file_utils.py b/modules/text/text_generation/ernie_gen_acrostic_poetry/model/file_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..608be4efc6644626f7f408df200fd299f2dd997e --- /dev/null +++ b/modules/text/text_generation/ernie_gen_acrostic_poetry/model/file_utils.py @@ -0,0 +1,46 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os + +from tqdm import tqdm +from paddlehub.common.logger import logger +from paddlehub.common.dir import MODULE_HOME + + +def _fetch_from_remote(url, force_download=False): + import tempfile, requests, tarfile + cached_dir = os.path.join(MODULE_HOME, "ernie_for_gen") + if force_download or not os.path.exists(cached_dir): + with tempfile.NamedTemporaryFile() as f: + #url = 'https://ernie.bj.bcebos.com/ERNIE_stable.tgz' + r = requests.get(url, stream=True) + total_len = int(r.headers.get('content-length')) + for chunk in tqdm( + r.iter_content(chunk_size=1024), total=total_len // 1024, desc='downloading %s' % url, unit='KB'): + if chunk: + f.write(chunk) + f.flush() + logger.debug('extacting... 
to %s' % f.name) + with tarfile.open(f.name) as tf: + tf.extractall(path=cached_dir) + logger.debug('%s cached in %s' % (url, cached_dir)) + return cached_dir + + +def add_docstring(doc): + def func(f): + f.__doc__ += ('\n======other docs from supper class ======\n%s' % doc) + return f + + return func diff --git a/modules/text/text_generation/ernie_gen_acrostic_poetry/model/modeling_ernie.py b/modules/text/text_generation/ernie_gen_acrostic_poetry/model/modeling_ernie.py new file mode 100644 index 0000000000000000000000000000000000000000..d5de28a5fee73371babd05b644e03a0f75ecdd5e --- /dev/null +++ b/modules/text/text_generation/ernie_gen_acrostic_poetry/model/modeling_ernie.py @@ -0,0 +1,327 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +import logging + +import paddle.fluid.dygraph as D +import paddle.fluid as F +import paddle.fluid.layers as L + +log = logging.getLogger(__name__) + + +def _build_linear(n_in, n_out, name, init, act=None): + return D.Linear( + n_in, + n_out, + param_attr=F.ParamAttr(name='%s.w_0' % name if name is not None else None, initializer=init), + bias_attr='%s.b_0' % name if name is not None else None, + act=act) + + +def _build_ln(n_in, name): + return D.LayerNorm( + normalized_shape=n_in, + param_attr=F.ParamAttr( + name='%s_layer_norm_scale' % name if name is not None else None, initializer=F.initializer.Constant(1.)), + bias_attr=F.ParamAttr( + name='%s_layer_norm_bias' % name if name is not None else None, initializer=F.initializer.Constant(1.)), + ) + + +def append_name(name, postfix): + if name is None: + return None + elif name == '': + return postfix + else: + return '%s_%s' % (name, postfix) + + +class AttentionLayer(D.Layer): + def __init__(self, cfg, name=None): + super(AttentionLayer, self).__init__() + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + d_model = cfg['hidden_size'] + n_head = cfg['num_attention_heads'] + assert d_model % n_head == 0 + d_model_q = cfg.get('query_hidden_size_per_head', d_model // n_head) * n_head + d_model_v = cfg.get('value_hidden_size_per_head', d_model // n_head) * n_head + self.n_head = n_head + self.d_key = d_model_q // n_head + self.q = _build_linear(d_model, d_model_q, append_name(name, 'query_fc'), initializer) + self.k = _build_linear(d_model, d_model_q, append_name(name, 'key_fc'), initializer) + self.v = _build_linear(d_model, d_model_v, append_name(name, 'value_fc'), initializer) + self.o = _build_linear(d_model_v, d_model, append_name(name, 'output_fc'), initializer) + self.dropout = lambda i: L.dropout( + i, + dropout_prob=cfg['attention_probs_dropout_prob'], + dropout_implementation="upscale_in_train", + ) if self.training else i + + def forward(self, queries, keys, values, attn_bias, past_cache): + assert len(queries.shape) == len(keys.shape) == 
len(values.shape) == 3 + + q = self.q(queries) + k = self.k(keys) + v = self.v(values) + + cache = (k, v) + if past_cache is not None: + cached_k, cached_v = past_cache + k = L.concat([cached_k, k], 1) + v = L.concat([cached_v, v], 1) + + q = L.transpose(L.reshape(q, [0, 0, self.n_head, q.shape[-1] // self.n_head]), + [0, 2, 1, 3]) #[batch, head, seq, dim] + k = L.transpose(L.reshape(k, [0, 0, self.n_head, k.shape[-1] // self.n_head]), + [0, 2, 1, 3]) #[batch, head, seq, dim] + v = L.transpose(L.reshape(v, [0, 0, self.n_head, v.shape[-1] // self.n_head]), + [0, 2, 1, 3]) #[batch, head, seq, dim] + + q = L.scale(q, scale=self.d_key**-0.5) + score = L.matmul(q, k, transpose_y=True) + if attn_bias is not None: + score += attn_bias + score = L.softmax(score, use_cudnn=True) + score = self.dropout(score) + + out = L.matmul(score, v) + out = L.transpose(out, [0, 2, 1, 3]) + out = L.reshape(out, [0, 0, out.shape[2] * out.shape[3]]) + + out = self.o(out) + return out, cache + + +class PositionwiseFeedForwardLayer(D.Layer): + def __init__(self, cfg, name=None): + super(PositionwiseFeedForwardLayer, self).__init__() + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + d_model = cfg['hidden_size'] + d_ffn = cfg.get('intermediate_size', 4 * d_model) + assert cfg['hidden_act'] in ['relu', 'gelu'] + self.i = _build_linear(d_model, d_ffn, append_name(name, 'fc_0'), initializer, act=cfg['hidden_act']) + self.o = _build_linear(d_ffn, d_model, append_name(name, 'fc_1'), initializer) + prob = cfg.get('intermediate_dropout_prob', 0.) + self.dropout = lambda i: L.dropout( + i, + dropout_prob=prob, + dropout_implementation="upscale_in_train", + ) if self.training else i + + def forward(self, inputs): + hidden = self.i(inputs) + hidden = self.dropout(hidden) + out = self.o(hidden) + return out + + +class ErnieBlock(D.Layer): + def __init__(self, cfg, name=None): + super(ErnieBlock, self).__init__() + d_model = cfg['hidden_size'] + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + + self.attn = AttentionLayer(cfg, name=append_name(name, 'multi_head_att')) + self.ln1 = _build_ln(d_model, name=append_name(name, 'post_att')) + self.ffn = PositionwiseFeedForwardLayer(cfg, name=append_name(name, 'ffn')) + self.ln2 = _build_ln(d_model, name=append_name(name, 'post_ffn')) + prob = cfg.get('intermediate_dropout_prob', cfg['hidden_dropout_prob']) + self.dropout = lambda i: L.dropout( + i, + dropout_prob=prob, + dropout_implementation="upscale_in_train", + ) if self.training else i + + def forward(self, inputs, attn_bias=None, past_cache=None): + attn_out, cache = self.attn(inputs, inputs, inputs, attn_bias, past_cache=past_cache) #self attn + attn_out = self.dropout(attn_out) + hidden = attn_out + inputs + hidden = self.ln1(hidden) # dropout/ add/ norm + + ffn_out = self.ffn(hidden) + ffn_out = self.dropout(ffn_out) + hidden = ffn_out + hidden + hidden = self.ln2(hidden) + return hidden, cache + + +class ErnieEncoderStack(D.Layer): + def __init__(self, cfg, name=None): + super(ErnieEncoderStack, self).__init__() + n_layers = cfg['num_hidden_layers'] + self.block = D.LayerList([ErnieBlock(cfg, append_name(name, 'layer_%d' % i)) for i in range(n_layers)]) + + def forward(self, inputs, attn_bias=None, past_cache=None): + if past_cache is not None: + assert isinstance( + past_cache, + tuple), 'unknown type of `past_cache`, expect tuple or list. 
got %s' % repr(type(past_cache)) + past_cache = list(zip(*past_cache)) + else: + past_cache = [None] * len(self.block) + cache_list_k, cache_list_v, hidden_list = [], [], [inputs] + + for b, p in zip(self.block, past_cache): + inputs, cache = b(inputs, attn_bias=attn_bias, past_cache=p) + cache_k, cache_v = cache + cache_list_k.append(cache_k) + cache_list_v.append(cache_v) + hidden_list.append(inputs) + + return inputs, hidden_list, (cache_list_k, cache_list_v) + + +class ErnieModel(D.Layer): + def __init__(self, cfg, name=None): + """ + Fundamental pretrained Ernie model + """ + log.debug('init ErnieModel with config: %s' % repr(cfg)) + D.Layer.__init__(self) + d_model = cfg['hidden_size'] + d_emb = cfg.get('emb_size', cfg['hidden_size']) + d_vocab = cfg['vocab_size'] + d_pos = cfg['max_position_embeddings'] + d_sent = cfg.get("sent_type_vocab_size") or cfg['type_vocab_size'] + self.n_head = cfg['num_attention_heads'] + self.return_additional_info = cfg.get('return_additional_info', False) + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + + self.ln = _build_ln(d_model, name=append_name(name, 'pre_encoder')) + self.word_emb = D.Embedding([d_vocab, d_emb], + param_attr=F.ParamAttr( + name=append_name(name, 'word_embedding'), initializer=initializer)) + self.pos_emb = D.Embedding([d_pos, d_emb], + param_attr=F.ParamAttr( + name=append_name(name, 'pos_embedding'), initializer=initializer)) + self.sent_emb = D.Embedding([d_sent, d_emb], + param_attr=F.ParamAttr( + name=append_name(name, 'sent_embedding'), initializer=initializer)) + prob = cfg['hidden_dropout_prob'] + self.dropout = lambda i: L.dropout( + i, + dropout_prob=prob, + dropout_implementation="upscale_in_train", + ) if self.training else i + + self.encoder_stack = ErnieEncoderStack(cfg, append_name(name, 'encoder')) + if cfg.get('has_pooler', True): + self.pooler = _build_linear( + cfg['hidden_size'], cfg['hidden_size'], append_name(name, 'pooled_fc'), initializer, act='tanh') + else: + self.pooler = None + self.train() + + def eval(self): + if F.in_dygraph_mode(): + super(ErnieModel, self).eval() + self.training = False + for l in self.sublayers(): + l.training = False + + def train(self): + if F.in_dygraph_mode(): + super(ErnieModel, self).train() + self.training = True + for l in self.sublayers(): + l.training = True + + def forward(self, + src_ids, + sent_ids=None, + pos_ids=None, + input_mask=None, + attn_bias=None, + past_cache=None, + use_causal_mask=False): + """ + Args: + src_ids (`Variable` of shape `[batch_size, seq_len]`): + Indices of input sequence tokens in the vocabulary. + sent_ids (optional, `Variable` of shape `[batch_size, seq_len]`): + aka token_type_ids, Segment token indices to indicate first and second portions of the inputs. + if None, assume all tokens come from `segment_a` + pos_ids(optional, `Variable` of shape `[batch_size, seq_len]`): + Indices of positions of each input sequence tokens in the position embeddings. + input_mask(optional `Variable` of shape `[batch_size, seq_len]`): + Mask to avoid performing attention on the padding token indices of the encoder input. 
+ attn_bias(optional, `Variable` of shape `[batch_size, seq_len, seq_len] or False`): + 3D version of `input_mask`, if set, overrides `input_mask`; if set not False, will not apply attention mask + past_cache(optional, tuple of two lists: cached key and cached value, + each is a list of `Variable`s of shape `[batch_size, seq_len, hidden_size]`): + cached key/value tensor that will be concated to generated key/value when performing self attention. + if set, `attn_bias` should not be None. + + Returns: + pooled (`Variable` of shape `[batch_size, hidden_size]`): + output logits of pooler classifier + encoded(`Variable` of shape `[batch_size, seq_len, hidden_size]`): + output logits of transformer stack + """ + assert len(src_ids.shape) == 2, 'expect src_ids.shape = [batch, sequecen], got %s' % (repr(src_ids.shape)) + assert attn_bias is not None if past_cache else True, 'if `past_cache` is specified; attn_bias should not be None' + d_batch = L.shape(src_ids)[0] + d_seqlen = L.shape(src_ids)[1] + if pos_ids is None: + pos_ids = L.reshape(L.range(0, d_seqlen, 1, dtype='int32'), [1, -1]) + pos_ids = L.cast(pos_ids, 'int64') + if attn_bias is None: + if input_mask is None: + input_mask = L.cast(src_ids != 0, 'float32') + assert len(input_mask.shape) == 2 + input_mask = L.unsqueeze(input_mask, axes=[-1]) + attn_bias = L.matmul(input_mask, input_mask, transpose_y=True) + if use_causal_mask: + sequence = L.reshape(L.range(0, d_seqlen, 1, dtype='float32') + 1., [1, 1, -1, 1]) + causal_mask = L.cast((L.matmul(sequence, 1. / sequence, transpose_y=True) >= 1.), 'float32') + attn_bias *= causal_mask + else: + assert len(attn_bias.shape) == 3, 'expect attn_bias tobe rank 3, got %r' % attn_bias.shape + attn_bias = (1. - attn_bias) * -10000.0 + attn_bias = L.unsqueeze(attn_bias, [1]) + attn_bias = L.expand(attn_bias, [1, self.n_head, 1, 1]) # avoid broadcast =_= + attn_bias.stop_gradient = True + + if sent_ids is None: + sent_ids = L.zeros_like(src_ids) + + src_embedded = self.word_emb(src_ids) + pos_embedded = self.pos_emb(pos_ids) + sent_embedded = self.sent_emb(sent_ids) + embedded = src_embedded + pos_embedded + sent_embedded + + embedded = self.dropout(self.ln(embedded)) + + encoded, hidden_list, cache_list = self.encoder_stack(embedded, attn_bias, past_cache=past_cache) + if self.pooler is not None: + pooled = self.pooler(encoded[:, 0, :]) + else: + pooled = None + + additional_info = { + 'hiddens': hidden_list, + 'caches': cache_list, + } + + if self.return_additional_info: + return pooled, encoded, additional_info + else: + return pooled, encoded diff --git a/modules/text/text_generation/ernie_gen_acrostic_poetry/model/modeling_ernie_gen.py b/modules/text/text_generation/ernie_gen_acrostic_poetry/model/modeling_ernie_gen.py new file mode 100644 index 0000000000000000000000000000000000000000..2ad847a2ba0acd0d863ffea9a0cbe05e6e857908 --- /dev/null +++ b/modules/text/text_generation/ernie_gen_acrostic_poetry/model/modeling_ernie_gen.py @@ -0,0 +1,65 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
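One detail worth noting in `forward` above: the causal mask is built without a dedicated triangular op. With positions numbered 1..n, `i / j >= 1` exactly when `i >= j`, so multiplying the position column vector by its reciprocal row vector yields a lower-triangular matrix. A numpy illustration of the same trick:

```python
import numpy as np

seq = np.arange(1, 5, dtype=np.float32).reshape(-1, 1)     # positions 1..4
# mirrors L.matmul(sequence, 1. / sequence, transpose_y=True) >= 1.
causal = (seq @ (1.0 / seq).T >= 1.0).astype(np.float32)
print(causal)
# [[1. 0. 0. 0.]
#  [1. 1. 0. 0.]
#  [1. 1. 1. 0.]
#  [1. 1. 1. 1.]]
```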
+# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.fluid as F +import paddle.fluid.layers as L + +from ernie_gen_acrostic_poetry.model.modeling_ernie import ErnieModel +from ernie_gen_acrostic_poetry.model.modeling_ernie import _build_linear, _build_ln, append_name + + +class ErnieModelForGeneration(ErnieModel): + def __init__(self, cfg, name=None): + cfg['return_additional_info'] = True + cfg['has_pooler'] = False + super(ErnieModelForGeneration, self).__init__(cfg, name=name) + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + d_model = cfg['hidden_size'] + d_vocab = cfg['vocab_size'] + + self.mlm = _build_linear( + d_model, d_model, append_name(name, 'mask_lm_trans_fc'), initializer, act=cfg['hidden_act']) + self.mlm_ln = _build_ln(d_model, name=append_name(name, 'mask_lm_trans')) + self.mlm_bias = L.create_parameter( + dtype='float32', + shape=[d_vocab], + attr=F.ParamAttr( + name=append_name(name, 'mask_lm_out_fc.b_0'), initializer=F.initializer.Constant(value=0.0)), + is_bias=True, + ) + + def forward(self, src_ids, *args, **kwargs): + tgt_labels = kwargs.pop('tgt_labels', None) + tgt_pos = kwargs.pop('tgt_pos', None) + encode_only = kwargs.pop('encode_only', False) + _, encoded, info = ErnieModel.forward(self, src_ids, *args, **kwargs) + if encode_only: + return None, None, info + elif tgt_labels is None: + encoded = self.mlm(encoded) + encoded = self.mlm_ln(encoded) + logits = L.matmul(encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias + output_ids = L.argmax(logits, -1) + return output_ids, logits, info + else: + encoded_2d = L.gather_nd(encoded, tgt_pos) + encoded_2d = self.mlm(encoded_2d) + encoded_2d = self.mlm_ln(encoded_2d) + logits_2d = L.matmul(encoded_2d, self.word_emb.weight, transpose_y=True) + self.mlm_bias + if len(tgt_labels.shape) == 1: + tgt_labels = L.reshape(tgt_labels, [-1, 1]) + + loss = L.reduce_mean( + L.softmax_with_cross_entropy(logits_2d, tgt_labels, soft_label=(tgt_labels.shape[-1] != 1))) + return loss, logits_2d, info diff --git a/modules/text/text_generation/ernie_gen_acrostic_poetry/model/tokenizing_ernie.py b/modules/text/text_generation/ernie_gen_acrostic_poetry/model/tokenizing_ernie.py new file mode 100644 index 0000000000000000000000000000000000000000..c9e5638f9a17207ce2d664c27376f08138876da3 --- /dev/null +++ b/modules/text/text_generation/ernie_gen_acrostic_poetry/model/tokenizing_ernie.py @@ -0,0 +1,163 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
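The tokenizer below relies on greedy longest-match wordpiece segmentation (`_wordpiece`): scanning from the left, it takes the longest vocabulary entry that matches the remaining characters, marks continuation pieces with a `##` prefix, and falls back to the unknown token when no match exists. A self-contained sketch of the same idea with a toy vocabulary:

```python
def wordpiece(token, vocab, unk_token="[UNK]", prefix="##"):
    # greedy longest-match segmentation, as in _wordpiece below
    chars, start, pieces = list(token), 0, []
    while start < len(chars):
        end = len(chars)
        while start < end:
            sub = ("" if start == 0 else prefix) + "".join(chars[start:end])
            if sub in vocab:
                pieces.append(sub)
                break
            end -= 1
        else:
            return [unk_token]  # no prefix of the remainder is in the vocabulary
        start = end
    return pieces


toy_vocab = {"hello", "##world", "##wor", "##ld"}
print(wordpiece("helloworld", toy_vocab))  # ['hello', '##world']
```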
+ +import six +import re +import logging +from functools import partial + +import numpy as np + +import io + +open = partial(io.open, encoding='utf8') + +log = logging.getLogger(__name__) + +_max_input_chars_per_word = 100 + + +def _wordpiece(token, vocab, unk_token, prefix='##', sentencepiece_prefix=''): + """ wordpiece: helloworld => [hello, ##world] """ + chars = list(token) + if len(chars) > _max_input_chars_per_word: + return [unk_token], [(0, len(chars))] + + is_bad = False + start = 0 + sub_tokens = [] + sub_pos = [] + while start < len(chars): + end = len(chars) + cur_substr = None + while start < end: + substr = "".join(chars[start:end]) + if start == 0: + substr = sentencepiece_prefix + substr + if start > 0: + substr = prefix + substr + if substr in vocab: + cur_substr = substr + break + end -= 1 + if cur_substr is None: + is_bad = True + break + sub_tokens.append(cur_substr) + sub_pos.append((start, end)) + start = end + if is_bad: + return [unk_token], [(0, len(chars))] + else: + return sub_tokens, sub_pos + + +class ErnieTokenizer(object): + def __init__(self, + vocab, + unk_token='[UNK]', + sep_token='[SEP]', + cls_token='[CLS]', + pad_token='[PAD]', + mask_token='[MASK]', + wordpiece_prefix='##', + sentencepiece_prefix='', + lower=True, + encoding='utf8', + special_token_list=[]): + if not isinstance(vocab, dict): + raise ValueError('expect `vocab` to be instance of dict, got %s' % type(vocab)) + self.vocab = vocab + self.lower = lower + self.prefix = wordpiece_prefix + self.sentencepiece_prefix = sentencepiece_prefix + self.pad_id = self.vocab[pad_token] + self.cls_id = cls_token and self.vocab[cls_token] + self.sep_id = sep_token and self.vocab[sep_token] + self.unk_id = unk_token and self.vocab[unk_token] + self.mask_id = mask_token and self.vocab[mask_token] + self.unk_token = unk_token + special_tokens = {pad_token, cls_token, sep_token, unk_token, mask_token} | set(special_token_list) + pat_str = '' + for t in special_tokens: + if t is None: + continue + pat_str += '(%s)|' % re.escape(t) + pat_str += r'([a-zA-Z0-9]+|\S)' + log.debug('regex: %s' % pat_str) + self.pat = re.compile(pat_str) + self.encoding = encoding + + def tokenize(self, text): + if len(text) == 0: + return [] + if six.PY3 and not isinstance(text, six.string_types): + text = text.decode(self.encoding) + if six.PY2 and isinstance(text, str): + text = text.decode(self.encoding) + + res = [] + for match in self.pat.finditer(text): + match_group = match.group(0) + if match.groups()[-1]: + if self.lower: + match_group = match_group.lower() + words, _ = _wordpiece( + match_group, + vocab=self.vocab, + unk_token=self.unk_token, + prefix=self.prefix, + sentencepiece_prefix=self.sentencepiece_prefix) + else: + words = [match_group] + res += words + return res + + def convert_tokens_to_ids(self, tokens): + return [self.vocab.get(t, self.unk_id) for t in tokens] + + def truncate(self, id1, id2, seqlen): + len1 = len(id1) + len2 = len(id2) + half = seqlen // 2 + if len1 > len2: + len1_truncated, len2_truncated = max(half, seqlen - len2), min(half, len2) + else: + len1_truncated, len2_truncated = min(half, seqlen - len1), max(half, seqlen - len1) + return id1[:len1_truncated], id2[:len2_truncated] + + def build_for_ernie(self, text_id, pair_id=[]): + """build sentence type id, add [CLS] [SEP]""" + text_id_type = np.zeros_like(text_id, dtype=np.int64) + ret_id = np.concatenate([[self.cls_id], text_id, [self.sep_id]], 0) + ret_id_type = np.concatenate([[0], text_id_type, [0]], 0) + + if len(pair_id): + pair_id_type = 
np.ones_like(pair_id, dtype=np.int64) + ret_id = np.concatenate([ret_id, pair_id, [self.sep_id]], 0) + ret_id_type = np.concatenate([ret_id_type, pair_id_type, [1]], 0) + return ret_id, ret_id_type + + def encode(self, text, pair=None, truncate_to=None): + text_id = np.array(self.convert_tokens_to_ids(self.tokenize(text)), dtype=np.int64) + text_id_type = np.zeros_like(text_id, dtype=np.int64) + if pair is not None: + pair_id = np.array(self.convert_tokens_to_ids(self.tokenize(pair)), dtype=np.int64) + else: + pair_id = [] + if truncate_to is not None: + text_id, pair_id = self.truncate(text_id, [] if pair_id is None else pair_id, truncate_to) + + ret_id, ret_id_type = self.build_for_ernie(text_id, pair_id) + return ret_id, ret_id_type diff --git a/modules/text/text_generation/ernie_gen_acrostic_poetry/module.py b/modules/text/text_generation/ernie_gen_acrostic_poetry/module.py new file mode 100644 index 0000000000000000000000000000000000000000..74c3c32f28451416e4a7a844dd48e7fc02a88dd9 --- /dev/null +++ b/modules/text/text_generation/ernie_gen_acrostic_poetry/module.py @@ -0,0 +1,187 @@ +# coding:utf-8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import ast +import json + +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.module.module import runnable +from paddlehub.module.nlp_module import DataFormatError +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo, serving + +import argparse +import os +import numpy as np + +import paddle.fluid.dygraph as D + +from ernie_gen_acrostic_poetry.model.tokenizing_ernie import ErnieTokenizer +from ernie_gen_acrostic_poetry.model.decode import beam_search_infilling +from ernie_gen_acrostic_poetry.model.modeling_ernie_gen import ErnieModelForGeneration + + +@moduleinfo( + name="ernie_gen_acrostic_poetry", + version="1.0.1", + summary= + "ERNIE-GEN is a multi-flow language generation framework for both pre-training and fine-tuning. 
This module has been fine-tuned for the acrostic poetry generation task.",
+    author="adaxiadaxi",
+    author_email="",
+    type="nlp/text_generation",
+)
+class ErnieGen(hub.NLPPredictionModule):
+    def _initialize(self, line=4, word=7):
+        """
+        initialize with the necessary elements
+        """
+        if line not in [4, 8]:
+            raise ValueError("The line could only be 4 or 8.")
+        if word not in [5, 7]:
+            raise ValueError("The word could only be 5 or 7.")
+
+        self.line = line
+        assets_path = os.path.join(self.directory, "assets")
+        gen_checkpoint_path = os.path.join(assets_path, "ernie_gen_acrostic_poetry_L%sW%s" % (line, word))
+        ernie_cfg_path = os.path.join(assets_path, 'ernie_config.json')
+        with open(ernie_cfg_path, encoding='utf8') as ernie_cfg_file:
+            ernie_cfg = dict(json.loads(ernie_cfg_file.read()))
+        ernie_vocab_path = os.path.join(assets_path, 'vocab.txt')
+        with open(ernie_vocab_path, encoding='utf8') as ernie_vocab_file:
+            ernie_vocab = {j.strip().split('\t')[0]: i for i, j in enumerate(ernie_vocab_file.readlines())}
+
+        with fluid.dygraph.guard(fluid.CPUPlace()):
+            with fluid.unique_name.guard():
+                self.model = ErnieModelForGeneration(ernie_cfg)
+                finetuned_states, _ = D.load_dygraph(gen_checkpoint_path)
+                self.model.set_dict(finetuned_states)
+
+        self.tokenizer = ErnieTokenizer(ernie_vocab)
+        self.rev_dict = {v: k for k, v in self.tokenizer.vocab.items()}
+        self.rev_dict[self.tokenizer.pad_id] = ''  # replace [PAD]
+        self.rev_dict[self.tokenizer.unk_id] = ''  # replace [UNK]
+        self.rev_lookup = np.vectorize(lambda i: self.rev_dict[i])
+
+    @serving
+    def generate(self, texts, use_gpu=False, beam_width=5):
+        """
+        Get the continuation of the input poetry.
+
+        Args:
+            texts(list): the leading texts of the poems; for this acrostic module, each character of a text heads one line of the generated poem.
+            use_gpu(bool): whether to use GPU for prediction or not
+            beam_width(int): the beam search width.
+
+        Returns:
+            results(list): the poetry continuations.
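`self.rev_lookup` above uses `np.vectorize` to lift a plain dictionary lookup over entire arrays of generated token ids, which is how decoded beams are later turned back into characters. A toy illustration (the id-to-token table is made up; `otypes=[object]` is added here so numpy does not truncate variable-length tokens to the first result's length):

```python
import numpy as np

rev_dict = {0: "", 1: "春", 2: "风", 3: "[SEP]"}  # toy id-to-token table
rev_lookup = np.vectorize(lambda i: rev_dict[i], otypes=[object])  # same pattern as self.rev_lookup
output_ids = np.array([[1, 2, 3, 0]])
print(rev_lookup(output_ids))  # [['春' '风' '[SEP]' '']]
```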
+ """ + if texts and isinstance(texts, list) and all(texts) and all([isinstance(text, str) for text in texts]): + predicted_data = texts + else: + raise ValueError("The input texts should be a list with nonempty string elements.") + for i, text in enumerate(texts): + if len(text) > self.line: + logger.warning( + 'The input text: %s, contains more than %i characters, which will be cut off' % (text, self.line)) + texts[i] = text[:self.line] + + for char in text: + if not '\u4e00' <= char <= '\u9fff': + logger.warning( + 'The input text: %s, contains non-Chinese characters, which may result in magic output' % text) + break + + if use_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ: + use_gpu = False + logger.warning( + "use_gpu has been set False as you didn't set the environment variable CUDA_VISIBLE_DEVICES while using use_gpu=True" + ) + if use_gpu: + place = fluid.CUDAPlace(0) + else: + place = fluid.CPUPlace() + + with fluid.dygraph.guard(place): + self.model.eval() + results = [] + for text in predicted_data: + sample_results = [] + ids, sids = self.tokenizer.encode(text) + src_ids = D.to_variable(np.expand_dims(ids, 0)) + src_sids = D.to_variable(np.expand_dims(sids, 0)) + output_ids = beam_search_infilling( + self.model, + src_ids, + src_sids, + eos_id=self.tokenizer.sep_id, + sos_id=self.tokenizer.cls_id, + attn_id=self.tokenizer.vocab['[MASK]'], + max_decode_len=80, + max_encode_len=20, + beam_width=beam_width, + tgt_type_id=1) + output_str = self.rev_lookup(output_ids[0].numpy()) + + for ostr in output_str.tolist(): + if '[SEP]' in ostr: + ostr = ostr[:ostr.index('[SEP]')] + sample_results.append("".join(ostr)) + results.append(sample_results) + return results + + def add_module_config_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU for prediction") + + self.arg_config_group.add_argument('--beam_width', type=int, default=5, help="the beam search width") + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + self.parser = argparse.ArgumentParser( + description='Run the %s module.' % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, optional.") + + self.add_module_config_arg() + self.add_module_input_arg() + + args = self.parser.parse_args(argvs) + + try: + input_data = self.check_input_data(args) + except DataFormatError and RuntimeError: + self.parser.print_help() + return None + + results = self.generate(texts=input_data, use_gpu=args.use_gpu, beam_width=args.beam_width) + + return results + + +if __name__ == "__main__": + module = ErnieGen() + for result in module.generate(['夏雨荷', '我喜欢你'], beam_width=5): + print(result) diff --git a/hub_module/modules/text/text_generation/ernie_gen_couplet/README.md b/modules/text/text_generation/ernie_gen_couplet/README.md similarity index 100% rename from hub_module/modules/text/text_generation/ernie_gen_couplet/README.md rename to modules/text/text_generation/ernie_gen_couplet/README.md diff --git a/hub_module/modules/text/text_generation/ernie_gen_acrostic_poetry/__init__.py b/modules/text/text_generation/ernie_gen_couplet/__init__.py similarity index 100% rename from hub_module/modules/text/text_generation/ernie_gen_acrostic_poetry/__init__.py rename to modules/text/text_generation/ernie_gen_couplet/__init__.py diff --git a/modules/text/text_generation/ernie_gen_couplet/model/decode.py b/modules/text/text_generation/ernie_gen_couplet/model/decode.py new file mode 100644 index 0000000000000000000000000000000000000000..1d706b52a42397455565cd20c8d3adfe819cec04 --- /dev/null +++ b/modules/text/text_generation/ernie_gen_couplet/model/decode.py @@ -0,0 +1,255 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +import numpy as np +from collections import namedtuple + +import paddle.fluid as F +import paddle.fluid.layers as L +import paddle.fluid.dygraph as D + + +def gen_bias(encoder_inputs, decoder_inputs, step): + decoder_bsz, decoder_seqlen = decoder_inputs.shape[:2] + attn_bias = L.reshape(L.range(0, decoder_seqlen, 1, dtype='float32') + 1, [1, -1, 1]) + decoder_bias = L.cast((L.matmul(attn_bias, 1. 
/ attn_bias, transpose_y=True) >= 1.), + 'float32') #[1, 1, decoderlen, decoderlen] + encoder_bias = L.unsqueeze(L.cast(L.ones_like(encoder_inputs), 'float32'), [1]) #[bsz, 1, encoderlen] + encoder_bias = L.expand(encoder_bias, [1, decoder_seqlen, 1]) #[bsz,decoderlen, encoderlen] + decoder_bias = L.expand(decoder_bias, [decoder_bsz, 1, 1]) #[bsz, decoderlen, decoderlen] + if step > 0: + bias = L.concat([encoder_bias, L.ones([decoder_bsz, decoder_seqlen, step], 'float32'), decoder_bias], -1) + else: + bias = L.concat([encoder_bias, decoder_bias], -1) + return bias + + +@D.no_grad +def greedy_search_infilling(model, + q_ids, + q_sids, + sos_id, + eos_id, + attn_id, + max_encode_len=640, + max_decode_len=100, + tgt_type_id=3): + model.eval() + _, logits, info = model(q_ids, q_sids) + gen_ids = L.argmax(logits, -1) + d_batch, d_seqlen = q_ids.shape + seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) + has_stopped = np.zeros([d_batch], dtype=np.bool) + gen_seq_len = np.zeros([d_batch], dtype=np.int64) + output_ids = [] + + past_cache = info['caches'] + + cls_ids = L.ones([d_batch], dtype='int64') * sos_id + attn_ids = L.ones([d_batch], dtype='int64') * attn_id + ids = L.stack([cls_ids, attn_ids], -1) + for step in range(max_decode_len): + bias = gen_bias(q_ids, ids, step) + pos_ids = D.to_variable(np.tile(np.array([[step, step + 1]], dtype=np.int64), [d_batch, 1])) + pos_ids += seqlen + _, logits, info = model( + ids, L.ones_like(ids) * tgt_type_id, pos_ids=pos_ids, attn_bias=bias, past_cache=past_cache) + gen_ids = L.argmax(logits, -1) + + past_cached_k, past_cached_v = past_cache + cached_k, cached_v = info['caches'] + cached_k = [L.concat([pk, k[:, :1, :]], 1) for pk, k in zip(past_cached_k, cached_k)] # concat cached + cached_v = [L.concat([pv, v[:, :1, :]], 1) for pv, v in zip(past_cached_v, cached_v)] + past_cache = (cached_k, cached_v) + + gen_ids = gen_ids[:, 1] + ids = L.stack([gen_ids, attn_ids], 1) + + gen_ids = gen_ids.numpy() + has_stopped |= (gen_ids == eos_id).astype(np.bool) + gen_seq_len += (1 - has_stopped.astype(np.int64)) + output_ids.append(gen_ids.tolist()) + if has_stopped.all(): + break + output_ids = np.array(output_ids).transpose([1, 0]) + return output_ids + + +BeamSearchState = namedtuple('BeamSearchState', ['log_probs', 'lengths', 'finished']) +BeamSearchOutput = namedtuple('BeamSearchOutput', ['scores', 'predicted_ids', 'beam_parent_ids']) + + +def log_softmax(x): + e_x = np.exp(x - np.max(x)) + return np.log(e_x / e_x.sum()) + + +def mask_prob(p, onehot_eos, finished): + is_finished = L.cast(L.reshape(finished, [-1, 1]) != 0, 'float32') + p = is_finished * (1. - L.cast(onehot_eos, 'float32')) * -9999. + (1. - is_finished) * p + return p + + +def hyp_score(log_probs, length, length_penalty): + lp = L.pow((5. 
+ L.cast(length, 'float32')) / 6., length_penalty) + return log_probs / lp + + +def beam_search_step(state, logits, eos_id, beam_width, is_first_step, length_penalty): + """logits.shape == [B*W, V]""" + _, vocab_size = logits.shape + + bsz, beam_width = state.log_probs.shape + onehot_eos = L.cast(F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size), 'int64') #[1, V] + + probs = L.log(L.softmax(logits)) #[B*W, V] + probs = mask_prob(probs, onehot_eos, state.finished) #[B*W, V] + allprobs = L.reshape(state.log_probs, [-1, 1]) + probs #[B*W, V] + + not_finished = 1 - L.reshape(state.finished, [-1, 1]) #[B*W,1] + not_eos = 1 - onehot_eos + length_to_add = not_finished * not_eos #[B*W,V] + alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add + + allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size]) + alllen = L.reshape(alllen, [-1, beam_width * vocab_size]) + allscore = hyp_score(allprobs, alllen, length_penalty) + if is_first_step: + allscore = L.reshape(allscore, [bsz, beam_width, -1])[:, 0, :] # first step only consiter beam 0 + scores, idx = L.topk(allscore, k=beam_width) #[B, W] + next_beam_id = idx // vocab_size #[B, W] + next_word_id = idx % vocab_size + + gather_idx = L.concat([L.where(idx != -1)[:, :1], L.reshape(idx, [-1, 1])], 1) + next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape) + next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape) + + gather_idx = L.concat([L.where(next_beam_id != -1)[:, :1], L.reshape(next_beam_id, [-1, 1])], 1) + next_finished = L.reshape(L.gather_nd(state.finished, gather_idx), + state.finished.shape) #[gather new beam state according to new beam id] + + next_finished += L.cast(next_word_id == eos_id, 'int64') + next_finished = L.cast(next_finished > 0, 'int64') + + next_state = BeamSearchState(log_probs=next_probs, lengths=next_len, finished=next_finished) + output = BeamSearchOutput(scores=scores, predicted_ids=next_word_id, beam_parent_ids=next_beam_id) + + return output, next_state + + +@D.no_grad +def beam_search_infilling(model, + q_ids, + q_sids, + sos_id, + eos_id, + attn_id, + max_encode_len=640, + max_decode_len=100, + beam_width=5, + tgt_type_id=3, + length_penalty=1.0): + model.eval() + _, __, info = model(q_ids, q_sids) + d_batch, d_seqlen = q_ids.shape + + state = BeamSearchState( + log_probs=L.zeros([d_batch, beam_width], 'float32'), + lengths=L.zeros([d_batch, beam_width], 'int64'), + finished=L.zeros([d_batch, beam_width], 'int64')) + outputs = [] + + def reorder_(t, parent_id): + """reorder cache according to parent beam id""" + gather_idx = L.where(parent_id != -1)[:, 0] * beam_width + L.reshape(parent_id, [-1]) + t = L.gather(t, gather_idx) + return t + + def tile_(t, times): + _shapes = list(t.shape[1:]) + ret = L.reshape(L.expand(L.unsqueeze(t, [1]), [ + 1, + times, + ] + [ + 1, + ] * len(_shapes)), [ + -1, + ] + _shapes) + return ret + + cached_k, cached_v = info['caches'] + cached_k = [tile_(k, beam_width) for k in cached_k] + cached_v = [tile_(v, beam_width) for v in cached_v] + past_cache = (cached_k, cached_v) + + q_ids = tile_(q_ids, beam_width) + seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) + + cls_ids = L.ones([d_batch * beam_width], dtype='int64') * sos_id + attn_ids = L.ones([d_batch * beam_width], dtype='int64') * attn_id # SOS + ids = L.stack([cls_ids, attn_ids], -1) + for step in range(max_decode_len): + bias = gen_bias(q_ids, ids, step) + pos_ids = D.to_variable(np.tile(np.array([[step, step + 1]], dtype=np.int64), [d_batch * beam_width, 1])) + pos_ids += 
seqlen + + _, logits, info = model( + ids, L.ones_like(ids) * tgt_type_id, pos_ids=pos_ids, attn_bias=bias, past_cache=past_cache) + + output, state = beam_search_step( + state, + logits[:, 1], + eos_id=eos_id, + beam_width=beam_width, + is_first_step=(step == 0), + length_penalty=length_penalty) + outputs.append(output) + + past_cached_k, past_cached_v = past_cache + cached_k, cached_v = info['caches'] + cached_k = [ + reorder_(L.concat([pk, k[:, :1, :]], 1), output.beam_parent_ids) for pk, k in zip(past_cached_k, cached_k) + ] # concat cached + cached_v = [ + reorder_(L.concat([pv, v[:, :1, :]], 1), output.beam_parent_ids) for pv, v in zip(past_cached_v, cached_v) + ] + past_cache = (cached_k, cached_v) + + pred_ids_flatten = L.reshape(output.predicted_ids, [d_batch * beam_width]) + ids = L.stack([pred_ids_flatten, attn_ids], 1) + + if state.finished.numpy().all(): + break + + final_ids = L.stack([o.predicted_ids for o in outputs], 0) + final_parent_ids = L.stack([o.beam_parent_ids for o in outputs], 0) + final_ids = L.gather_tree(final_ids, final_parent_ids) #[:, :, + #0] #pick best beam + final_ids = L.transpose(L.reshape(final_ids, [-1, d_batch * 1, beam_width]), [1, 2, 0]) + return final_ids + + +en_patten = re.compile(r'^[a-zA-Z0-9]*$') + + +def post_process(token): + if token.startswith('##'): + ret = token[2:] + else: + if en_patten.match(token): + ret = ' ' + token + else: + ret = token + return ret diff --git a/modules/text/text_generation/ernie_gen_couplet/model/file_utils.py b/modules/text/text_generation/ernie_gen_couplet/model/file_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..608be4efc6644626f7f408df200fd299f2dd997e --- /dev/null +++ b/modules/text/text_generation/ernie_gen_couplet/model/file_utils.py @@ -0,0 +1,46 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os + +from tqdm import tqdm +from paddlehub.common.logger import logger +from paddlehub.common.dir import MODULE_HOME + + +def _fetch_from_remote(url, force_download=False): + import tempfile, requests, tarfile + cached_dir = os.path.join(MODULE_HOME, "ernie_for_gen") + if force_download or not os.path.exists(cached_dir): + with tempfile.NamedTemporaryFile() as f: + #url = 'https://ernie.bj.bcebos.com/ERNIE_stable.tgz' + r = requests.get(url, stream=True) + total_len = int(r.headers.get('content-length')) + for chunk in tqdm( + r.iter_content(chunk_size=1024), total=total_len // 1024, desc='downloading %s' % url, unit='KB'): + if chunk: + f.write(chunk) + f.flush() + logger.debug('extacting... 
diff --git a/modules/text/text_generation/ernie_gen_couplet/model/file_utils.py b/modules/text/text_generation/ernie_gen_couplet/model/file_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..608be4efc6644626f7f408df200fd299f2dd997e
--- /dev/null
+++ b/modules/text/text_generation/ernie_gen_couplet/model/file_utils.py
@@ -0,0 +1,46 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+
+from tqdm import tqdm
+from paddlehub.common.logger import logger
+from paddlehub.common.dir import MODULE_HOME
+
+
+def _fetch_from_remote(url, force_download=False):
+    import tempfile, requests, tarfile
+    cached_dir = os.path.join(MODULE_HOME, "ernie_for_gen")
+    if force_download or not os.path.exists(cached_dir):
+        with tempfile.NamedTemporaryFile() as f:
+            #url = 'https://ernie.bj.bcebos.com/ERNIE_stable.tgz'
+            r = requests.get(url, stream=True)
+            total_len = int(r.headers.get('content-length'))
+            for chunk in tqdm(
+                    r.iter_content(chunk_size=1024), total=total_len // 1024, desc='downloading %s' % url, unit='KB'):
+                if chunk:
+                    f.write(chunk)
+                    f.flush()
+            logger.debug('extracting... to %s' % f.name)
+            with tarfile.open(f.name) as tf:
+                tf.extractall(path=cached_dir)
+    logger.debug('%s cached in %s' % (url, cached_dir))
+    return cached_dir
+
+
+def add_docstring(doc):
+    def func(f):
+        f.__doc__ += ('\n====== other docs from super class ======\n%s' % doc)
+        return f
+
+    return func
diff --git a/modules/text/text_generation/ernie_gen_couplet/model/modeling_ernie.py b/modules/text/text_generation/ernie_gen_couplet/model/modeling_ernie.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5de28a5fee73371babd05b644e03a0f75ecdd5e
--- /dev/null
+++ b/modules/text/text_generation/ernie_gen_couplet/model/modeling_ernie.py
@@ -0,0 +1,327 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import logging
+
+import paddle.fluid.dygraph as D
+import paddle.fluid as F
+import paddle.fluid.layers as L
+
+log = logging.getLogger(__name__)
+
+
+def _build_linear(n_in, n_out, name, init, act=None):
+    return D.Linear(
+        n_in,
+        n_out,
+        param_attr=F.ParamAttr(name='%s.w_0' % name if name is not None else None, initializer=init),
+        bias_attr='%s.b_0' % name if name is not None else None,
+        act=act)
+
+
+def _build_ln(n_in, name):
+    return D.LayerNorm(
+        normalized_shape=n_in,
+        param_attr=F.ParamAttr(
+            name='%s_layer_norm_scale' % name if name is not None else None, initializer=F.initializer.Constant(1.)),
+        bias_attr=F.ParamAttr(
+            name='%s_layer_norm_bias' % name if name is not None else None, initializer=F.initializer.Constant(1.)),
+    )
+
+
+def append_name(name, postfix):
+    if name is None:
+        return None
+    elif name == '':
+        return postfix
+    else:
+        return '%s_%s' % (name, postfix)
+
+
+class AttentionLayer(D.Layer):
+    def __init__(self, cfg, name=None):
+        super(AttentionLayer, self).__init__()
+        initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range'])
+        d_model = cfg['hidden_size']
+        n_head = cfg['num_attention_heads']
+        assert d_model % n_head == 0
+        d_model_q = cfg.get('query_hidden_size_per_head', d_model // n_head) * n_head
+        d_model_v = cfg.get('value_hidden_size_per_head', d_model // n_head) * n_head
+        self.n_head = n_head
+        self.d_key = d_model_q // n_head
+        self.q = _build_linear(d_model, d_model_q, append_name(name, 'query_fc'), initializer)
+        self.k = _build_linear(d_model, d_model_q, append_name(name, 'key_fc'), initializer)
+        self.v = _build_linear(d_model, d_model_v, append_name(name, 'value_fc'), initializer)
+        self.o = _build_linear(d_model_v, d_model, append_name(name, 'output_fc'), initializer)
+        self.dropout = lambda i: L.dropout(
+            i,
+            dropout_prob=cfg['attention_probs_dropout_prob'],
+            dropout_implementation="upscale_in_train",
+        ) if self.training else i
+
+    def forward(self, queries, keys, values, attn_bias, past_cache):
+        assert len(queries.shape) == len(keys.shape) == len(values.shape) == 3
+
+        q =
self.q(queries) + k = self.k(keys) + v = self.v(values) + + cache = (k, v) + if past_cache is not None: + cached_k, cached_v = past_cache + k = L.concat([cached_k, k], 1) + v = L.concat([cached_v, v], 1) + + q = L.transpose(L.reshape(q, [0, 0, self.n_head, q.shape[-1] // self.n_head]), + [0, 2, 1, 3]) #[batch, head, seq, dim] + k = L.transpose(L.reshape(k, [0, 0, self.n_head, k.shape[-1] // self.n_head]), + [0, 2, 1, 3]) #[batch, head, seq, dim] + v = L.transpose(L.reshape(v, [0, 0, self.n_head, v.shape[-1] // self.n_head]), + [0, 2, 1, 3]) #[batch, head, seq, dim] + + q = L.scale(q, scale=self.d_key**-0.5) + score = L.matmul(q, k, transpose_y=True) + if attn_bias is not None: + score += attn_bias + score = L.softmax(score, use_cudnn=True) + score = self.dropout(score) + + out = L.matmul(score, v) + out = L.transpose(out, [0, 2, 1, 3]) + out = L.reshape(out, [0, 0, out.shape[2] * out.shape[3]]) + + out = self.o(out) + return out, cache + + +class PositionwiseFeedForwardLayer(D.Layer): + def __init__(self, cfg, name=None): + super(PositionwiseFeedForwardLayer, self).__init__() + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + d_model = cfg['hidden_size'] + d_ffn = cfg.get('intermediate_size', 4 * d_model) + assert cfg['hidden_act'] in ['relu', 'gelu'] + self.i = _build_linear(d_model, d_ffn, append_name(name, 'fc_0'), initializer, act=cfg['hidden_act']) + self.o = _build_linear(d_ffn, d_model, append_name(name, 'fc_1'), initializer) + prob = cfg.get('intermediate_dropout_prob', 0.) + self.dropout = lambda i: L.dropout( + i, + dropout_prob=prob, + dropout_implementation="upscale_in_train", + ) if self.training else i + + def forward(self, inputs): + hidden = self.i(inputs) + hidden = self.dropout(hidden) + out = self.o(hidden) + return out + + +class ErnieBlock(D.Layer): + def __init__(self, cfg, name=None): + super(ErnieBlock, self).__init__() + d_model = cfg['hidden_size'] + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + + self.attn = AttentionLayer(cfg, name=append_name(name, 'multi_head_att')) + self.ln1 = _build_ln(d_model, name=append_name(name, 'post_att')) + self.ffn = PositionwiseFeedForwardLayer(cfg, name=append_name(name, 'ffn')) + self.ln2 = _build_ln(d_model, name=append_name(name, 'post_ffn')) + prob = cfg.get('intermediate_dropout_prob', cfg['hidden_dropout_prob']) + self.dropout = lambda i: L.dropout( + i, + dropout_prob=prob, + dropout_implementation="upscale_in_train", + ) if self.training else i + + def forward(self, inputs, attn_bias=None, past_cache=None): + attn_out, cache = self.attn(inputs, inputs, inputs, attn_bias, past_cache=past_cache) #self attn + attn_out = self.dropout(attn_out) + hidden = attn_out + inputs + hidden = self.ln1(hidden) # dropout/ add/ norm + + ffn_out = self.ffn(hidden) + ffn_out = self.dropout(ffn_out) + hidden = ffn_out + hidden + hidden = self.ln2(hidden) + return hidden, cache + + +class ErnieEncoderStack(D.Layer): + def __init__(self, cfg, name=None): + super(ErnieEncoderStack, self).__init__() + n_layers = cfg['num_hidden_layers'] + self.block = D.LayerList([ErnieBlock(cfg, append_name(name, 'layer_%d' % i)) for i in range(n_layers)]) + + def forward(self, inputs, attn_bias=None, past_cache=None): + if past_cache is not None: + assert isinstance( + past_cache, + tuple), 'unknown type of `past_cache`, expect tuple or list. 
got %s' % repr(type(past_cache)) + past_cache = list(zip(*past_cache)) + else: + past_cache = [None] * len(self.block) + cache_list_k, cache_list_v, hidden_list = [], [], [inputs] + + for b, p in zip(self.block, past_cache): + inputs, cache = b(inputs, attn_bias=attn_bias, past_cache=p) + cache_k, cache_v = cache + cache_list_k.append(cache_k) + cache_list_v.append(cache_v) + hidden_list.append(inputs) + + return inputs, hidden_list, (cache_list_k, cache_list_v) + + +class ErnieModel(D.Layer): + def __init__(self, cfg, name=None): + """ + Fundamental pretrained Ernie model + """ + log.debug('init ErnieModel with config: %s' % repr(cfg)) + D.Layer.__init__(self) + d_model = cfg['hidden_size'] + d_emb = cfg.get('emb_size', cfg['hidden_size']) + d_vocab = cfg['vocab_size'] + d_pos = cfg['max_position_embeddings'] + d_sent = cfg.get("sent_type_vocab_size") or cfg['type_vocab_size'] + self.n_head = cfg['num_attention_heads'] + self.return_additional_info = cfg.get('return_additional_info', False) + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + + self.ln = _build_ln(d_model, name=append_name(name, 'pre_encoder')) + self.word_emb = D.Embedding([d_vocab, d_emb], + param_attr=F.ParamAttr( + name=append_name(name, 'word_embedding'), initializer=initializer)) + self.pos_emb = D.Embedding([d_pos, d_emb], + param_attr=F.ParamAttr( + name=append_name(name, 'pos_embedding'), initializer=initializer)) + self.sent_emb = D.Embedding([d_sent, d_emb], + param_attr=F.ParamAttr( + name=append_name(name, 'sent_embedding'), initializer=initializer)) + prob = cfg['hidden_dropout_prob'] + self.dropout = lambda i: L.dropout( + i, + dropout_prob=prob, + dropout_implementation="upscale_in_train", + ) if self.training else i + + self.encoder_stack = ErnieEncoderStack(cfg, append_name(name, 'encoder')) + if cfg.get('has_pooler', True): + self.pooler = _build_linear( + cfg['hidden_size'], cfg['hidden_size'], append_name(name, 'pooled_fc'), initializer, act='tanh') + else: + self.pooler = None + self.train() + + def eval(self): + if F.in_dygraph_mode(): + super(ErnieModel, self).eval() + self.training = False + for l in self.sublayers(): + l.training = False + + def train(self): + if F.in_dygraph_mode(): + super(ErnieModel, self).train() + self.training = True + for l in self.sublayers(): + l.training = True + + def forward(self, + src_ids, + sent_ids=None, + pos_ids=None, + input_mask=None, + attn_bias=None, + past_cache=None, + use_causal_mask=False): + """ + Args: + src_ids (`Variable` of shape `[batch_size, seq_len]`): + Indices of input sequence tokens in the vocabulary. + sent_ids (optional, `Variable` of shape `[batch_size, seq_len]`): + aka token_type_ids, Segment token indices to indicate first and second portions of the inputs. + if None, assume all tokens come from `segment_a` + pos_ids(optional, `Variable` of shape `[batch_size, seq_len]`): + Indices of positions of each input sequence tokens in the position embeddings. + input_mask(optional `Variable` of shape `[batch_size, seq_len]`): + Mask to avoid performing attention on the padding token indices of the encoder input. 
+ attn_bias(optional, `Variable` of shape `[batch_size, seq_len, seq_len] or False`): + 3D version of `input_mask`, if set, overrides `input_mask`; if set not False, will not apply attention mask + past_cache(optional, tuple of two lists: cached key and cached value, + each is a list of `Variable`s of shape `[batch_size, seq_len, hidden_size]`): + cached key/value tensor that will be concated to generated key/value when performing self attention. + if set, `attn_bias` should not be None. + + Returns: + pooled (`Variable` of shape `[batch_size, hidden_size]`): + output logits of pooler classifier + encoded(`Variable` of shape `[batch_size, seq_len, hidden_size]`): + output logits of transformer stack + """ + assert len(src_ids.shape) == 2, 'expect src_ids.shape = [batch, sequecen], got %s' % (repr(src_ids.shape)) + assert attn_bias is not None if past_cache else True, 'if `past_cache` is specified; attn_bias should not be None' + d_batch = L.shape(src_ids)[0] + d_seqlen = L.shape(src_ids)[1] + if pos_ids is None: + pos_ids = L.reshape(L.range(0, d_seqlen, 1, dtype='int32'), [1, -1]) + pos_ids = L.cast(pos_ids, 'int64') + if attn_bias is None: + if input_mask is None: + input_mask = L.cast(src_ids != 0, 'float32') + assert len(input_mask.shape) == 2 + input_mask = L.unsqueeze(input_mask, axes=[-1]) + attn_bias = L.matmul(input_mask, input_mask, transpose_y=True) + if use_causal_mask: + sequence = L.reshape(L.range(0, d_seqlen, 1, dtype='float32') + 1., [1, 1, -1, 1]) + causal_mask = L.cast((L.matmul(sequence, 1. / sequence, transpose_y=True) >= 1.), 'float32') + attn_bias *= causal_mask + else: + assert len(attn_bias.shape) == 3, 'expect attn_bias tobe rank 3, got %r' % attn_bias.shape + attn_bias = (1. - attn_bias) * -10000.0 + attn_bias = L.unsqueeze(attn_bias, [1]) + attn_bias = L.expand(attn_bias, [1, self.n_head, 1, 1]) # avoid broadcast =_= + attn_bias.stop_gradient = True + + if sent_ids is None: + sent_ids = L.zeros_like(src_ids) + + src_embedded = self.word_emb(src_ids) + pos_embedded = self.pos_emb(pos_ids) + sent_embedded = self.sent_emb(sent_ids) + embedded = src_embedded + pos_embedded + sent_embedded + + embedded = self.dropout(self.ln(embedded)) + + encoded, hidden_list, cache_list = self.encoder_stack(embedded, attn_bias, past_cache=past_cache) + if self.pooler is not None: + pooled = self.pooler(encoded[:, 0, :]) + else: + pooled = None + + additional_info = { + 'hiddens': hidden_list, + 'caches': cache_list, + } + + if self.return_additional_info: + return pooled, encoded, additional_info + else: + return pooled, encoded diff --git a/modules/text/text_generation/ernie_gen_couplet/model/modeling_ernie_gen.py b/modules/text/text_generation/ernie_gen_couplet/model/modeling_ernie_gen.py new file mode 100644 index 0000000000000000000000000000000000000000..7e512c61b4eca2d591c95cb4a6614f6f24a50309 --- /dev/null +++ b/modules/text/text_generation/ernie_gen_couplet/model/modeling_ernie_gen.py @@ -0,0 +1,65 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.fluid as F +import paddle.fluid.layers as L + +from ernie_gen_couplet.model.modeling_ernie import ErnieModel +from ernie_gen_couplet.model.modeling_ernie import _build_linear, _build_ln, append_name + + +class ErnieModelForGeneration(ErnieModel): + def __init__(self, cfg, name=None): + cfg['return_additional_info'] = True + cfg['has_pooler'] = False + super(ErnieModelForGeneration, self).__init__(cfg, name=name) + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + d_model = cfg['hidden_size'] + d_vocab = cfg['vocab_size'] + + self.mlm = _build_linear( + d_model, d_model, append_name(name, 'mask_lm_trans_fc'), initializer, act=cfg['hidden_act']) + self.mlm_ln = _build_ln(d_model, name=append_name(name, 'mask_lm_trans')) + self.mlm_bias = L.create_parameter( + dtype='float32', + shape=[d_vocab], + attr=F.ParamAttr( + name=append_name(name, 'mask_lm_out_fc.b_0'), initializer=F.initializer.Constant(value=0.0)), + is_bias=True, + ) + + def forward(self, src_ids, *args, **kwargs): + tgt_labels = kwargs.pop('tgt_labels', None) + tgt_pos = kwargs.pop('tgt_pos', None) + encode_only = kwargs.pop('encode_only', False) + _, encoded, info = ErnieModel.forward(self, src_ids, *args, **kwargs) + if encode_only: + return None, None, info + elif tgt_labels is None: + encoded = self.mlm(encoded) + encoded = self.mlm_ln(encoded) + logits = L.matmul(encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias + output_ids = L.argmax(logits, -1) + return output_ids, logits, info + else: + encoded_2d = L.gather_nd(encoded, tgt_pos) + encoded_2d = self.mlm(encoded_2d) + encoded_2d = self.mlm_ln(encoded_2d) + logits_2d = L.matmul(encoded_2d, self.word_emb.weight, transpose_y=True) + self.mlm_bias + if len(tgt_labels.shape) == 1: + tgt_labels = L.reshape(tgt_labels, [-1, 1]) + + loss = L.reduce_mean( + L.softmax_with_cross_entropy(logits_2d, tgt_labels, soft_label=(tgt_labels.shape[-1] != 1))) + return loss, logits_2d, info diff --git a/modules/text/text_generation/ernie_gen_couplet/model/tokenizing_ernie.py b/modules/text/text_generation/ernie_gen_couplet/model/tokenizing_ernie.py new file mode 100644 index 0000000000000000000000000000000000000000..c9e5638f9a17207ce2d664c27376f08138876da3 --- /dev/null +++ b/modules/text/text_generation/ernie_gen_couplet/model/tokenizing_ernie.py @@ -0,0 +1,163 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
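+
+# `_wordpiece` in this file splits a token greedily from the left, always
+# taking the longest substring still present in `vocab` and prefixing
+# non-initial pieces with '##'. For example, assuming 'hello' and '##world'
+# are in the vocab:
+#     _wordpiece('helloworld', vocab, unk_token='[UNK]')
+#     -> (['hello', '##world'], [(0, 5), (5, 10)])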
+ +import six +import re +import logging +from functools import partial + +import numpy as np + +import io + +open = partial(io.open, encoding='utf8') + +log = logging.getLogger(__name__) + +_max_input_chars_per_word = 100 + + +def _wordpiece(token, vocab, unk_token, prefix='##', sentencepiece_prefix=''): + """ wordpiece: helloworld => [hello, ##world] """ + chars = list(token) + if len(chars) > _max_input_chars_per_word: + return [unk_token], [(0, len(chars))] + + is_bad = False + start = 0 + sub_tokens = [] + sub_pos = [] + while start < len(chars): + end = len(chars) + cur_substr = None + while start < end: + substr = "".join(chars[start:end]) + if start == 0: + substr = sentencepiece_prefix + substr + if start > 0: + substr = prefix + substr + if substr in vocab: + cur_substr = substr + break + end -= 1 + if cur_substr is None: + is_bad = True + break + sub_tokens.append(cur_substr) + sub_pos.append((start, end)) + start = end + if is_bad: + return [unk_token], [(0, len(chars))] + else: + return sub_tokens, sub_pos + + +class ErnieTokenizer(object): + def __init__(self, + vocab, + unk_token='[UNK]', + sep_token='[SEP]', + cls_token='[CLS]', + pad_token='[PAD]', + mask_token='[MASK]', + wordpiece_prefix='##', + sentencepiece_prefix='', + lower=True, + encoding='utf8', + special_token_list=[]): + if not isinstance(vocab, dict): + raise ValueError('expect `vocab` to be instance of dict, got %s' % type(vocab)) + self.vocab = vocab + self.lower = lower + self.prefix = wordpiece_prefix + self.sentencepiece_prefix = sentencepiece_prefix + self.pad_id = self.vocab[pad_token] + self.cls_id = cls_token and self.vocab[cls_token] + self.sep_id = sep_token and self.vocab[sep_token] + self.unk_id = unk_token and self.vocab[unk_token] + self.mask_id = mask_token and self.vocab[mask_token] + self.unk_token = unk_token + special_tokens = {pad_token, cls_token, sep_token, unk_token, mask_token} | set(special_token_list) + pat_str = '' + for t in special_tokens: + if t is None: + continue + pat_str += '(%s)|' % re.escape(t) + pat_str += r'([a-zA-Z0-9]+|\S)' + log.debug('regex: %s' % pat_str) + self.pat = re.compile(pat_str) + self.encoding = encoding + + def tokenize(self, text): + if len(text) == 0: + return [] + if six.PY3 and not isinstance(text, six.string_types): + text = text.decode(self.encoding) + if six.PY2 and isinstance(text, str): + text = text.decode(self.encoding) + + res = [] + for match in self.pat.finditer(text): + match_group = match.group(0) + if match.groups()[-1]: + if self.lower: + match_group = match_group.lower() + words, _ = _wordpiece( + match_group, + vocab=self.vocab, + unk_token=self.unk_token, + prefix=self.prefix, + sentencepiece_prefix=self.sentencepiece_prefix) + else: + words = [match_group] + res += words + return res + + def convert_tokens_to_ids(self, tokens): + return [self.vocab.get(t, self.unk_id) for t in tokens] + + def truncate(self, id1, id2, seqlen): + len1 = len(id1) + len2 = len(id2) + half = seqlen // 2 + if len1 > len2: + len1_truncated, len2_truncated = max(half, seqlen - len2), min(half, len2) + else: + len1_truncated, len2_truncated = min(half, seqlen - len1), max(half, seqlen - len1) + return id1[:len1_truncated], id2[:len2_truncated] + + def build_for_ernie(self, text_id, pair_id=[]): + """build sentence type id, add [CLS] [SEP]""" + text_id_type = np.zeros_like(text_id, dtype=np.int64) + ret_id = np.concatenate([[self.cls_id], text_id, [self.sep_id]], 0) + ret_id_type = np.concatenate([[0], text_id_type, [0]], 0) + + if len(pair_id): + pair_id_type = 
np.ones_like(pair_id, dtype=np.int64) + ret_id = np.concatenate([ret_id, pair_id, [self.sep_id]], 0) + ret_id_type = np.concatenate([ret_id_type, pair_id_type, [1]], 0) + return ret_id, ret_id_type + + def encode(self, text, pair=None, truncate_to=None): + text_id = np.array(self.convert_tokens_to_ids(self.tokenize(text)), dtype=np.int64) + text_id_type = np.zeros_like(text_id, dtype=np.int64) + if pair is not None: + pair_id = np.array(self.convert_tokens_to_ids(self.tokenize(pair)), dtype=np.int64) + else: + pair_id = [] + if truncate_to is not None: + text_id, pair_id = self.truncate(text_id, [] if pair_id is None else pair_id, truncate_to) + + ret_id, ret_id_type = self.build_for_ernie(text_id, pair_id) + return ret_id, ret_id_type diff --git a/modules/text/text_generation/ernie_gen_couplet/module.py b/modules/text/text_generation/ernie_gen_couplet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..994e5ea0a61ef557e726f4615141d40d52d5a394 --- /dev/null +++ b/modules/text/text_generation/ernie_gen_couplet/module.py @@ -0,0 +1,176 @@ +# coding:utf-8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import ast +import json + +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.module.module import runnable +from paddlehub.module.nlp_module import DataFormatError +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo, serving + +import argparse +import os +import numpy as np + +import paddle.fluid.dygraph as D + +from ernie_gen_couplet.model.tokenizing_ernie import ErnieTokenizer +from ernie_gen_couplet.model.decode import beam_search_infilling +from ernie_gen_couplet.model.modeling_ernie_gen import ErnieModelForGeneration + + +@moduleinfo( + name="ernie_gen_couplet", + version="1.0.2", + summary= + "ERNIE-GEN is a multi-flow language generation framework for both pre-training and fine-tuning. 
This module has been fine-tuned for the couplet generation task.",
+    author="baidu-nlp",
+    author_email="",
+    type="nlp/text_generation",
+)
+class ErnieGen(hub.NLPPredictionModule):
+    def _initialize(self):
+        """
+        initialize with the necessary elements
+        """
+        assets_path = os.path.join(self.directory, "assets")
+        gen_checkpoint_path = os.path.join(assets_path, "ernie_gen_couplet")
+        ernie_cfg_path = os.path.join(assets_path, 'ernie_config.json')
+        with open(ernie_cfg_path, encoding='utf8') as ernie_cfg_file:
+            ernie_cfg = dict(json.loads(ernie_cfg_file.read()))
+        ernie_vocab_path = os.path.join(assets_path, 'vocab.txt')
+        with open(ernie_vocab_path, encoding='utf8') as ernie_vocab_file:
+            ernie_vocab = {j.strip().split('\t')[0]: i for i, j in enumerate(ernie_vocab_file.readlines())}
+
+        with fluid.dygraph.guard(fluid.CPUPlace()):
+            with fluid.unique_name.guard():
+                self.model = ErnieModelForGeneration(ernie_cfg)
+                finetuned_states, _ = D.load_dygraph(gen_checkpoint_path)
+                self.model.set_dict(finetuned_states)
+
+        self.tokenizer = ErnieTokenizer(ernie_vocab)
+        self.rev_dict = {v: k for k, v in self.tokenizer.vocab.items()}
+        self.rev_dict[self.tokenizer.pad_id] = ''  # replace [PAD]
+        self.rev_dict[self.tokenizer.unk_id] = ''  # replace [UNK]
+        self.rev_lookup = np.vectorize(lambda i: self.rev_dict[i])
+
+    @serving
+    def generate(self, texts, use_gpu=False, beam_width=5):
+        """
+        Generate the right (second) lines of couplets from the given left (first) lines.
+
+        Args:
+            texts(list): the left (first) lines of the couplets.
+            use_gpu(bool): whether to use gpu for prediction or not.
+            beam_width(int): the beam search width.
+
+        Returns:
+            results(list): the generated right (second) lines.
+        """
+        if texts and isinstance(texts, list) and all(texts) and all([isinstance(text, str) for text in texts]):
+            predicted_data = texts
+        else:
+            raise ValueError("The input texts should be a list with nonempty string elements.")
+        for text in texts:
+            for char in text:
+                if not '\u4e00' <= char <= '\u9fff':
+                    logger.warning(
+                        'The input text: %s, contains non-Chinese characters, which may produce unexpected output' %
+                        text)
+                    break
+
+        if use_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ:
+            use_gpu = False
+            logger.warning(
+                "use_gpu has been set to False: the environment variable CUDA_VISIBLE_DEVICES is not set while use_gpu=True"
+            )
+        if use_gpu:
+            place = fluid.CUDAPlace(0)
+        else:
+            place = fluid.CPUPlace()
+
+        with fluid.dygraph.guard(place):
+            self.model.eval()
+            results = []
+            for text in predicted_data:
+                sample_results = []
+                ids, sids = self.tokenizer.encode(text)
+                src_ids = D.to_variable(np.expand_dims(ids, 0))
+                src_sids = D.to_variable(np.expand_dims(sids, 0))
+                output_ids = beam_search_infilling(
+                    self.model,
+                    src_ids,
+                    src_sids,
+                    eos_id=self.tokenizer.sep_id,
+                    sos_id=self.tokenizer.cls_id,
+                    attn_id=self.tokenizer.vocab['[MASK]'],
+                    max_decode_len=20,
+                    max_encode_len=20,
+                    beam_width=beam_width,
+                    tgt_type_id=1)
+                output_str = self.rev_lookup(output_ids[0].numpy())
+
+                for ostr in output_str.tolist():
+                    if '[SEP]' in ostr:
+                        ostr = ostr[:ostr.index('[SEP]')]
+                    sample_results.append("".join(ostr))
+                results.append(sample_results)
+        return results
+
+    def add_module_config_arg(self):
+        """
+        Add the command config options
+        """
+        self.arg_config_group.add_argument(
+            '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU for prediction")
+
+        self.arg_config_group.add_argument('--beam_width', type=int, default=5, help="the beam search width")
+
+    @runnable
+    def run_cmd(self, argvs):
+        """
+        Run as a command
+        """
+        self.parser = argparse.ArgumentParser(
+            description='Run the %s module.' % self.name,
+            prog='hub run %s' % self.name,
+            usage='%(prog)s',
+            add_help=True)
+
+        self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required")
+        self.arg_config_group = self.parser.add_argument_group(
+            title="Config options", description="Run configuration for controlling module behavior, optional.")
+
+        self.add_module_config_arg()
+        self.add_module_input_arg()
+
+        args = self.parser.parse_args(argvs)
+
+        try:
+            input_data = self.check_input_data(args)
+        except (DataFormatError, RuntimeError):
+            self.parser.print_help()
+            return None
+
+        results = self.generate(texts=input_data, use_gpu=args.use_gpu, beam_width=args.beam_width)
+
+        return results
+
+
+if __name__ == "__main__":
+    module = ErnieGen()
+    for result in module.generate(['上海自来水来自海上', '风吹云乱天垂泪'], beam_width=5):
+        print(result)
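
Once installed, the module is reached through the standard PaddleHub entry points; a minimal sketch, assuming `hub install ernie_gen_couplet` has already been run:

```python
import paddlehub as hub

module = hub.Module(name="ernie_gen_couplet")
# One list of beam_width candidate second lines per input first line.
results = module.generate(texts=["风吹云乱天垂泪"], use_gpu=False, beam_width=5)
for candidates in results:
    print(candidates)
```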
diff --git a/hub_module/modules/text/text_generation/ernie_gen_lover_words/README.md b/modules/text/text_generation/ernie_gen_lover_words/README.md
similarity index 100%
rename from hub_module/modules/text/text_generation/ernie_gen_lover_words/README.md
rename to modules/text/text_generation/ernie_gen_lover_words/README.md
diff --git a/hub_module/modules/text/text_generation/ernie_gen_couplet/__init__.py b/modules/text/text_generation/ernie_gen_lover_words/__init__.py
similarity index 100%
rename from hub_module/modules/text/text_generation/ernie_gen_couplet/__init__.py
rename to modules/text/text_generation/ernie_gen_lover_words/__init__.py
diff --git a/modules/text/text_generation/ernie_gen_lover_words/model/decode.py b/modules/text/text_generation/ernie_gen_lover_words/model/decode.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d706b52a42397455565cd20c8d3adfe819cec04
--- /dev/null
+++ b/modules/text/text_generation/ernie_gen_lover_words/model/decode.py
@@ -0,0 +1,255 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+import numpy as np
+from collections import namedtuple
+
+import paddle.fluid as F
+import paddle.fluid.layers as L
+import paddle.fluid.dygraph as D
+
+
+def gen_bias(encoder_inputs, decoder_inputs, step):
+    decoder_bsz, decoder_seqlen = decoder_inputs.shape[:2]
+    attn_bias = L.reshape(L.range(0, decoder_seqlen, 1, dtype='float32') + 1, [1, -1, 1])
+    decoder_bias = L.cast((L.matmul(attn_bias, 1.
/ attn_bias, transpose_y=True) >= 1.), + 'float32') #[1, 1, decoderlen, decoderlen] + encoder_bias = L.unsqueeze(L.cast(L.ones_like(encoder_inputs), 'float32'), [1]) #[bsz, 1, encoderlen] + encoder_bias = L.expand(encoder_bias, [1, decoder_seqlen, 1]) #[bsz,decoderlen, encoderlen] + decoder_bias = L.expand(decoder_bias, [decoder_bsz, 1, 1]) #[bsz, decoderlen, decoderlen] + if step > 0: + bias = L.concat([encoder_bias, L.ones([decoder_bsz, decoder_seqlen, step], 'float32'), decoder_bias], -1) + else: + bias = L.concat([encoder_bias, decoder_bias], -1) + return bias + + +@D.no_grad +def greedy_search_infilling(model, + q_ids, + q_sids, + sos_id, + eos_id, + attn_id, + max_encode_len=640, + max_decode_len=100, + tgt_type_id=3): + model.eval() + _, logits, info = model(q_ids, q_sids) + gen_ids = L.argmax(logits, -1) + d_batch, d_seqlen = q_ids.shape + seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) + has_stopped = np.zeros([d_batch], dtype=np.bool) + gen_seq_len = np.zeros([d_batch], dtype=np.int64) + output_ids = [] + + past_cache = info['caches'] + + cls_ids = L.ones([d_batch], dtype='int64') * sos_id + attn_ids = L.ones([d_batch], dtype='int64') * attn_id + ids = L.stack([cls_ids, attn_ids], -1) + for step in range(max_decode_len): + bias = gen_bias(q_ids, ids, step) + pos_ids = D.to_variable(np.tile(np.array([[step, step + 1]], dtype=np.int64), [d_batch, 1])) + pos_ids += seqlen + _, logits, info = model( + ids, L.ones_like(ids) * tgt_type_id, pos_ids=pos_ids, attn_bias=bias, past_cache=past_cache) + gen_ids = L.argmax(logits, -1) + + past_cached_k, past_cached_v = past_cache + cached_k, cached_v = info['caches'] + cached_k = [L.concat([pk, k[:, :1, :]], 1) for pk, k in zip(past_cached_k, cached_k)] # concat cached + cached_v = [L.concat([pv, v[:, :1, :]], 1) for pv, v in zip(past_cached_v, cached_v)] + past_cache = (cached_k, cached_v) + + gen_ids = gen_ids[:, 1] + ids = L.stack([gen_ids, attn_ids], 1) + + gen_ids = gen_ids.numpy() + has_stopped |= (gen_ids == eos_id).astype(np.bool) + gen_seq_len += (1 - has_stopped.astype(np.int64)) + output_ids.append(gen_ids.tolist()) + if has_stopped.all(): + break + output_ids = np.array(output_ids).transpose([1, 0]) + return output_ids + + +BeamSearchState = namedtuple('BeamSearchState', ['log_probs', 'lengths', 'finished']) +BeamSearchOutput = namedtuple('BeamSearchOutput', ['scores', 'predicted_ids', 'beam_parent_ids']) + + +def log_softmax(x): + e_x = np.exp(x - np.max(x)) + return np.log(e_x / e_x.sum()) + + +def mask_prob(p, onehot_eos, finished): + is_finished = L.cast(L.reshape(finished, [-1, 1]) != 0, 'float32') + p = is_finished * (1. - L.cast(onehot_eos, 'float32')) * -9999. + (1. - is_finished) * p + return p + + +def hyp_score(log_probs, length, length_penalty): + lp = L.pow((5. 
+ L.cast(length, 'float32')) / 6., length_penalty)
+    return log_probs / lp
+
+
+def beam_search_step(state, logits, eos_id, beam_width, is_first_step, length_penalty):
+    """logits.shape == [B*W, V]"""
+    _, vocab_size = logits.shape
+
+    bsz, beam_width = state.log_probs.shape
+    onehot_eos = L.cast(F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size), 'int64')  #[1, V]
+
+    probs = L.log(L.softmax(logits))  #[B*W, V]
+    probs = mask_prob(probs, onehot_eos, state.finished)  #[B*W, V]
+    allprobs = L.reshape(state.log_probs, [-1, 1]) + probs  #[B*W, V]
+
+    not_finished = 1 - L.reshape(state.finished, [-1, 1])  #[B*W, 1]
+    not_eos = 1 - onehot_eos
+    length_to_add = not_finished * not_eos  #[B*W, V]
+    alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add
+
+    allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size])
+    alllen = L.reshape(alllen, [-1, beam_width * vocab_size])
+    allscore = hyp_score(allprobs, alllen, length_penalty)
+    if is_first_step:
+        allscore = L.reshape(allscore, [bsz, beam_width, -1])[:, 0, :]  # first step: only consider beam 0
+    scores, idx = L.topk(allscore, k=beam_width)  #[B, W]
+    next_beam_id = idx // vocab_size  #[B, W]
+    next_word_id = idx % vocab_size
+
+    gather_idx = L.concat([L.where(idx != -1)[:, :1], L.reshape(idx, [-1, 1])], 1)
+    next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape)
+    next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape)
+
+    gather_idx = L.concat([L.where(next_beam_id != -1)[:, :1], L.reshape(next_beam_id, [-1, 1])], 1)
+    next_finished = L.reshape(L.gather_nd(state.finished, gather_idx),
+                              state.finished.shape)  # gather the old beam states according to the new beam ids
+
+    next_finished += L.cast(next_word_id == eos_id, 'int64')
+    next_finished = L.cast(next_finished > 0, 'int64')
+
+    next_state = BeamSearchState(log_probs=next_probs, lengths=next_len, finished=next_finished)
+    output = BeamSearchOutput(scores=scores, predicted_ids=next_word_id, beam_parent_ids=next_beam_id)
+
+    return output, next_state
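+
+# The decode loop below records, per step, the chosen tokens and their parent
+# beam ids; `L.gather_tree` then walks these parent pointers backwards to
+# assemble full hypotheses. Roughly (simplified, ignoring end-of-sentence
+# handling):
+#     for t in (max_step - 1, ..., 0):
+#         out[t] = step_ids[t, parents]
+#         parents = parent_ids[t, parents]
+# e.g. step_ids = [[3, 5], [7, 8]] with parent_ids = [[0, 0], [1, 0]] yields
+# [[5, 3], [7, 8]]: the final beam-0 hypothesis is (5, 7).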
+@D.no_grad
+def beam_search_infilling(model,
+                          q_ids,
+                          q_sids,
+                          sos_id,
+                          eos_id,
+                          attn_id,
+                          max_encode_len=640,
+                          max_decode_len=100,
+                          beam_width=5,
+                          tgt_type_id=3,
+                          length_penalty=1.0):
+    model.eval()
+    _, __, info = model(q_ids, q_sids)
+    d_batch, d_seqlen = q_ids.shape
+
+    state = BeamSearchState(
+        log_probs=L.zeros([d_batch, beam_width], 'float32'),
+        lengths=L.zeros([d_batch, beam_width], 'int64'),
+        finished=L.zeros([d_batch, beam_width], 'int64'))
+    outputs = []
+
+    def reorder_(t, parent_id):
+        """reorder cache according to parent beam id"""
+        gather_idx = L.where(parent_id != -1)[:, 0] * beam_width + L.reshape(parent_id, [-1])
+        t = L.gather(t, gather_idx)
+        return t
+
+    def tile_(t, times):
+        """repeat `t` `times` times along a new second axis, then fold back into the batch axis"""
+        _shapes = list(t.shape[1:])
+        ret = L.reshape(L.expand(L.unsqueeze(t, [1]), [1, times] + [1] * len(_shapes)), [-1] + _shapes)
+        return ret
+
+    cached_k, cached_v = info['caches']
+    cached_k = [tile_(k, beam_width) for k in cached_k]
+    cached_v = [tile_(v, beam_width) for v in cached_v]
+    past_cache = (cached_k, cached_v)
+
+    q_ids = tile_(q_ids, beam_width)
+    seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True)
+
+    cls_ids = L.ones([d_batch * beam_width], dtype='int64') * sos_id
+    attn_ids = L.ones([d_batch * beam_width], dtype='int64') * attn_id  # SOS
+    ids = L.stack([cls_ids, attn_ids], -1)
+    for step in range(max_decode_len):
+        bias = gen_bias(q_ids, ids, step)
+        pos_ids = D.to_variable(np.tile(np.array([[step, step + 1]], dtype=np.int64), [d_batch * beam_width, 1]))
+        pos_ids += seqlen
+
+        _, logits, info = model(
+            ids, L.ones_like(ids) * tgt_type_id, pos_ids=pos_ids, attn_bias=bias, past_cache=past_cache)
+
+        output, state = beam_search_step(
+            state,
+            logits[:, 1],
+            eos_id=eos_id,
+            beam_width=beam_width,
+            is_first_step=(step == 0),
+            length_penalty=length_penalty)
+        outputs.append(output)
+
+        past_cached_k, past_cached_v = past_cache
+        cached_k, cached_v = info['caches']
+        cached_k = [
+            reorder_(L.concat([pk, k[:, :1, :]], 1), output.beam_parent_ids) for pk, k in zip(past_cached_k, cached_k)
+        ]  # append the new key to the cache, then reorder by parent beam id
+        cached_v = [
+            reorder_(L.concat([pv, v[:, :1, :]], 1), output.beam_parent_ids) for pv, v in zip(past_cached_v, cached_v)
+        ]
+        past_cache = (cached_k, cached_v)
+
+        pred_ids_flatten = L.reshape(output.predicted_ids, [d_batch * beam_width])
+        ids = L.stack([pred_ids_flatten, attn_ids], 1)
+
+        if state.finished.numpy().all():
+            break
+
+    final_ids = L.stack([o.predicted_ids for o in outputs], 0)
+    final_parent_ids = L.stack([o.beam_parent_ids for o in outputs], 0)
+    final_ids = L.gather_tree(final_ids, final_parent_ids)
+    final_ids = L.transpose(L.reshape(final_ids, [-1, d_batch * 1, beam_width]), [1, 2, 0])
+    return final_ids
+
+
+en_patten = re.compile(r'^[a-zA-Z0-9]*$')
+
+
+def post_process(token):
+    # undo wordpiece: '##'-pieces glue onto the previous token; latin tokens get a leading space
+    if token.startswith('##'):
+        ret = token[2:]
+    else:
+        if en_patten.match(token):
+            ret = ' ' + token
+        else:
+            ret = token
+    return ret
diff --git a/modules/text/text_generation/ernie_gen_lover_words/model/file_utils.py b/modules/text/text_generation/ernie_gen_lover_words/model/file_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..608be4efc6644626f7f408df200fd299f2dd997e
--- /dev/null
+++ b/modules/text/text_generation/ernie_gen_lover_words/model/file_utils.py
@@ -0,0 +1,46 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+
+from tqdm import tqdm
+from paddlehub.common.logger import logger
+from paddlehub.common.dir import MODULE_HOME
+
+
+def _fetch_from_remote(url, force_download=False):
+    import tempfile, requests, tarfile
+    cached_dir = os.path.join(MODULE_HOME, "ernie_for_gen")
+    if force_download or not os.path.exists(cached_dir):
+        with tempfile.NamedTemporaryFile() as f:
+            #url = 'https://ernie.bj.bcebos.com/ERNIE_stable.tgz'
+            r = requests.get(url, stream=True)
+            total_len = int(r.headers.get('content-length'))
+            for chunk in tqdm(
+                    r.iter_content(chunk_size=1024), total=total_len // 1024, desc='downloading %s' % url, unit='KB'):
+                if chunk:
+                    f.write(chunk)
+                    f.flush()
+            logger.debug('extracting...
to %s' % f.name) + with tarfile.open(f.name) as tf: + tf.extractall(path=cached_dir) + logger.debug('%s cached in %s' % (url, cached_dir)) + return cached_dir + + +def add_docstring(doc): + def func(f): + f.__doc__ += ('\n======other docs from supper class ======\n%s' % doc) + return f + + return func diff --git a/modules/text/text_generation/ernie_gen_lover_words/model/modeling_ernie.py b/modules/text/text_generation/ernie_gen_lover_words/model/modeling_ernie.py new file mode 100644 index 0000000000000000000000000000000000000000..d5de28a5fee73371babd05b644e03a0f75ecdd5e --- /dev/null +++ b/modules/text/text_generation/ernie_gen_lover_words/model/modeling_ernie.py @@ -0,0 +1,327 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +import logging + +import paddle.fluid.dygraph as D +import paddle.fluid as F +import paddle.fluid.layers as L + +log = logging.getLogger(__name__) + + +def _build_linear(n_in, n_out, name, init, act=None): + return D.Linear( + n_in, + n_out, + param_attr=F.ParamAttr(name='%s.w_0' % name if name is not None else None, initializer=init), + bias_attr='%s.b_0' % name if name is not None else None, + act=act) + + +def _build_ln(n_in, name): + return D.LayerNorm( + normalized_shape=n_in, + param_attr=F.ParamAttr( + name='%s_layer_norm_scale' % name if name is not None else None, initializer=F.initializer.Constant(1.)), + bias_attr=F.ParamAttr( + name='%s_layer_norm_bias' % name if name is not None else None, initializer=F.initializer.Constant(1.)), + ) + + +def append_name(name, postfix): + if name is None: + return None + elif name == '': + return postfix + else: + return '%s_%s' % (name, postfix) + + +class AttentionLayer(D.Layer): + def __init__(self, cfg, name=None): + super(AttentionLayer, self).__init__() + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + d_model = cfg['hidden_size'] + n_head = cfg['num_attention_heads'] + assert d_model % n_head == 0 + d_model_q = cfg.get('query_hidden_size_per_head', d_model // n_head) * n_head + d_model_v = cfg.get('value_hidden_size_per_head', d_model // n_head) * n_head + self.n_head = n_head + self.d_key = d_model_q // n_head + self.q = _build_linear(d_model, d_model_q, append_name(name, 'query_fc'), initializer) + self.k = _build_linear(d_model, d_model_q, append_name(name, 'key_fc'), initializer) + self.v = _build_linear(d_model, d_model_v, append_name(name, 'value_fc'), initializer) + self.o = _build_linear(d_model_v, d_model, append_name(name, 'output_fc'), initializer) + self.dropout = lambda i: L.dropout( + i, + dropout_prob=cfg['attention_probs_dropout_prob'], + dropout_implementation="upscale_in_train", + ) if self.training else i + + def forward(self, queries, keys, values, attn_bias, past_cache): + assert len(queries.shape) == len(keys.shape) == len(values.shape) == 3 + + 
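+        # Project into per-head query/key/value, optionally prepend cached
+        # keys/values (incremental decoding), then scaled dot-product attention.
+        # Note q is pre-scaled by d_key ** -0.5 below, so matmul(q, k^T)
+        # already computes q k^T / sqrt(d_key).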
q = self.q(queries) + k = self.k(keys) + v = self.v(values) + + cache = (k, v) + if past_cache is not None: + cached_k, cached_v = past_cache + k = L.concat([cached_k, k], 1) + v = L.concat([cached_v, v], 1) + + q = L.transpose(L.reshape(q, [0, 0, self.n_head, q.shape[-1] // self.n_head]), + [0, 2, 1, 3]) #[batch, head, seq, dim] + k = L.transpose(L.reshape(k, [0, 0, self.n_head, k.shape[-1] // self.n_head]), + [0, 2, 1, 3]) #[batch, head, seq, dim] + v = L.transpose(L.reshape(v, [0, 0, self.n_head, v.shape[-1] // self.n_head]), + [0, 2, 1, 3]) #[batch, head, seq, dim] + + q = L.scale(q, scale=self.d_key**-0.5) + score = L.matmul(q, k, transpose_y=True) + if attn_bias is not None: + score += attn_bias + score = L.softmax(score, use_cudnn=True) + score = self.dropout(score) + + out = L.matmul(score, v) + out = L.transpose(out, [0, 2, 1, 3]) + out = L.reshape(out, [0, 0, out.shape[2] * out.shape[3]]) + + out = self.o(out) + return out, cache + + +class PositionwiseFeedForwardLayer(D.Layer): + def __init__(self, cfg, name=None): + super(PositionwiseFeedForwardLayer, self).__init__() + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + d_model = cfg['hidden_size'] + d_ffn = cfg.get('intermediate_size', 4 * d_model) + assert cfg['hidden_act'] in ['relu', 'gelu'] + self.i = _build_linear(d_model, d_ffn, append_name(name, 'fc_0'), initializer, act=cfg['hidden_act']) + self.o = _build_linear(d_ffn, d_model, append_name(name, 'fc_1'), initializer) + prob = cfg.get('intermediate_dropout_prob', 0.) + self.dropout = lambda i: L.dropout( + i, + dropout_prob=prob, + dropout_implementation="upscale_in_train", + ) if self.training else i + + def forward(self, inputs): + hidden = self.i(inputs) + hidden = self.dropout(hidden) + out = self.o(hidden) + return out + + +class ErnieBlock(D.Layer): + def __init__(self, cfg, name=None): + super(ErnieBlock, self).__init__() + d_model = cfg['hidden_size'] + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + + self.attn = AttentionLayer(cfg, name=append_name(name, 'multi_head_att')) + self.ln1 = _build_ln(d_model, name=append_name(name, 'post_att')) + self.ffn = PositionwiseFeedForwardLayer(cfg, name=append_name(name, 'ffn')) + self.ln2 = _build_ln(d_model, name=append_name(name, 'post_ffn')) + prob = cfg.get('intermediate_dropout_prob', cfg['hidden_dropout_prob']) + self.dropout = lambda i: L.dropout( + i, + dropout_prob=prob, + dropout_implementation="upscale_in_train", + ) if self.training else i + + def forward(self, inputs, attn_bias=None, past_cache=None): + attn_out, cache = self.attn(inputs, inputs, inputs, attn_bias, past_cache=past_cache) #self attn + attn_out = self.dropout(attn_out) + hidden = attn_out + inputs + hidden = self.ln1(hidden) # dropout/ add/ norm + + ffn_out = self.ffn(hidden) + ffn_out = self.dropout(ffn_out) + hidden = ffn_out + hidden + hidden = self.ln2(hidden) + return hidden, cache + + +class ErnieEncoderStack(D.Layer): + def __init__(self, cfg, name=None): + super(ErnieEncoderStack, self).__init__() + n_layers = cfg['num_hidden_layers'] + self.block = D.LayerList([ErnieBlock(cfg, append_name(name, 'layer_%d' % i)) for i in range(n_layers)]) + + def forward(self, inputs, attn_bias=None, past_cache=None): + if past_cache is not None: + assert isinstance( + past_cache, + tuple), 'unknown type of `past_cache`, expect tuple or list. 
got %s' % repr(type(past_cache)) + past_cache = list(zip(*past_cache)) + else: + past_cache = [None] * len(self.block) + cache_list_k, cache_list_v, hidden_list = [], [], [inputs] + + for b, p in zip(self.block, past_cache): + inputs, cache = b(inputs, attn_bias=attn_bias, past_cache=p) + cache_k, cache_v = cache + cache_list_k.append(cache_k) + cache_list_v.append(cache_v) + hidden_list.append(inputs) + + return inputs, hidden_list, (cache_list_k, cache_list_v) + + +class ErnieModel(D.Layer): + def __init__(self, cfg, name=None): + """ + Fundamental pretrained Ernie model + """ + log.debug('init ErnieModel with config: %s' % repr(cfg)) + D.Layer.__init__(self) + d_model = cfg['hidden_size'] + d_emb = cfg.get('emb_size', cfg['hidden_size']) + d_vocab = cfg['vocab_size'] + d_pos = cfg['max_position_embeddings'] + d_sent = cfg.get("sent_type_vocab_size") or cfg['type_vocab_size'] + self.n_head = cfg['num_attention_heads'] + self.return_additional_info = cfg.get('return_additional_info', False) + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + + self.ln = _build_ln(d_model, name=append_name(name, 'pre_encoder')) + self.word_emb = D.Embedding([d_vocab, d_emb], + param_attr=F.ParamAttr( + name=append_name(name, 'word_embedding'), initializer=initializer)) + self.pos_emb = D.Embedding([d_pos, d_emb], + param_attr=F.ParamAttr( + name=append_name(name, 'pos_embedding'), initializer=initializer)) + self.sent_emb = D.Embedding([d_sent, d_emb], + param_attr=F.ParamAttr( + name=append_name(name, 'sent_embedding'), initializer=initializer)) + prob = cfg['hidden_dropout_prob'] + self.dropout = lambda i: L.dropout( + i, + dropout_prob=prob, + dropout_implementation="upscale_in_train", + ) if self.training else i + + self.encoder_stack = ErnieEncoderStack(cfg, append_name(name, 'encoder')) + if cfg.get('has_pooler', True): + self.pooler = _build_linear( + cfg['hidden_size'], cfg['hidden_size'], append_name(name, 'pooled_fc'), initializer, act='tanh') + else: + self.pooler = None + self.train() + + def eval(self): + if F.in_dygraph_mode(): + super(ErnieModel, self).eval() + self.training = False + for l in self.sublayers(): + l.training = False + + def train(self): + if F.in_dygraph_mode(): + super(ErnieModel, self).train() + self.training = True + for l in self.sublayers(): + l.training = True + + def forward(self, + src_ids, + sent_ids=None, + pos_ids=None, + input_mask=None, + attn_bias=None, + past_cache=None, + use_causal_mask=False): + """ + Args: + src_ids (`Variable` of shape `[batch_size, seq_len]`): + Indices of input sequence tokens in the vocabulary. + sent_ids (optional, `Variable` of shape `[batch_size, seq_len]`): + aka token_type_ids, Segment token indices to indicate first and second portions of the inputs. + if None, assume all tokens come from `segment_a` + pos_ids(optional, `Variable` of shape `[batch_size, seq_len]`): + Indices of positions of each input sequence tokens in the position embeddings. + input_mask(optional `Variable` of shape `[batch_size, seq_len]`): + Mask to avoid performing attention on the padding token indices of the encoder input. 
+ attn_bias(optional, `Variable` of shape `[batch_size, seq_len, seq_len] or False`): + 3D version of `input_mask`, if set, overrides `input_mask`; if set not False, will not apply attention mask + past_cache(optional, tuple of two lists: cached key and cached value, + each is a list of `Variable`s of shape `[batch_size, seq_len, hidden_size]`): + cached key/value tensor that will be concated to generated key/value when performing self attention. + if set, `attn_bias` should not be None. + + Returns: + pooled (`Variable` of shape `[batch_size, hidden_size]`): + output logits of pooler classifier + encoded(`Variable` of shape `[batch_size, seq_len, hidden_size]`): + output logits of transformer stack + """ + assert len(src_ids.shape) == 2, 'expect src_ids.shape = [batch, sequecen], got %s' % (repr(src_ids.shape)) + assert attn_bias is not None if past_cache else True, 'if `past_cache` is specified; attn_bias should not be None' + d_batch = L.shape(src_ids)[0] + d_seqlen = L.shape(src_ids)[1] + if pos_ids is None: + pos_ids = L.reshape(L.range(0, d_seqlen, 1, dtype='int32'), [1, -1]) + pos_ids = L.cast(pos_ids, 'int64') + if attn_bias is None: + if input_mask is None: + input_mask = L.cast(src_ids != 0, 'float32') + assert len(input_mask.shape) == 2 + input_mask = L.unsqueeze(input_mask, axes=[-1]) + attn_bias = L.matmul(input_mask, input_mask, transpose_y=True) + if use_causal_mask: + sequence = L.reshape(L.range(0, d_seqlen, 1, dtype='float32') + 1., [1, 1, -1, 1]) + causal_mask = L.cast((L.matmul(sequence, 1. / sequence, transpose_y=True) >= 1.), 'float32') + attn_bias *= causal_mask + else: + assert len(attn_bias.shape) == 3, 'expect attn_bias tobe rank 3, got %r' % attn_bias.shape + attn_bias = (1. - attn_bias) * -10000.0 + attn_bias = L.unsqueeze(attn_bias, [1]) + attn_bias = L.expand(attn_bias, [1, self.n_head, 1, 1]) # avoid broadcast =_= + attn_bias.stop_gradient = True + + if sent_ids is None: + sent_ids = L.zeros_like(src_ids) + + src_embedded = self.word_emb(src_ids) + pos_embedded = self.pos_emb(pos_ids) + sent_embedded = self.sent_emb(sent_ids) + embedded = src_embedded + pos_embedded + sent_embedded + + embedded = self.dropout(self.ln(embedded)) + + encoded, hidden_list, cache_list = self.encoder_stack(embedded, attn_bias, past_cache=past_cache) + if self.pooler is not None: + pooled = self.pooler(encoded[:, 0, :]) + else: + pooled = None + + additional_info = { + 'hiddens': hidden_list, + 'caches': cache_list, + } + + if self.return_additional_info: + return pooled, encoded, additional_info + else: + return pooled, encoded diff --git a/modules/text/text_generation/ernie_gen_lover_words/model/modeling_ernie_gen.py b/modules/text/text_generation/ernie_gen_lover_words/model/modeling_ernie_gen.py new file mode 100644 index 0000000000000000000000000000000000000000..135dc2bbeaaed290911537744f23e308340b51ce --- /dev/null +++ b/modules/text/text_generation/ernie_gen_lover_words/model/modeling_ernie_gen.py @@ -0,0 +1,65 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.fluid as F +import paddle.fluid.layers as L + +from ernie_gen_lover_words.model.modeling_ernie import ErnieModel +from ernie_gen_lover_words.model.modeling_ernie import _build_linear, _build_ln, append_name + + +class ErnieModelForGeneration(ErnieModel): + def __init__(self, cfg, name=None): + cfg['return_additional_info'] = True + cfg['has_pooler'] = False + super(ErnieModelForGeneration, self).__init__(cfg, name=name) + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + d_model = cfg['hidden_size'] + d_vocab = cfg['vocab_size'] + + self.mlm = _build_linear( + d_model, d_model, append_name(name, 'mask_lm_trans_fc'), initializer, act=cfg['hidden_act']) + self.mlm_ln = _build_ln(d_model, name=append_name(name, 'mask_lm_trans')) + self.mlm_bias = L.create_parameter( + dtype='float32', + shape=[d_vocab], + attr=F.ParamAttr( + name=append_name(name, 'mask_lm_out_fc.b_0'), initializer=F.initializer.Constant(value=0.0)), + is_bias=True, + ) + + def forward(self, src_ids, *args, **kwargs): + tgt_labels = kwargs.pop('tgt_labels', None) + tgt_pos = kwargs.pop('tgt_pos', None) + encode_only = kwargs.pop('encode_only', False) + _, encoded, info = ErnieModel.forward(self, src_ids, *args, **kwargs) + if encode_only: + return None, None, info + elif tgt_labels is None: + encoded = self.mlm(encoded) + encoded = self.mlm_ln(encoded) + logits = L.matmul(encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias + output_ids = L.argmax(logits, -1) + return output_ids, logits, info + else: + encoded_2d = L.gather_nd(encoded, tgt_pos) + encoded_2d = self.mlm(encoded_2d) + encoded_2d = self.mlm_ln(encoded_2d) + logits_2d = L.matmul(encoded_2d, self.word_emb.weight, transpose_y=True) + self.mlm_bias + if len(tgt_labels.shape) == 1: + tgt_labels = L.reshape(tgt_labels, [-1, 1]) + + loss = L.reduce_mean( + L.softmax_with_cross_entropy(logits_2d, tgt_labels, soft_label=(tgt_labels.shape[-1] != 1))) + return loss, logits_2d, info diff --git a/modules/text/text_generation/ernie_gen_lover_words/model/tokenizing_ernie.py b/modules/text/text_generation/ernie_gen_lover_words/model/tokenizing_ernie.py new file mode 100644 index 0000000000000000000000000000000000000000..c9e5638f9a17207ce2d664c27376f08138876da3 --- /dev/null +++ b/modules/text/text_generation/ernie_gen_lover_words/model/tokenizing_ernie.py @@ -0,0 +1,163 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
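+
+# `build_for_ernie` below assembles `[CLS] text [SEP]` with sentence-type 0
+# and, when a pair is given, appends `pair [SEP]` with sentence-type 1, e.g.:
+#     ids:   [CLS] 我 爱 [SEP] 你 [SEP]
+#     types:   0   0  0    0   1    1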
+ +import six +import re +import logging +from functools import partial + +import numpy as np + +import io + +open = partial(io.open, encoding='utf8') + +log = logging.getLogger(__name__) + +_max_input_chars_per_word = 100 + + +def _wordpiece(token, vocab, unk_token, prefix='##', sentencepiece_prefix=''): + """ wordpiece: helloworld => [hello, ##world] """ + chars = list(token) + if len(chars) > _max_input_chars_per_word: + return [unk_token], [(0, len(chars))] + + is_bad = False + start = 0 + sub_tokens = [] + sub_pos = [] + while start < len(chars): + end = len(chars) + cur_substr = None + while start < end: + substr = "".join(chars[start:end]) + if start == 0: + substr = sentencepiece_prefix + substr + if start > 0: + substr = prefix + substr + if substr in vocab: + cur_substr = substr + break + end -= 1 + if cur_substr is None: + is_bad = True + break + sub_tokens.append(cur_substr) + sub_pos.append((start, end)) + start = end + if is_bad: + return [unk_token], [(0, len(chars))] + else: + return sub_tokens, sub_pos + + +class ErnieTokenizer(object): + def __init__(self, + vocab, + unk_token='[UNK]', + sep_token='[SEP]', + cls_token='[CLS]', + pad_token='[PAD]', + mask_token='[MASK]', + wordpiece_prefix='##', + sentencepiece_prefix='', + lower=True, + encoding='utf8', + special_token_list=[]): + if not isinstance(vocab, dict): + raise ValueError('expect `vocab` to be instance of dict, got %s' % type(vocab)) + self.vocab = vocab + self.lower = lower + self.prefix = wordpiece_prefix + self.sentencepiece_prefix = sentencepiece_prefix + self.pad_id = self.vocab[pad_token] + self.cls_id = cls_token and self.vocab[cls_token] + self.sep_id = sep_token and self.vocab[sep_token] + self.unk_id = unk_token and self.vocab[unk_token] + self.mask_id = mask_token and self.vocab[mask_token] + self.unk_token = unk_token + special_tokens = {pad_token, cls_token, sep_token, unk_token, mask_token} | set(special_token_list) + pat_str = '' + for t in special_tokens: + if t is None: + continue + pat_str += '(%s)|' % re.escape(t) + pat_str += r'([a-zA-Z0-9]+|\S)' + log.debug('regex: %s' % pat_str) + self.pat = re.compile(pat_str) + self.encoding = encoding + + def tokenize(self, text): + if len(text) == 0: + return [] + if six.PY3 and not isinstance(text, six.string_types): + text = text.decode(self.encoding) + if six.PY2 and isinstance(text, str): + text = text.decode(self.encoding) + + res = [] + for match in self.pat.finditer(text): + match_group = match.group(0) + if match.groups()[-1]: + if self.lower: + match_group = match_group.lower() + words, _ = _wordpiece( + match_group, + vocab=self.vocab, + unk_token=self.unk_token, + prefix=self.prefix, + sentencepiece_prefix=self.sentencepiece_prefix) + else: + words = [match_group] + res += words + return res + + def convert_tokens_to_ids(self, tokens): + return [self.vocab.get(t, self.unk_id) for t in tokens] + + def truncate(self, id1, id2, seqlen): + len1 = len(id1) + len2 = len(id2) + half = seqlen // 2 + if len1 > len2: + len1_truncated, len2_truncated = max(half, seqlen - len2), min(half, len2) + else: + len1_truncated, len2_truncated = min(half, seqlen - len1), max(half, seqlen - len1) + return id1[:len1_truncated], id2[:len2_truncated] + + def build_for_ernie(self, text_id, pair_id=[]): + """build sentence type id, add [CLS] [SEP]""" + text_id_type = np.zeros_like(text_id, dtype=np.int64) + ret_id = np.concatenate([[self.cls_id], text_id, [self.sep_id]], 0) + ret_id_type = np.concatenate([[0], text_id_type, [0]], 0) + + if len(pair_id): + pair_id_type = 
np.ones_like(pair_id, dtype=np.int64)
+            ret_id = np.concatenate([ret_id, pair_id, [self.sep_id]], 0)
+            ret_id_type = np.concatenate([ret_id_type, pair_id_type, [1]], 0)
+        return ret_id, ret_id_type
+
+    def encode(self, text, pair=None, truncate_to=None):
+        text_id = np.array(self.convert_tokens_to_ids(self.tokenize(text)), dtype=np.int64)
+        text_id_type = np.zeros_like(text_id, dtype=np.int64)
+        if pair is not None:
+            pair_id = np.array(self.convert_tokens_to_ids(self.tokenize(pair)), dtype=np.int64)
+        else:
+            pair_id = []
+        if truncate_to is not None:
+            text_id, pair_id = self.truncate(text_id, [] if pair_id is None else pair_id, truncate_to)
+
+        ret_id, ret_id_type = self.build_for_ernie(text_id, pair_id)
+        return ret_id, ret_id_type
diff --git a/modules/text/text_generation/ernie_gen_lover_words/module.py b/modules/text/text_generation/ernie_gen_lover_words/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..ccb70425949bcbf25270b3e5b82ec0564d9959a1
--- /dev/null
+++ b/modules/text/text_generation/ernie_gen_lover_words/module.py
@@ -0,0 +1,170 @@
+# coding:utf-8
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import ast
+import json
+
+import paddle.fluid as fluid
+import paddlehub as hub
+from paddlehub.module.module import runnable
+from paddlehub.module.nlp_module import DataFormatError
+from paddlehub.common.logger import logger
+from paddlehub.module.module import moduleinfo, serving
+
+import argparse
+import os
+import numpy as np
+
+import paddle.fluid.dygraph as D
+
+from ernie_gen_lover_words.model.tokenizing_ernie import ErnieTokenizer
+from ernie_gen_lover_words.model.decode import beam_search_infilling
+from ernie_gen_lover_words.model.modeling_ernie_gen import ErnieModelForGeneration
+
+
+@moduleinfo(
+    name="ernie_gen_lover_words",
+    version="1.0.1",
+    summary=
+    "ERNIE-GEN is a multi-flow language generation framework for both pre-training and fine-tuning. This module has been fine-tuned for the lover's words generation task.",
+    author="adaxiadaxi",
+    author_email="",
+    type="nlp/text_generation",
+)
+class ErnieGen(hub.NLPPredictionModule):
+    def _initialize(self):
+        """
+        Initialize with the necessary elements.
+        """
+        assets_path = os.path.join(self.directory, "assets")
+        gen_checkpoint_path = os.path.join(assets_path, "ernie_gen_lover_words")
+        ernie_cfg_path = os.path.join(assets_path, 'ernie_config.json')
+        with open(ernie_cfg_path, encoding='utf8') as ernie_cfg_file:
+            ernie_cfg = dict(json.loads(ernie_cfg_file.read()))
+        ernie_vocab_path = os.path.join(assets_path, 'vocab.txt')
+        with open(ernie_vocab_path, encoding='utf8') as ernie_vocab_file:
+            ernie_vocab = {j.strip().split('\t')[0]: i for i, j in enumerate(ernie_vocab_file.readlines())}
+
+        with fluid.dygraph.guard(fluid.CPUPlace()):
+            with fluid.unique_name.guard():
+                self.model = ErnieModelForGeneration(ernie_cfg)
+                finetuned_states, _ = D.load_dygraph(gen_checkpoint_path)
+                self.model.set_dict(finetuned_states)
+
+        self.tokenizer = ErnieTokenizer(ernie_vocab)
+        self.rev_dict = {v: k for k, v in self.tokenizer.vocab.items()}
+        self.rev_dict[self.tokenizer.pad_id] = ''  # replace [PAD]
+        self.rev_dict[self.tokenizer.unk_id] = ''  # replace [UNK]
+        self.rev_lookup = np.vectorize(lambda i: self.rev_dict[i])
+
+    @serving
+    def generate(self, texts, use_gpu=False, beam_width=5):
+        """
+        Get the lover's words continuations of the input texts.
+
+        Args:
+            texts(list): the front parts of the texts to be continued.
+            use_gpu(bool): whether to use GPU for prediction.
+            beam_width(int): the beam search width.
+
+        Returns:
+            results(list): the lover's words continuations.
+        """
+        if texts and isinstance(texts, list) and all(texts) and all([isinstance(text, str) for text in texts]):
+            predicted_data = texts
+        else:
+            raise ValueError("The input texts should be a list with nonempty string elements.")
+
+        if use_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ:
+            use_gpu = False
+            logger.warning(
+                "use_gpu has been set False as you didn't set the environment variable CUDA_VISIBLE_DEVICES while using use_gpu=True"
+            )
+        if use_gpu:
+            place = fluid.CUDAPlace(0)
+        else:
+            place = fluid.CPUPlace()
+
+        with fluid.dygraph.guard(place):
+            self.model.eval()
+            results = []
+            for text in predicted_data:
+                sample_results = []
+                ids, sids = self.tokenizer.encode(text)
+                src_ids = D.to_variable(np.expand_dims(ids, 0))
+                src_sids = D.to_variable(np.expand_dims(sids, 0))
+                output_ids = beam_search_infilling(
+                    self.model,
+                    src_ids,
+                    src_sids,
+                    eos_id=self.tokenizer.sep_id,
+                    sos_id=self.tokenizer.cls_id,
+                    attn_id=self.tokenizer.vocab['[MASK]'],
+                    max_decode_len=80,
+                    max_encode_len=20,
+                    beam_width=beam_width,
+                    tgt_type_id=1)
+                output_str = self.rev_lookup(output_ids[0].numpy())
+
+                for ostr in output_str.tolist():
+                    if '[SEP]' in ostr:
+                        ostr = ostr[:ostr.index('[SEP]')]
+                    sample_results.append("".join(ostr))
+                results.append(sample_results)
+        return results
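+
+    # A minimal usage sketch (illustrative only; it assumes the module and its
+    # assets are installed, and the input text is just a placeholder):
+    #
+    #   import paddlehub as hub
+    #   module = hub.Module(name="ernie_gen_lover_words")
+    #   results = module.generate(texts=["..."], use_gpu=False, beam_width=5)
+    #
+    # Because `generate` is decorated with @serving, the same signature is
+    # exposed over HTTP when the module is deployed with PaddleHub Serving.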
+
+    def add_module_config_arg(self):
+        """
+        Add the command config options
+        """
+        self.arg_config_group.add_argument(
+            '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU for prediction")
+
+        self.arg_config_group.add_argument('--beam_width', type=int, default=5, help="the beam search width")
+
+    @runnable
+    def run_cmd(self, argvs):
+        """
+        Run as a command
+        """
+        self.parser = argparse.ArgumentParser(
+            description='Run the %s module.' % self.name,
+            prog='hub run %s' % self.name,
+            usage='%(prog)s',
+            add_help=True)
+
+        self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required")
+        self.arg_config_group = self.parser.add_argument_group(
+            title="Config options", description="Run configuration for controlling module behavior, optional.")
+
+        self.add_module_config_arg()
+        self.add_module_input_arg()
+
+        args = self.parser.parse_args(argvs)
+
+        try:
+            input_data = self.check_input_data(args)
+        except (DataFormatError, RuntimeError):
+            self.parser.print_help()
+            return None
+
+        results = self.generate(texts=input_data, use_gpu=args.use_gpu, beam_width=args.beam_width)
+
+        return results
+
+
+if __name__ == "__main__":
+    module = ErnieGen()
+    for result in module.generate(['昔年旅南服,始识王荆州。', '高名出汉阴,禅阁跨香岑。'], beam_width=5):
+        print(result)
diff --git a/hub_module/modules/text/text_generation/ernie_gen_poetry/README.md b/modules/text/text_generation/ernie_gen_poetry/README.md
similarity index 100%
rename from hub_module/modules/text/text_generation/ernie_gen_poetry/README.md
rename to modules/text/text_generation/ernie_gen_poetry/README.md
diff --git a/hub_module/modules/text/text_generation/ernie_gen_lover_words/__init__.py b/modules/text/text_generation/ernie_gen_poetry/__init__.py
similarity index 100%
rename from hub_module/modules/text/text_generation/ernie_gen_lover_words/__init__.py
rename to modules/text/text_generation/ernie_gen_poetry/__init__.py
diff --git a/modules/text/text_generation/ernie_gen_poetry/model/decode.py b/modules/text/text_generation/ernie_gen_poetry/model/decode.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d706b52a42397455565cd20c8d3adfe819cec04
--- /dev/null
+++ b/modules/text/text_generation/ernie_gen_poetry/model/decode.py
@@ -0,0 +1,255 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+import numpy as np
+from collections import namedtuple
+
+import paddle.fluid as F
+import paddle.fluid.layers as L
+import paddle.fluid.dygraph as D
+
+
+def gen_bias(encoder_inputs, decoder_inputs, step):
+    decoder_bsz, decoder_seqlen = decoder_inputs.shape[:2]
+    attn_bias = L.reshape(L.range(0, decoder_seqlen, 1, dtype='float32') + 1, [1, -1, 1])
+    decoder_bias = L.cast((L.matmul(attn_bias, 1. 
/ attn_bias, transpose_y=True) >= 1.), + 'float32') #[1, 1, decoderlen, decoderlen] + encoder_bias = L.unsqueeze(L.cast(L.ones_like(encoder_inputs), 'float32'), [1]) #[bsz, 1, encoderlen] + encoder_bias = L.expand(encoder_bias, [1, decoder_seqlen, 1]) #[bsz,decoderlen, encoderlen] + decoder_bias = L.expand(decoder_bias, [decoder_bsz, 1, 1]) #[bsz, decoderlen, decoderlen] + if step > 0: + bias = L.concat([encoder_bias, L.ones([decoder_bsz, decoder_seqlen, step], 'float32'), decoder_bias], -1) + else: + bias = L.concat([encoder_bias, decoder_bias], -1) + return bias + + +@D.no_grad +def greedy_search_infilling(model, + q_ids, + q_sids, + sos_id, + eos_id, + attn_id, + max_encode_len=640, + max_decode_len=100, + tgt_type_id=3): + model.eval() + _, logits, info = model(q_ids, q_sids) + gen_ids = L.argmax(logits, -1) + d_batch, d_seqlen = q_ids.shape + seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) + has_stopped = np.zeros([d_batch], dtype=np.bool) + gen_seq_len = np.zeros([d_batch], dtype=np.int64) + output_ids = [] + + past_cache = info['caches'] + + cls_ids = L.ones([d_batch], dtype='int64') * sos_id + attn_ids = L.ones([d_batch], dtype='int64') * attn_id + ids = L.stack([cls_ids, attn_ids], -1) + for step in range(max_decode_len): + bias = gen_bias(q_ids, ids, step) + pos_ids = D.to_variable(np.tile(np.array([[step, step + 1]], dtype=np.int64), [d_batch, 1])) + pos_ids += seqlen + _, logits, info = model( + ids, L.ones_like(ids) * tgt_type_id, pos_ids=pos_ids, attn_bias=bias, past_cache=past_cache) + gen_ids = L.argmax(logits, -1) + + past_cached_k, past_cached_v = past_cache + cached_k, cached_v = info['caches'] + cached_k = [L.concat([pk, k[:, :1, :]], 1) for pk, k in zip(past_cached_k, cached_k)] # concat cached + cached_v = [L.concat([pv, v[:, :1, :]], 1) for pv, v in zip(past_cached_v, cached_v)] + past_cache = (cached_k, cached_v) + + gen_ids = gen_ids[:, 1] + ids = L.stack([gen_ids, attn_ids], 1) + + gen_ids = gen_ids.numpy() + has_stopped |= (gen_ids == eos_id).astype(np.bool) + gen_seq_len += (1 - has_stopped.astype(np.int64)) + output_ids.append(gen_ids.tolist()) + if has_stopped.all(): + break + output_ids = np.array(output_ids).transpose([1, 0]) + return output_ids + + +BeamSearchState = namedtuple('BeamSearchState', ['log_probs', 'lengths', 'finished']) +BeamSearchOutput = namedtuple('BeamSearchOutput', ['scores', 'predicted_ids', 'beam_parent_ids']) + + +def log_softmax(x): + e_x = np.exp(x - np.max(x)) + return np.log(e_x / e_x.sum()) + + +def mask_prob(p, onehot_eos, finished): + is_finished = L.cast(L.reshape(finished, [-1, 1]) != 0, 'float32') + p = is_finished * (1. - L.cast(onehot_eos, 'float32')) * -9999. + (1. - is_finished) * p + return p + + +def hyp_score(log_probs, length, length_penalty): + lp = L.pow((5. 
+ L.cast(length, 'float32')) / 6., length_penalty) + return log_probs / lp + + +def beam_search_step(state, logits, eos_id, beam_width, is_first_step, length_penalty): + """logits.shape == [B*W, V]""" + _, vocab_size = logits.shape + + bsz, beam_width = state.log_probs.shape + onehot_eos = L.cast(F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size), 'int64') #[1, V] + + probs = L.log(L.softmax(logits)) #[B*W, V] + probs = mask_prob(probs, onehot_eos, state.finished) #[B*W, V] + allprobs = L.reshape(state.log_probs, [-1, 1]) + probs #[B*W, V] + + not_finished = 1 - L.reshape(state.finished, [-1, 1]) #[B*W,1] + not_eos = 1 - onehot_eos + length_to_add = not_finished * not_eos #[B*W,V] + alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add + + allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size]) + alllen = L.reshape(alllen, [-1, beam_width * vocab_size]) + allscore = hyp_score(allprobs, alllen, length_penalty) + if is_first_step: + allscore = L.reshape(allscore, [bsz, beam_width, -1])[:, 0, :] # first step only consiter beam 0 + scores, idx = L.topk(allscore, k=beam_width) #[B, W] + next_beam_id = idx // vocab_size #[B, W] + next_word_id = idx % vocab_size + + gather_idx = L.concat([L.where(idx != -1)[:, :1], L.reshape(idx, [-1, 1])], 1) + next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape) + next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape) + + gather_idx = L.concat([L.where(next_beam_id != -1)[:, :1], L.reshape(next_beam_id, [-1, 1])], 1) + next_finished = L.reshape(L.gather_nd(state.finished, gather_idx), + state.finished.shape) #[gather new beam state according to new beam id] + + next_finished += L.cast(next_word_id == eos_id, 'int64') + next_finished = L.cast(next_finished > 0, 'int64') + + next_state = BeamSearchState(log_probs=next_probs, lengths=next_len, finished=next_finished) + output = BeamSearchOutput(scores=scores, predicted_ids=next_word_id, beam_parent_ids=next_beam_id) + + return output, next_state + + +@D.no_grad +def beam_search_infilling(model, + q_ids, + q_sids, + sos_id, + eos_id, + attn_id, + max_encode_len=640, + max_decode_len=100, + beam_width=5, + tgt_type_id=3, + length_penalty=1.0): + model.eval() + _, __, info = model(q_ids, q_sids) + d_batch, d_seqlen = q_ids.shape + + state = BeamSearchState( + log_probs=L.zeros([d_batch, beam_width], 'float32'), + lengths=L.zeros([d_batch, beam_width], 'int64'), + finished=L.zeros([d_batch, beam_width], 'int64')) + outputs = [] + + def reorder_(t, parent_id): + """reorder cache according to parent beam id""" + gather_idx = L.where(parent_id != -1)[:, 0] * beam_width + L.reshape(parent_id, [-1]) + t = L.gather(t, gather_idx) + return t + + def tile_(t, times): + _shapes = list(t.shape[1:]) + ret = L.reshape(L.expand(L.unsqueeze(t, [1]), [ + 1, + times, + ] + [ + 1, + ] * len(_shapes)), [ + -1, + ] + _shapes) + return ret + + cached_k, cached_v = info['caches'] + cached_k = [tile_(k, beam_width) for k in cached_k] + cached_v = [tile_(v, beam_width) for v in cached_v] + past_cache = (cached_k, cached_v) + + q_ids = tile_(q_ids, beam_width) + seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) + + cls_ids = L.ones([d_batch * beam_width], dtype='int64') * sos_id + attn_ids = L.ones([d_batch * beam_width], dtype='int64') * attn_id # SOS + ids = L.stack([cls_ids, attn_ids], -1) + for step in range(max_decode_len): + bias = gen_bias(q_ids, ids, step) + pos_ids = D.to_variable(np.tile(np.array([[step, step + 1]], dtype=np.int64), [d_batch * beam_width, 1])) + pos_ids += 
seqlen
+
+        _, logits, info = model(
+            ids, L.ones_like(ids) * tgt_type_id, pos_ids=pos_ids, attn_bias=bias, past_cache=past_cache)
+
+        output, state = beam_search_step(
+            state,
+            logits[:, 1],
+            eos_id=eos_id,
+            beam_width=beam_width,
+            is_first_step=(step == 0),
+            length_penalty=length_penalty)
+        outputs.append(output)
+
+        past_cached_k, past_cached_v = past_cache
+        cached_k, cached_v = info['caches']
+        cached_k = [
+            reorder_(L.concat([pk, k[:, :1, :]], 1), output.beam_parent_ids) for pk, k in zip(past_cached_k, cached_k)
+        ]  # concat cached
+        cached_v = [
+            reorder_(L.concat([pv, v[:, :1, :]], 1), output.beam_parent_ids) for pv, v in zip(past_cached_v, cached_v)
+        ]
+        past_cache = (cached_k, cached_v)
+
+        pred_ids_flatten = L.reshape(output.predicted_ids, [d_batch * beam_width])
+        ids = L.stack([pred_ids_flatten, attn_ids], 1)
+
+        if state.finished.numpy().all():
+            break
+
+    final_ids = L.stack([o.predicted_ids for o in outputs], 0)
+    final_parent_ids = L.stack([o.beam_parent_ids for o in outputs], 0)
+    final_ids = L.gather_tree(final_ids, final_parent_ids)  # trace predicted ids back through the beam parents
+    final_ids = L.transpose(L.reshape(final_ids, [-1, d_batch * 1, beam_width]), [1, 2, 0])
+    return final_ids
+
+
+en_patten = re.compile(r'^[a-zA-Z0-9]*$')
+
+
+def post_process(token):
+    if token.startswith('##'):
+        ret = token[2:]
+    else:
+        if en_patten.match(token):
+            ret = ' ' + token
+        else:
+            ret = token
+    return ret
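+
+
+# Illustrative behavior of post_process (assuming wordpiece-style tokens):
+#   post_process('##ing')  -> 'ing'      subword continuation, '##' prefix stripped
+#   post_process('hello')  -> ' hello'   standalone English/number token, space added
+#   post_process('你')      -> '你'       any other token is returned unchanged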
diff --git a/modules/text/text_generation/ernie_gen_poetry/model/file_utils.py b/modules/text/text_generation/ernie_gen_poetry/model/file_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..608be4efc6644626f7f408df200fd299f2dd997e
--- /dev/null
+++ b/modules/text/text_generation/ernie_gen_poetry/model/file_utils.py
@@ -0,0 +1,46 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+
+from tqdm import tqdm
+from paddlehub.common.logger import logger
+from paddlehub.common.dir import MODULE_HOME
+
+
+def _fetch_from_remote(url, force_download=False):
+    import tempfile, requests, tarfile
+    cached_dir = os.path.join(MODULE_HOME, "ernie_for_gen")
+    if force_download or not os.path.exists(cached_dir):
+        with tempfile.NamedTemporaryFile() as f:
+            #url = 'https://ernie.bj.bcebos.com/ERNIE_stable.tgz'
+            r = requests.get(url, stream=True)
+            total_len = int(r.headers.get('content-length'))
+            for chunk in tqdm(
+                    r.iter_content(chunk_size=1024), total=total_len // 1024, desc='downloading %s' % url, unit='KB'):
+                if chunk:
+                    f.write(chunk)
+                    f.flush()
+            logger.debug('extracting... to %s' % f.name)
+            with tarfile.open(f.name) as tf:
+                tf.extractall(path=cached_dir)
+    logger.debug('%s cached in %s' % (url, cached_dir))
+    return cached_dir
+
+
+def add_docstring(doc):
+    def func(f):
+        f.__doc__ += ('\n======other docs from super class ======\n%s' % doc)
+        return f
+
+    return func
diff --git a/modules/text/text_generation/ernie_gen_poetry/model/modeling_ernie.py b/modules/text/text_generation/ernie_gen_poetry/model/modeling_ernie.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5de28a5fee73371babd05b644e03a0f75ecdd5e
--- /dev/null
+++ b/modules/text/text_generation/ernie_gen_poetry/model/modeling_ernie.py
@@ -0,0 +1,327 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import logging
+
+import paddle.fluid.dygraph as D
+import paddle.fluid as F
+import paddle.fluid.layers as L
+
+log = logging.getLogger(__name__)
+
+
+def _build_linear(n_in, n_out, name, init, act=None):
+    return D.Linear(
+        n_in,
+        n_out,
+        param_attr=F.ParamAttr(name='%s.w_0' % name if name is not None else None, initializer=init),
+        bias_attr='%s.b_0' % name if name is not None else None,
+        act=act)
+
+
+def _build_ln(n_in, name):
+    return D.LayerNorm(
+        normalized_shape=n_in,
+        param_attr=F.ParamAttr(
+            name='%s_layer_norm_scale' % name if name is not None else None, initializer=F.initializer.Constant(1.)),
+        bias_attr=F.ParamAttr(
+            name='%s_layer_norm_bias' % name if name is not None else None, initializer=F.initializer.Constant(1.)),
+    )
+
+
+def append_name(name, postfix):
+    if name is None:
+        return None
+    elif name == '':
+        return postfix
+    else:
+        return '%s_%s' % (name, postfix)
+
+
+class AttentionLayer(D.Layer):
+    def __init__(self, cfg, name=None):
+        super(AttentionLayer, self).__init__()
+        initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range'])
+        d_model = cfg['hidden_size']
+        n_head = cfg['num_attention_heads']
+        assert d_model % n_head == 0
+        d_model_q = cfg.get('query_hidden_size_per_head', d_model // n_head) * n_head
+        d_model_v = cfg.get('value_hidden_size_per_head', d_model // n_head) * n_head
+        self.n_head = n_head
+        self.d_key = d_model_q // n_head
+        self.q = _build_linear(d_model, d_model_q, append_name(name, 'query_fc'), initializer)
+        self.k = _build_linear(d_model, d_model_q, append_name(name, 'key_fc'), initializer)
+        self.v = _build_linear(d_model, d_model_v, append_name(name, 'value_fc'), initializer)
+        self.o = _build_linear(d_model_v, d_model, append_name(name, 'output_fc'), initializer)
+        self.dropout = lambda i: L.dropout(
+            i,
+            dropout_prob=cfg['attention_probs_dropout_prob'],
+            dropout_implementation="upscale_in_train",
+        ) if self.training else i
+
+    def forward(self, queries, keys, values, attn_bias, past_cache):
+        assert len(queries.shape) == len(keys.shape) == len(values.shape) == 3
+
+        q = 
self.q(queries) + k = self.k(keys) + v = self.v(values) + + cache = (k, v) + if past_cache is not None: + cached_k, cached_v = past_cache + k = L.concat([cached_k, k], 1) + v = L.concat([cached_v, v], 1) + + q = L.transpose(L.reshape(q, [0, 0, self.n_head, q.shape[-1] // self.n_head]), + [0, 2, 1, 3]) #[batch, head, seq, dim] + k = L.transpose(L.reshape(k, [0, 0, self.n_head, k.shape[-1] // self.n_head]), + [0, 2, 1, 3]) #[batch, head, seq, dim] + v = L.transpose(L.reshape(v, [0, 0, self.n_head, v.shape[-1] // self.n_head]), + [0, 2, 1, 3]) #[batch, head, seq, dim] + + q = L.scale(q, scale=self.d_key**-0.5) + score = L.matmul(q, k, transpose_y=True) + if attn_bias is not None: + score += attn_bias + score = L.softmax(score, use_cudnn=True) + score = self.dropout(score) + + out = L.matmul(score, v) + out = L.transpose(out, [0, 2, 1, 3]) + out = L.reshape(out, [0, 0, out.shape[2] * out.shape[3]]) + + out = self.o(out) + return out, cache + + +class PositionwiseFeedForwardLayer(D.Layer): + def __init__(self, cfg, name=None): + super(PositionwiseFeedForwardLayer, self).__init__() + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + d_model = cfg['hidden_size'] + d_ffn = cfg.get('intermediate_size', 4 * d_model) + assert cfg['hidden_act'] in ['relu', 'gelu'] + self.i = _build_linear(d_model, d_ffn, append_name(name, 'fc_0'), initializer, act=cfg['hidden_act']) + self.o = _build_linear(d_ffn, d_model, append_name(name, 'fc_1'), initializer) + prob = cfg.get('intermediate_dropout_prob', 0.) + self.dropout = lambda i: L.dropout( + i, + dropout_prob=prob, + dropout_implementation="upscale_in_train", + ) if self.training else i + + def forward(self, inputs): + hidden = self.i(inputs) + hidden = self.dropout(hidden) + out = self.o(hidden) + return out + + +class ErnieBlock(D.Layer): + def __init__(self, cfg, name=None): + super(ErnieBlock, self).__init__() + d_model = cfg['hidden_size'] + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + + self.attn = AttentionLayer(cfg, name=append_name(name, 'multi_head_att')) + self.ln1 = _build_ln(d_model, name=append_name(name, 'post_att')) + self.ffn = PositionwiseFeedForwardLayer(cfg, name=append_name(name, 'ffn')) + self.ln2 = _build_ln(d_model, name=append_name(name, 'post_ffn')) + prob = cfg.get('intermediate_dropout_prob', cfg['hidden_dropout_prob']) + self.dropout = lambda i: L.dropout( + i, + dropout_prob=prob, + dropout_implementation="upscale_in_train", + ) if self.training else i + + def forward(self, inputs, attn_bias=None, past_cache=None): + attn_out, cache = self.attn(inputs, inputs, inputs, attn_bias, past_cache=past_cache) #self attn + attn_out = self.dropout(attn_out) + hidden = attn_out + inputs + hidden = self.ln1(hidden) # dropout/ add/ norm + + ffn_out = self.ffn(hidden) + ffn_out = self.dropout(ffn_out) + hidden = ffn_out + hidden + hidden = self.ln2(hidden) + return hidden, cache + + +class ErnieEncoderStack(D.Layer): + def __init__(self, cfg, name=None): + super(ErnieEncoderStack, self).__init__() + n_layers = cfg['num_hidden_layers'] + self.block = D.LayerList([ErnieBlock(cfg, append_name(name, 'layer_%d' % i)) for i in range(n_layers)]) + + def forward(self, inputs, attn_bias=None, past_cache=None): + if past_cache is not None: + assert isinstance( + past_cache, + tuple), 'unknown type of `past_cache`, expect tuple or list. 
got %s' % repr(type(past_cache)) + past_cache = list(zip(*past_cache)) + else: + past_cache = [None] * len(self.block) + cache_list_k, cache_list_v, hidden_list = [], [], [inputs] + + for b, p in zip(self.block, past_cache): + inputs, cache = b(inputs, attn_bias=attn_bias, past_cache=p) + cache_k, cache_v = cache + cache_list_k.append(cache_k) + cache_list_v.append(cache_v) + hidden_list.append(inputs) + + return inputs, hidden_list, (cache_list_k, cache_list_v) + + +class ErnieModel(D.Layer): + def __init__(self, cfg, name=None): + """ + Fundamental pretrained Ernie model + """ + log.debug('init ErnieModel with config: %s' % repr(cfg)) + D.Layer.__init__(self) + d_model = cfg['hidden_size'] + d_emb = cfg.get('emb_size', cfg['hidden_size']) + d_vocab = cfg['vocab_size'] + d_pos = cfg['max_position_embeddings'] + d_sent = cfg.get("sent_type_vocab_size") or cfg['type_vocab_size'] + self.n_head = cfg['num_attention_heads'] + self.return_additional_info = cfg.get('return_additional_info', False) + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + + self.ln = _build_ln(d_model, name=append_name(name, 'pre_encoder')) + self.word_emb = D.Embedding([d_vocab, d_emb], + param_attr=F.ParamAttr( + name=append_name(name, 'word_embedding'), initializer=initializer)) + self.pos_emb = D.Embedding([d_pos, d_emb], + param_attr=F.ParamAttr( + name=append_name(name, 'pos_embedding'), initializer=initializer)) + self.sent_emb = D.Embedding([d_sent, d_emb], + param_attr=F.ParamAttr( + name=append_name(name, 'sent_embedding'), initializer=initializer)) + prob = cfg['hidden_dropout_prob'] + self.dropout = lambda i: L.dropout( + i, + dropout_prob=prob, + dropout_implementation="upscale_in_train", + ) if self.training else i + + self.encoder_stack = ErnieEncoderStack(cfg, append_name(name, 'encoder')) + if cfg.get('has_pooler', True): + self.pooler = _build_linear( + cfg['hidden_size'], cfg['hidden_size'], append_name(name, 'pooled_fc'), initializer, act='tanh') + else: + self.pooler = None + self.train() + + def eval(self): + if F.in_dygraph_mode(): + super(ErnieModel, self).eval() + self.training = False + for l in self.sublayers(): + l.training = False + + def train(self): + if F.in_dygraph_mode(): + super(ErnieModel, self).train() + self.training = True + for l in self.sublayers(): + l.training = True + + def forward(self, + src_ids, + sent_ids=None, + pos_ids=None, + input_mask=None, + attn_bias=None, + past_cache=None, + use_causal_mask=False): + """ + Args: + src_ids (`Variable` of shape `[batch_size, seq_len]`): + Indices of input sequence tokens in the vocabulary. + sent_ids (optional, `Variable` of shape `[batch_size, seq_len]`): + aka token_type_ids, Segment token indices to indicate first and second portions of the inputs. + if None, assume all tokens come from `segment_a` + pos_ids(optional, `Variable` of shape `[batch_size, seq_len]`): + Indices of positions of each input sequence tokens in the position embeddings. + input_mask(optional `Variable` of shape `[batch_size, seq_len]`): + Mask to avoid performing attention on the padding token indices of the encoder input. 
+            attn_bias(optional, `Variable` of shape `[batch_size, seq_len, seq_len] or False`):
+                3D version of `input_mask`; if set, it overrides `input_mask`;
+                if set to False, no attention mask is applied.
+            past_cache(optional, tuple of two lists: cached key and cached value,
+                each is a list of `Variable`s of shape `[batch_size, seq_len, hidden_size]`):
+                cached key/value tensors that will be concatenated to the generated key/value
+                when performing self attention. If set, `attn_bias` should not be None.
+
+        Returns:
+            pooled (`Variable` of shape `[batch_size, hidden_size]`):
+                output logits of pooler classifier
+            encoded(`Variable` of shape `[batch_size, seq_len, hidden_size]`):
+                output logits of transformer stack
+        """
+        assert len(src_ids.shape) == 2, 'expect src_ids.shape = [batch, sequence], got %s' % (repr(src_ids.shape))
+        assert attn_bias is not None if past_cache else True, 'if `past_cache` is specified, attn_bias should not be None'
+        d_batch = L.shape(src_ids)[0]
+        d_seqlen = L.shape(src_ids)[1]
+        if pos_ids is None:
+            pos_ids = L.reshape(L.range(0, d_seqlen, 1, dtype='int32'), [1, -1])
+            pos_ids = L.cast(pos_ids, 'int64')
+        if attn_bias is None:
+            if input_mask is None:
+                input_mask = L.cast(src_ids != 0, 'float32')
+            assert len(input_mask.shape) == 2
+            input_mask = L.unsqueeze(input_mask, axes=[-1])
+            attn_bias = L.matmul(input_mask, input_mask, transpose_y=True)
+            if use_causal_mask:
+                sequence = L.reshape(L.range(0, d_seqlen, 1, dtype='float32') + 1., [1, 1, -1, 1])
+                causal_mask = L.cast((L.matmul(sequence, 1. / sequence, transpose_y=True) >= 1.), 'float32')
+                attn_bias *= causal_mask
+        else:
+            assert len(attn_bias.shape) == 3, 'expect attn_bias to be rank 3, got %r' % attn_bias.shape
+        attn_bias = (1. - attn_bias) * -10000.0
+        attn_bias = L.unsqueeze(attn_bias, [1])
+        attn_bias = L.expand(attn_bias, [1, self.n_head, 1, 1])  # avoid broadcast =_=
+        attn_bias.stop_gradient = True
+
+        if sent_ids is None:
+            sent_ids = L.zeros_like(src_ids)
+
+        src_embedded = self.word_emb(src_ids)
+        pos_embedded = self.pos_emb(pos_ids)
+        sent_embedded = self.sent_emb(sent_ids)
+        embedded = src_embedded + pos_embedded + sent_embedded
+
+        embedded = self.dropout(self.ln(embedded))
+
+        encoded, hidden_list, cache_list = self.encoder_stack(embedded, attn_bias, past_cache=past_cache)
+        if self.pooler is not None:
+            pooled = self.pooler(encoded[:, 0, :])
+        else:
+            pooled = None
+
+        additional_info = {
+            'hiddens': hidden_list,
+            'caches': cache_list,
+        }
+
+        if self.return_additional_info:
+            return pooled, encoded, additional_info
+        else:
+            return pooled, encoded
diff --git a/modules/text/text_generation/ernie_gen_poetry/model/modeling_ernie_gen.py b/modules/text/text_generation/ernie_gen_poetry/model/modeling_ernie_gen.py
new file mode 100644
index 0000000000000000000000000000000000000000..4753b6e85220c55a324c47c3e4a47e63d29fa6ca
--- /dev/null
+++ b/modules/text/text_generation/ernie_gen_poetry/model/modeling_ernie_gen.py
@@ -0,0 +1,65 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.fluid as F +import paddle.fluid.layers as L + +from ernie_gen_poetry.model.modeling_ernie import ErnieModel +from ernie_gen_poetry.model.modeling_ernie import _build_linear, _build_ln, append_name + + +class ErnieModelForGeneration(ErnieModel): + def __init__(self, cfg, name=None): + cfg['return_additional_info'] = True + cfg['has_pooler'] = False + super(ErnieModelForGeneration, self).__init__(cfg, name=name) + initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) + d_model = cfg['hidden_size'] + d_vocab = cfg['vocab_size'] + + self.mlm = _build_linear( + d_model, d_model, append_name(name, 'mask_lm_trans_fc'), initializer, act=cfg['hidden_act']) + self.mlm_ln = _build_ln(d_model, name=append_name(name, 'mask_lm_trans')) + self.mlm_bias = L.create_parameter( + dtype='float32', + shape=[d_vocab], + attr=F.ParamAttr( + name=append_name(name, 'mask_lm_out_fc.b_0'), initializer=F.initializer.Constant(value=0.0)), + is_bias=True, + ) + + def forward(self, src_ids, *args, **kwargs): + tgt_labels = kwargs.pop('tgt_labels', None) + tgt_pos = kwargs.pop('tgt_pos', None) + encode_only = kwargs.pop('encode_only', False) + _, encoded, info = ErnieModel.forward(self, src_ids, *args, **kwargs) + if encode_only: + return None, None, info + elif tgt_labels is None: + encoded = self.mlm(encoded) + encoded = self.mlm_ln(encoded) + logits = L.matmul(encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias + output_ids = L.argmax(logits, -1) + return output_ids, logits, info + else: + encoded_2d = L.gather_nd(encoded, tgt_pos) + encoded_2d = self.mlm(encoded_2d) + encoded_2d = self.mlm_ln(encoded_2d) + logits_2d = L.matmul(encoded_2d, self.word_emb.weight, transpose_y=True) + self.mlm_bias + if len(tgt_labels.shape) == 1: + tgt_labels = L.reshape(tgt_labels, [-1, 1]) + + loss = L.reduce_mean( + L.softmax_with_cross_entropy(logits_2d, tgt_labels, soft_label=(tgt_labels.shape[-1] != 1))) + return loss, logits_2d, info diff --git a/modules/text/text_generation/ernie_gen_poetry/model/tokenizing_ernie.py b/modules/text/text_generation/ernie_gen_poetry/model/tokenizing_ernie.py new file mode 100644 index 0000000000000000000000000000000000000000..c9e5638f9a17207ce2d664c27376f08138876da3 --- /dev/null +++ b/modules/text/text_generation/ernie_gen_poetry/model/tokenizing_ernie.py @@ -0,0 +1,163 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
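+#
+# A minimal usage sketch of the tokenizer below (illustrative; `vocab` is a
+# dict mapping token -> id, e.g. built from the module's vocab.txt):
+#
+#   tokenizer = ErnieTokenizer(vocab)
+#   ids, sids = tokenizer.encode('hello world')
+#   # ids:  [CLS] + wordpiece ids + [SEP];  sids: sentence-type ids (0 = first sentence)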
+ +import six +import re +import logging +from functools import partial + +import numpy as np + +import io + +open = partial(io.open, encoding='utf8') + +log = logging.getLogger(__name__) + +_max_input_chars_per_word = 100 + + +def _wordpiece(token, vocab, unk_token, prefix='##', sentencepiece_prefix=''): + """ wordpiece: helloworld => [hello, ##world] """ + chars = list(token) + if len(chars) > _max_input_chars_per_word: + return [unk_token], [(0, len(chars))] + + is_bad = False + start = 0 + sub_tokens = [] + sub_pos = [] + while start < len(chars): + end = len(chars) + cur_substr = None + while start < end: + substr = "".join(chars[start:end]) + if start == 0: + substr = sentencepiece_prefix + substr + if start > 0: + substr = prefix + substr + if substr in vocab: + cur_substr = substr + break + end -= 1 + if cur_substr is None: + is_bad = True + break + sub_tokens.append(cur_substr) + sub_pos.append((start, end)) + start = end + if is_bad: + return [unk_token], [(0, len(chars))] + else: + return sub_tokens, sub_pos + + +class ErnieTokenizer(object): + def __init__(self, + vocab, + unk_token='[UNK]', + sep_token='[SEP]', + cls_token='[CLS]', + pad_token='[PAD]', + mask_token='[MASK]', + wordpiece_prefix='##', + sentencepiece_prefix='', + lower=True, + encoding='utf8', + special_token_list=[]): + if not isinstance(vocab, dict): + raise ValueError('expect `vocab` to be instance of dict, got %s' % type(vocab)) + self.vocab = vocab + self.lower = lower + self.prefix = wordpiece_prefix + self.sentencepiece_prefix = sentencepiece_prefix + self.pad_id = self.vocab[pad_token] + self.cls_id = cls_token and self.vocab[cls_token] + self.sep_id = sep_token and self.vocab[sep_token] + self.unk_id = unk_token and self.vocab[unk_token] + self.mask_id = mask_token and self.vocab[mask_token] + self.unk_token = unk_token + special_tokens = {pad_token, cls_token, sep_token, unk_token, mask_token} | set(special_token_list) + pat_str = '' + for t in special_tokens: + if t is None: + continue + pat_str += '(%s)|' % re.escape(t) + pat_str += r'([a-zA-Z0-9]+|\S)' + log.debug('regex: %s' % pat_str) + self.pat = re.compile(pat_str) + self.encoding = encoding + + def tokenize(self, text): + if len(text) == 0: + return [] + if six.PY3 and not isinstance(text, six.string_types): + text = text.decode(self.encoding) + if six.PY2 and isinstance(text, str): + text = text.decode(self.encoding) + + res = [] + for match in self.pat.finditer(text): + match_group = match.group(0) + if match.groups()[-1]: + if self.lower: + match_group = match_group.lower() + words, _ = _wordpiece( + match_group, + vocab=self.vocab, + unk_token=self.unk_token, + prefix=self.prefix, + sentencepiece_prefix=self.sentencepiece_prefix) + else: + words = [match_group] + res += words + return res + + def convert_tokens_to_ids(self, tokens): + return [self.vocab.get(t, self.unk_id) for t in tokens] + + def truncate(self, id1, id2, seqlen): + len1 = len(id1) + len2 = len(id2) + half = seqlen // 2 + if len1 > len2: + len1_truncated, len2_truncated = max(half, seqlen - len2), min(half, len2) + else: + len1_truncated, len2_truncated = min(half, seqlen - len1), max(half, seqlen - len1) + return id1[:len1_truncated], id2[:len2_truncated] + + def build_for_ernie(self, text_id, pair_id=[]): + """build sentence type id, add [CLS] [SEP]""" + text_id_type = np.zeros_like(text_id, dtype=np.int64) + ret_id = np.concatenate([[self.cls_id], text_id, [self.sep_id]], 0) + ret_id_type = np.concatenate([[0], text_id_type, [0]], 0) + + if len(pair_id): + pair_id_type = 
np.ones_like(pair_id, dtype=np.int64)
+            ret_id = np.concatenate([ret_id, pair_id, [self.sep_id]], 0)
+            ret_id_type = np.concatenate([ret_id_type, pair_id_type, [1]], 0)
+        return ret_id, ret_id_type
+
+    def encode(self, text, pair=None, truncate_to=None):
+        text_id = np.array(self.convert_tokens_to_ids(self.tokenize(text)), dtype=np.int64)
+        text_id_type = np.zeros_like(text_id, dtype=np.int64)
+        if pair is not None:
+            pair_id = np.array(self.convert_tokens_to_ids(self.tokenize(pair)), dtype=np.int64)
+        else:
+            pair_id = []
+        if truncate_to is not None:
+            text_id, pair_id = self.truncate(text_id, [] if pair_id is None else pair_id, truncate_to)
+
+        ret_id, ret_id_type = self.build_for_ernie(text_id, pair_id)
+        return ret_id, ret_id_type
diff --git a/modules/text/text_generation/ernie_gen_poetry/module.py b/modules/text/text_generation/ernie_gen_poetry/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..3c5c4724f750c8e273ef1f0a562a517dbbe9fe4c
--- /dev/null
+++ b/modules/text/text_generation/ernie_gen_poetry/module.py
@@ -0,0 +1,187 @@
+# coding:utf-8
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import ast
+import json
+
+import paddle.fluid as fluid
+import paddlehub as hub
+from paddlehub.module.module import runnable
+from paddlehub.module.nlp_module import DataFormatError
+from paddlehub.common.logger import logger
+from paddlehub.module.module import moduleinfo, serving
+
+import argparse
+import os
+import numpy as np
+
+import paddle.fluid.dygraph as D
+
+from ernie_gen_poetry.model.tokenizing_ernie import ErnieTokenizer
+from ernie_gen_poetry.model.decode import beam_search_infilling
+from ernie_gen_poetry.model.modeling_ernie_gen import ErnieModelForGeneration
+
+
+@moduleinfo(
+    name="ernie_gen_poetry",
+    version="1.0.2",
+    summary=
+    "ERNIE-GEN is a multi-flow language generation framework for both pre-training and fine-tuning. This module has been fine-tuned for the poetry generation task.",
+    author="baidu-nlp",
+    author_email="",
+    type="nlp/text_generation",
+)
+class ErnieGen(hub.NLPPredictionModule):
+    def _initialize(self):
+        """
+        Initialize with the necessary elements.
+        """
+        assets_path = os.path.join(self.directory, "assets")
+        gen_checkpoint_path = os.path.join(assets_path, "ernie_gen_poetry")
+        ernie_cfg_path = os.path.join(assets_path, 'ernie_config.json')
+        with open(ernie_cfg_path, encoding='utf8') as ernie_cfg_file:
+            ernie_cfg = dict(json.loads(ernie_cfg_file.read()))
+        ernie_vocab_path = os.path.join(assets_path, 'vocab.txt')
+        with open(ernie_vocab_path, encoding='utf8') as ernie_vocab_file:
+            ernie_vocab = {j.strip().split('\t')[0]: i for i, j in enumerate(ernie_vocab_file.readlines())}
+
+        with fluid.dygraph.guard(fluid.CPUPlace()):
+            with fluid.unique_name.guard():
+                self.model = ErnieModelForGeneration(ernie_cfg)
+                finetuned_states, _ = D.load_dygraph(gen_checkpoint_path)
+                self.model.set_dict(finetuned_states)
+
+        self.tokenizer = ErnieTokenizer(ernie_vocab)
+        self.rev_dict = {v: k for k, v in self.tokenizer.vocab.items()}
+        self.rev_dict[self.tokenizer.pad_id] = ''  # replace [PAD]
+        self.rev_dict[self.tokenizer.unk_id] = ''  # replace [UNK]
+        self.rev_lookup = np.vectorize(lambda i: self.rev_dict[i])
+
+    @serving
+    def generate(self, texts, use_gpu=False, beam_width=5):
+        """
+        Get the continuation of the input poetry.
+
+        Args:
+            texts(list): the front parts of the poems to be continued.
+            use_gpu(bool): whether to use GPU for prediction.
+            beam_width(int): the beam search width.
+
+        Returns:
+            results(list): the poetry continuations.
+        """
+        if texts and isinstance(texts, list) and all(texts) and all([isinstance(text, str) for text in texts]):
+            predicted_data = texts
+        else:
+            raise ValueError("The input texts should be a list with nonempty string elements.")
+        for text in texts:
+            if ',' not in text or '。' not in text:
+                logger.warning(
+                    "The input text: %s does not contain ',' or '。', so it is not a complete verse and may lead to unexpected output"
+                    % text)
+            else:
+                parts = text[:-1].split(',')
+                if len(parts) == 2 and len(parts[0]) != len(parts[1]):
+                    logger.warning(
+                        "The input text: %s is not an antithetical parallelism (its two halves differ in length), which may lead to unexpected output"
+                        % text)
+
+            for char in text:
+                if not '\u4e00' <= char <= '\u9fff' and char not in [',', '。']:
+                    logger.warning(
+                        "The input text: %s contains characters other than Chinese characters, ',' or '。', which may lead to unexpected output"
+                        % text)
+                    break
+
+        if use_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ:
+            use_gpu = False
+            logger.warning(
+                "use_gpu has been set False as you didn't set the environment variable CUDA_VISIBLE_DEVICES while using use_gpu=True"
+            )
+        if use_gpu:
+            place = fluid.CUDAPlace(0)
+        else:
+            place = fluid.CPUPlace()
+
+        with fluid.dygraph.guard(place):
+            self.model.eval()
+            results = []
+            for text in predicted_data:
+                sample_results = []
+                ids, sids = self.tokenizer.encode(text)
+                src_ids = D.to_variable(np.expand_dims(ids, 0))
+                src_sids = D.to_variable(np.expand_dims(sids, 0))
+                output_ids = beam_search_infilling(
+                    self.model,
+                    src_ids,
+                    src_sids,
+                    eos_id=self.tokenizer.sep_id,
+                    sos_id=self.tokenizer.cls_id,
+                    attn_id=self.tokenizer.vocab['[MASK]'],
+                    max_decode_len=80,
+                    max_encode_len=20,
+                    beam_width=beam_width,
+                    tgt_type_id=1)
+                output_str = self.rev_lookup(output_ids[0].numpy())
+
+                for ostr in output_str.tolist():
+                    if '[SEP]' in ostr:
+                        ostr = ostr[:ostr.index('[SEP]')]
+                    sample_results.append("".join(ostr))
+                results.append(sample_results)
+        return results
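+
+    # A minimal usage sketch (illustrative; the input should be complete verses,
+    # like the examples in __main__ below):
+    #
+    #   module = hub.Module(name="ernie_gen_poetry")
+    #   results = module.generate(texts=['昔年旅南服,始识王荆州。'], use_gpu=False, beam_width=5)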
+
+    def add_module_config_arg(self):
+        """
+        Add the command config options
+        """
+        self.arg_config_group.add_argument(
+            '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU for prediction")
+
+        self.arg_config_group.add_argument('--beam_width', type=int, default=5, help="the beam search width")
+
+    @runnable
+    def run_cmd(self, argvs):
+        """
+        Run as a command
+        """
+        self.parser = argparse.ArgumentParser(
+            description='Run the %s module.' % self.name,
+            prog='hub run %s' % self.name,
+            usage='%(prog)s',
+            add_help=True)
+
+        self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required")
+        self.arg_config_group = self.parser.add_argument_group(
+            title="Config options", description="Run configuration for controlling module behavior, optional.")
+
+        self.add_module_config_arg()
+        self.add_module_input_arg()
+
+        args = self.parser.parse_args(argvs)
+
+        try:
+            input_data = self.check_input_data(args)
+        except (DataFormatError, RuntimeError):
+            self.parser.print_help()
+            return None
+
+        results = self.generate(texts=input_data, use_gpu=args.use_gpu, beam_width=args.beam_width)
+
+        return results
+
+
+if __name__ == "__main__":
+    module = ErnieGen()
+    for result in module.generate(['昔年旅南服,始识王荆州。', '高名出汉阴,禅阁跨香岑。'], beam_width=5):
+        print(result)
diff --git a/hub_module/modules/text/text_generation/ernie_tiny_couplet/README.md b/modules/text/text_generation/ernie_tiny_couplet/README.md
similarity index 100%
rename from hub_module/modules/text/text_generation/ernie_tiny_couplet/README.md
rename to modules/text/text_generation/ernie_tiny_couplet/README.md
diff --git a/hub_module/modules/text/text_generation/ernie_gen_poetry/__init__.py b/modules/text/text_generation/ernie_tiny_couplet/__init__.py
similarity index 100%
rename from hub_module/modules/text/text_generation/ernie_gen_poetry/__init__.py
rename to modules/text/text_generation/ernie_tiny_couplet/__init__.py
diff --git a/modules/text/text_generation/ernie_tiny_couplet/module.py b/modules/text/text_generation/ernie_tiny_couplet/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..36121ead4cbe24e9c4ab35b40a5bb32e62ce7429
--- /dev/null
+++ b/modules/text/text_generation/ernie_tiny_couplet/module.py
@@ -0,0 +1,130 @@
+# coding:utf-8
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
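+#
+# A minimal usage sketch (illustrative; it assumes this module and the
+# ernie_tiny pretrained model are installed):
+#
+#   import paddlehub as hub
+#   module = hub.Module(name="ernie_tiny_couplet")
+#   results = module.generate(["风吹云乱天垂泪"])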
+import os
+import ast
+import argparse
+
+import paddlehub as hub
+from paddlehub.module.module import moduleinfo, serving, runnable
+from paddlehub.module.nlp_module import DataFormatError
+
+
+@moduleinfo(
+    name="ernie_tiny_couplet",
+    version="1.0.0",
+    summary="couplet generation model fine-tuned with the ernie_tiny module",
+    author="paddlehub",
+    author_email="",
+    type="nlp/text_generation",
+)
+class ErnieTinyCouplet(hub.NLPPredictionModule):
+    def _initialize(self, use_gpu=False):
+        # Load the PaddleHub ERNIE Tiny pretrained model.
+        self.module = hub.Module(name="ernie_tiny")
+        inputs, outputs, program = self.module.context(trainable=True, max_seq_len=128)
+
+        # Download the dataset and get its label list and label num.
+        # If you just want the label information, you can omit its tokenizer parameter to avoid preprocessing the train set.
+        dataset = hub.dataset.Couplet()
+        self.label_list = dataset.get_labels()
+
+        # Setup RunConfig for PaddleHub Fine-tune API
+        config = hub.RunConfig(
+            use_data_parallel=False,
+            use_cuda=use_gpu,
+            batch_size=1,
+            checkpoint_dir=os.path.join(self.directory, "assets", "ckpt"),
+            strategy=hub.AdamWeightDecayStrategy())
+
+        # Construct transfer learning network
+        # Use "pooled_output" for classification tasks on an entire sentence.
+        # Use "sequence_output" for token-level output.
+        pooled_output = outputs["pooled_output"]
+        sequence_output = outputs["sequence_output"]
+
+        # Define a text generation fine-tune task by PaddleHub's API
+        self.gen_task = hub.TextGenerationTask(
+            feature=pooled_output,
+            token_feature=sequence_output,
+            max_seq_len=128,
+            num_classes=dataset.num_labels,
+            config=config,
+            metrics_choices=["bleu"])
+
+    def generate(self, texts):
+        # Add 0x02 between characters to match the format of the training data;
+        # otherwise the length of the prediction results will not match the input string
+        # if the input string contains non-Chinese characters.
+        formatted_text_a = list(map("\002".join, texts))
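+        # e.g. "风吹云乱天垂泪" becomes "风\002吹\002云\002乱\002天\002垂\002泪",
+        # i.e. one character per \002-separated token (illustrative of the
+        # training-data format mentioned above).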
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + + self.add_module_config_arg() + self.add_module_input_arg() + + args = self.parser.parse_args(argvs) + + try: + input_data = self.check_input_data(args) + except DataFormatError and RuntimeError: + self.parser.print_help() + return None + + results = self.generate(texts=input_data) + + return results + + @serving + def serving_method(self, texts): + """ + Run as a service. + """ + results = self.generate(texts) + return results + + +if __name__ == '__main__': + module = ErnieTinyCouplet() + results = module.generate(["风吹云乱天垂泪", "若有经心风过耳"]) + for result in results: + print(result) diff --git a/hub_module/modules/text/text_generation/plato2_en_base/README.md b/modules/text/text_generation/plato2_en_base/README.md similarity index 100% rename from hub_module/modules/text/text_generation/plato2_en_base/README.md rename to modules/text/text_generation/plato2_en_base/README.md diff --git a/hub_module/modules/text/text_generation/ernie_tiny_couplet/__init__.py b/modules/text/text_generation/plato2_en_base/__init__.py similarity index 100% rename from hub_module/modules/text/text_generation/ernie_tiny_couplet/__init__.py rename to modules/text/text_generation/plato2_en_base/__init__.py diff --git a/modules/text/text_generation/plato2_en_base/models/__init__.py b/modules/text/text_generation/plato2_en_base/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fdd781f4be7e68f55c2552d9100f80d400e1cc56 --- /dev/null +++ b/modules/text/text_generation/plato2_en_base/models/__init__.py @@ -0,0 +1,65 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Define model.""" + +from plato2_en_base.models.model_base import Model + +MODEL_REGISTRY = {} + +__all__ = ["MODEL_REGISTRY", "register_model", "create_model", "add_cmdline_args"] + + +def register_model(name): + """ + Register a new model class. + """ + + def __wrapped__(cls): + if name in MODEL_REGISTRY: + raise ValueError(f"Cannot register duplicate model ({name})") + if not issubclass(cls, Model): + raise ValueError(f"Model ({name}: {cls.__name__}) must extend Model") + MODEL_REGISTRY[name] = cls + return cls + + return __wrapped__ + + +def create_model(args, place) -> Model: + """ + Create a model. + """ + return MODEL_REGISTRY[args.model](args, place) + + +def add_cmdline_args(parser): + """ Add cmdline argument of Model. """ + group = parser.add_argument_group("Model") + + # Model + group.add_argument("--model", type=str, required=True) + + # Config + group.add_argument("--config_path", type=str, required=True) + + # Model related. 
+ args, _ = parser.parse_known_args() + if args.model not in MODEL_REGISTRY: + raise ValueError(f"Unknown model type: {args.model}") + MODEL_REGISTRY[args.model].add_cmdline_args(parser) + return group + + +import plato2_en_base.models.nsp_model +import plato2_en_base.models.plato diff --git a/modules/text/text_generation/plato2_en_base/models/generator.py b/modules/text/text_generation/plato2_en_base/models/generator.py new file mode 100644 index 0000000000000000000000000000000000000000..74c5b6efe1a2fd441400048e20f013171e0b7995 --- /dev/null +++ b/modules/text/text_generation/plato2_en_base/models/generator.py @@ -0,0 +1,268 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Generator class""" + +import numpy as np +import paddle.fluid.layers as layers + +from plato2_en_base.utils.args import str2bool + + +class Generator(object): + """ + Generator class + + Use generator in inference phase. + """ + + @classmethod + def add_cmdline_args(cls, parser): + """Add cmdline argurments.""" + group = parser.add_argument_group("Generator") + group.add_argument("--min_dec_len", type=int, default=1) + group.add_argument("--max_dec_len", type=int, default=64) + + group.add_argument( + "--decoding_strategy", + type=str, + default="topk_sampling", + choices=["beam_search", "topk_sampling", "topp_sampling"]) + group.add_argument("--temperature", type=float, default=1.) + group.add_argument("--ignore_unk", type=str2bool, default=True) + + # multi sampling + group.add_argument("--num_samples", type=int, default=None) + + # top-k sampling + group.add_argument("--topk", type=int, default=10) + + # top-p sampling + group.add_argument("--topp", type=float, default=0.9) + + # beam search + group.add_argument("--beam_size", type=int, default=10) + group.add_argument("--length_average", type=str2bool, default=True) + group.add_argument("--length_penalty", type=float, default=0.0) + + return group + + def __init__(self, args): + self.min_dec_len = args.min_dec_len + self.max_dec_len = args.max_dec_len + self.eos_id = args.eos_id + self.unk_id = args.unk_id + self.mask_id = args.mask_id + self.vocab_size = args.vocab_size + + # model related + + # basic settings + self.decoding_strategy = args.decoding_strategy + self.ignore_unk = args.ignore_unk + self.continuous_position = args.continuous_position + self.temperature = args.temperature + + # reranking + self.num_samples = args.num_samples + + # top-k sampling + self.topk = args.topk + + # top-p sampling + self.topp = args.topp + + # beam search + self.beam_size = args.beam_size + self.length_penalty = args.length_penalty + self.length_average = args.length_average + return + + def inference(self, model, inputs, outputs): + """ + Run inference. + + Args: + inputs(dict): Its key is input name(str) and its value is a Variable. + model(object): A generate model. Need to implement `_generation_network` and `_calc_logits`. 
+ + Returns: + dict(str:Variable): Its key is output name(str) and its value is a Variable. + """ + # prepare while loop + max_len = layers.fill_constant(shape=[1], dtype="int64", value=self.max_dec_len, force_cpu=True) + min_len = layers.fill_constant(shape=[1], dtype="int64", value=self.min_dec_len, force_cpu=True) + step_idx = layers.fill_constant(shape=[1], dtype="int64", value=0, force_cpu=True) + + ids = layers.array_write(layers.reshape(inputs["tgt_ids"], (-1, 1)), step_idx) + pos_biases = layers.array_write(layers.reshape(inputs["tgt_pos"], (-1, 1)), step_idx) + scores = layers.array_write(inputs["init_score"], step_idx) + tgt_generation_mask = layers.array_write(inputs["tgt_generation_mask"], step_idx) + parent_idx = inputs["parent_idx"] + + if self.decoding_strategy == "beam_search": + beam_size = self.beam_size + else: + beam_size = 1 + + eos_penalty = np.zeros(self.vocab_size, dtype="float32") + eos_penalty[self.eos_id] = -1e9 + eos_penalty = layers.assign(eos_penalty) + + token_penalty = np.zeros(self.vocab_size, dtype="float32") + token_penalty[self.unk_id] = -1e9 + if self.mask_id >= 0: + token_penalty[self.mask_id] = -1e9 + token_penalty = layers.assign(token_penalty) + + # start while loop + cond = layers.less_than(x=step_idx, y=max_len) + while_op = layers.While(cond) + with while_op.block(): + pre_ids = layers.array_read(array=ids, i=step_idx) + pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True) + pre_scores = layers.array_read(array=scores, i=step_idx) + pos_bias = layers.array_read(array=pos_biases, i=step_idx) + pos_bias = layers.gather(input=pos_bias, index=parent_idx) + + tmp_tgt_generation_mask = layers.array_read(tgt_generation_mask, i=step_idx) + dtype = tmp_tgt_generation_mask.dtype + + append_mask = layers.fill_constant_batch_size_like(input=pre_ids, value=1.0, shape=[-1, 1, 1], dtype=dtype) + tmp_tgt_generation_mask = layers.concat([tmp_tgt_generation_mask, append_mask], axis=2) + pre_mask = tmp_tgt_generation_mask = layers.gather(input=tmp_tgt_generation_mask, index=parent_idx) + + pre_sent = layers.fill_constant_batch_size_like( + input=pre_mask, value=1, shape=[-1, 1, 1], dtype=pre_ids.dtype) + + if self.continuous_position: + pre_pos = layers.elementwise_mul( + x=layers.fill_constant_batch_size_like( + input=pre_mask, value=1, shape=[-1, 1, 1], dtype=pre_ids.dtype), + y=step_idx, + axis=0) + pos_bias + else: + pre_pos = layers.elementwise_mul( + x=layers.fill_constant_batch_size_like( + input=pre_mask, value=1, shape=[-1, 1, 1], dtype=pre_ids.dtype), + y=step_idx, + axis=0) + + dec_out, _ = model._generation_network( + token_ids=pre_ids, + type_ids=pre_sent, + pos_ids=pre_pos, + generation_mask=tmp_tgt_generation_mask, + gather_idx=parent_idx) + logits = model._calc_logits(dec_out) + + # ignore unk and mask token + if self.ignore_unk: + logits = layers.elementwise_add(logits, token_penalty, axis=1) + + # min dec length + min_len_cond = layers.less_than(x=step_idx, y=min_len) + + def min_len_penalty(): + """Plus minimum length penalty.""" + return layers.elementwise_add(logits, eos_penalty, axis=1) + + def no_penalty(): + """No penalty.""" + return logits + + logits = layers.case([(min_len_cond, min_len_penalty)], default=no_penalty) + + # get probs + probs = layers.softmax(logits / self.temperature) + + if self.decoding_strategy == "beam_search": + topk_scores, topk_indices = layers.topk(input=probs, k=beam_size) + else: + if self.decoding_strategy.startswith("sampling"): + sampling_ids = layers.sampling_id(probs, dtype="int") + elif 
self.decoding_strategy.startswith("topk_sampling"): + topk_probs, _ = layers.topk(input=probs, k=self.topk) + ge_cond = layers.cast( + layers.greater_equal(probs, layers.unsqueeze(topk_probs[:, -1], [1])), "float32") + old_probs = probs + probs = probs * ge_cond / layers.reduce_sum(topk_probs, dim=-1, keep_dim=True) + sampling_ids = layers.sampling_id(probs, dtype="int") + probs = old_probs + elif self.decoding_strategy.startswith("topp_sampling"): + sorted_probs, sorted_idx = layers.argsort(probs, descending=True) + cum_sorted_probs = layers.cumsum(sorted_probs, axis=1, exclusive=True) + lt_cond = layers.cast( + layers.less_than( + cum_sorted_probs, + layers.fill_constant_batch_size_like(cum_sorted_probs, cum_sorted_probs.shape, + cum_sorted_probs.dtype, self.topp)), "float32") + old_probs = probs + candidate_probs = sorted_probs * lt_cond + probs = candidate_probs / layers.reduce_sum(candidate_probs, dim=-1, keep_dim=True) + sampling_ids = layers.sampling_id(probs, dtype="int") + sampling_ids = layers.index_sample(sorted_idx, layers.unsqueeze(sampling_ids, [1])) + sampling_ids = layers.squeeze(sampling_ids, [1]) + probs = old_probs + else: + raise ValueError(self.decoding_strategy) + + sampling_scores = layers.one_hot(layers.unsqueeze(sampling_ids, [1]), probs.shape[1]) + sampling_scores = sampling_scores * probs - (1 - sampling_scores) * 1e3 + topk_scores, topk_indices = layers.topk(input=sampling_scores, k=1) + + pre_len = layers.cast(step_idx, "float32") + layers.increment(x=step_idx, value=1.0, in_place=True) + cur_len = layers.cast(step_idx, "float32") + + # update scores + if self.length_average: + accu_scores = layers.elementwise_add( + x=layers.log(topk_scores), y=pre_scores * pre_len, axis=0) / cur_len + elif self.length_penalty > 0: + pre_lp = layers.pow((5 + pre_len) / 6, self.length_penalty) + cur_lp = layers.pow((5 + cur_len) / 6, self.length_penalty) + accu_scores = layers.elementwise_add(x=layers.log(topk_scores), y=pre_scores * pre_lp, axis=0) / cur_lp + else: + accu_scores = layers.elementwise_add(x=layers.log(topk_scores), y=pre_scores, axis=0) + topk_indices = layers.lod_reset(topk_indices, pre_ids) + accu_scores = layers.lod_reset(accu_scores, pre_ids) + selected_ids, selected_scores, gather_idx = layers.beam_search( + pre_ids=pre_ids, + pre_scores=pre_scores, + ids=topk_indices, + scores=accu_scores, + beam_size=beam_size, + end_id=self.eos_id, + return_parent_idx=True) + + layers.array_write(selected_ids, i=step_idx, array=ids) + layers.array_write(selected_scores, i=step_idx, array=scores) + layers.array_write(pre_mask, i=step_idx, array=tgt_generation_mask) + layers.array_write(pos_bias, i=step_idx, array=pos_biases) + + layers.assign(gather_idx, parent_idx) + + length_cond = layers.less_than(x=step_idx, y=max_len) + finish_cond = layers.logical_not(layers.is_empty(x=selected_ids)) + layers.logical_and(x=length_cond, y=finish_cond, out=cond) + + finished_ids, finished_scores = layers.beam_search_decode(ids, scores, beam_size=beam_size, end_id=self.eos_id) + + predictions = { + "finished_ids": finished_ids, + "finished_scores": finished_scores, + "token_ids": inputs["token_ids"], + "data_id": inputs["data_id"] + } + return predictions diff --git a/modules/text/text_generation/plato2_en_base/models/model_base.py b/modules/text/text_generation/plato2_en_base/models/model_base.py new file mode 100644 index 0000000000000000000000000000000000000000..0e0803ef5cbccf038e8537ee44cd309b01ffb3f7 --- /dev/null +++ 
b/modules/text/text_generation/plato2_en_base/models/model_base.py @@ -0,0 +1,288 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Model base.""" + +from abc import abstractmethod, ABC + +import paddle.fluid as fluid +from paddle.fluid.incubate.fleet.collective import fleet, DistributedStrategy +import paddle.fluid.incubate.fleet.base.role_maker as role_maker +import paddle.fluid.layers as layers + +from plato2_en_base.models.optimizer import AdamW +from plato2_en_base.utils import init_pretraining_params, init_checkpoint, to_lodtensor +from plato2_en_base.utils.args import str2bool + + +class Model(ABC): + """ + Basic model wrapper for paddle. + """ + + @classmethod + def add_cmdline_args(cls, parser): + """Add cmdline arguments.""" + group = parser.add_argument_group("Model") + # Init checkpoint + group.add_argument("--init_checkpoint", type=str, default="") + group.add_argument("--init_pretraining_params", type=str, default="") + + # Optimizer + group.add_argument("-lr", "--learning_rate", type=float, default=1e-5, help="The learning rate for optimizer.") + group.add_argument("--warmup_steps", type=int, default=0, help="The warmup steps.") + group.add_argument("--weight_decay", type=float, default=0.0, help="The weight decay for optimizer.") + group.add_argument("--max_grad_norm", type=float, default=.1, help="The maximum norm of gradient.") + + group.add_argument("--use_recompute", type=str2bool, default=False) + group.add_argument("--use_amp", type=str2bool, default=False) + group.add_argument("--amp_loss_scaling", type=float, default=12800) + return group + + def __init__(self, args, place): + self.place = place + self.exe = fluid.Executor(place) + + self.init_checkpoint = args.init_checkpoint + self.init_pretraining_params = args.init_pretraining_params + + self.learning_rate = args.learning_rate + self.warmup_steps = args.warmup_steps + self.weight_decay = args.weight_decay + self.max_grad_norm = args.max_grad_norm + + self.is_distributed = args.is_distributed + self.use_recompute = args.use_recompute + self.use_amp = args.use_amp + self.amp_loss_scaling = args.amp_loss_scaling + self.run_infer = args.get("run_infer", False) + self.batch_size = args.get("batch_size", 1) + self._build_programs() + return + + def _build_programs(self): + """ + Build programs. + + Build train_program, eval_program and inference_program. Only used in static graph mode.
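+
+ In inference mode only an infer program is built and cloned with
+ for_test=True. Otherwise a train program is built, an eval program is
+ cloned from it before the optimizer ops are appended, and, when running
+ distributed, recompute / AMP options are wired in through the fleet
+ DistributedStrategy.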
+ """ + if self.run_infer: + self.startup_program = fluid.Program() + # build infer program + self.infer_program = fluid.Program() + with fluid.program_guard(self.infer_program, self.startup_program): + with fluid.unique_name.guard(): + self.infer_feed_dict = inputs = self._get_feed_dict(is_infer=True) + outputs = self.forward(inputs, is_infer=True) + predictions = self.infer(inputs, outputs) + self.infer_fetch_dict = predictions + self.infer_program = self.infer_program.clone(for_test=True) + + self.program = self.infer_program + else: + if self.is_distributed: + exec_strategy = fluid.ExecutionStrategy() + exec_strategy.use_experimental_executor = True + exec_strategy.num_threads = 4 + exec_strategy.num_iteration_per_drop_scope = 1 + + dist_strategy = DistributedStrategy() + dist_strategy.exec_strategy = exec_strategy + dist_strategy.nccl_comm_num = 1 + dist_strategy.fuse_all_reduce_ops = True + if self.use_recompute: + dist_strategy.forward_recompute = True + dist_strategy.enable_sequential_execution = True + if self.use_amp: + dist_strategy.use_amp = True + dist_strategy.amp_loss_scaling = self.amp_loss_scaling + self.dist_strategy = dist_strategy + + self.startup_program = fluid.Program() + # build train program + self.train_program = fluid.Program() + with fluid.program_guard(self.train_program, self.startup_program): + with fluid.unique_name.guard(): + self.feed_dict = inputs = self._get_feed_dict() + outputs = self.forward(inputs) + if self.is_distributed and self.use_recompute: + self.dist_strategy.recompute_checkpoints = outputs["checkpoints"] + metrics, statistics = self.get_metrics_and_statistics(inputs, outputs) + + # build eval program + self.eval_program = self.train_program.clone(for_test=True) + self.eval_fetch_dict = {**metrics, **statistics} + + scheduled_lr = self.optimize(metrics) + metrics["scheduled_lr"] = scheduled_lr + self.train_fetch_dict = metrics + + self.program = self.train_program + if self.is_distributed: + self.train_program = fleet.main_program + + self.exe.run(self.startup_program) + if self.init_pretraining_params != "": + init_pretraining_params(self.exe, self.init_pretraining_params, self.program) + elif self.init_checkpoint != "": + init_checkpoint(self.exe, self.init_checkpoint, self.program) + return + + def load(self, model_dir, is_checkpoint=False): + """ + Load persistables or parameters. + """ + # TODO: support dygraph. + if is_checkpoint: + init_checkpoint(self.exe, model_dir, self.program) + else: + init_pretraining_params(self.exe, model_dir, self.program) + return + + def save(self, model_dir, is_checkpoint=False): + """ + Save persistables or parameters. + """ + # TODO: support dygraph. + if is_checkpoint: + fluid.io.save_persistables(self.exe, model_dir, self.program) + else: + fluid.io.save_params(self.exe, model_dir, self.program) + return + + @abstractmethod + def _get_feed_dict(self, is_infer=False): + """ + Return input feed list. + """ + pass + + def _get_feed(self, inputs, is_infer=False): + """ + Convert `inputs` into model's feed data format. + """ + if isinstance(inputs, list): + # return list direclty which is used in `get_data_loader`. + return inputs + for k in inputs: + if isinstance(inputs[k], list): + inputs[k] = to_lodtensor(inputs[k], self.place) + return inputs + + def get_data_loader(self, generator=None, is_infer=False): + """ + Return DataLoader. + + If generator is not `None`, the data loader set it as the batch generator. + """ + # TODO: support dygraph. 
+ if is_infer: + feed_name_list, feed_list = zip(*self.infer_feed_dict.items()) + else: + feed_name_list, feed_list = zip(*self.feed_dict.items()) + loader = fluid.io.DataLoader.from_generator( + feed_list=feed_list, capacity=64, use_double_buffer=True, iterable=True) + if generator is not None: + + def __wrapper__(): + for batch in generator(): + batch = self._get_feed(batch) + batch = [batch[name] for name in feed_name_list if name in batch] + yield batch + + loader.set_batch_generator(__wrapper__, self.place) + return loader + + @abstractmethod + def forward(self, inputs, is_infer=False): + """ + Run model main forward. + """ + pass + + @abstractmethod + def get_metrics_and_statistics(self, inputs, outputs): + """ + Get metrics and statistics. + """ + pass + + @abstractmethod + def infer(self, inputs, outputs): + """ + Run model inference. + """ + pass + + def optimize(self, metrics): + """ + Optimize the model by metrics(mainly `metrics["loss"]`). + """ + # TODO: support dygraph + if self.warmup_steps > 0: + scheduled_lr = layers.learning_rate_scheduler.noam_decay(1 / (self.warmup_steps * (self.learning_rate**2)), + self.warmup_steps) + else: + scheduled_lr = layers.create_global_var( + name=fluid.unique_name.generate("learning_rate"), + shape=[1], + value=self.learning_rate, + dtype="float32", + persistable=True) + grad_clip = fluid.clip.GradientClipByGlobalNorm(self.max_grad_norm) + + self.optimizer = AdamW(learning_rate=scheduled_lr, grad_clip=grad_clip, weight_decay=self.weight_decay) + + if self.is_distributed: + self.optimizer = fleet.distributed_optimizer(self.optimizer, strategy=self.dist_strategy) + + self.optimizer.minimize(metrics["loss"]) + return scheduled_lr + + def _execute(self, program, feed, fetch_dict, **kwargs): + """ + Execute program. + """ + fetch_list = [var.name for var in fetch_dict.values()] + fetch_vars = self.exe.run(program, feed, fetch_list, **kwargs) + return dict(zip(fetch_dict.keys(), fetch_vars)) + + def train_step(self, inputs): + """ + Run one training step. + """ + # TODO: support dygraph. + return self._execute(self.train_program, self._get_feed(inputs), self.train_fetch_dict, use_program_cache=True) + + def eval_step(self, inputs): + """ + Run one evaluation step. + """ + # TODO: support dygraph. + return self._execute(self.eval_program, self._get_feed(inputs), self.eval_fetch_dict) + + def infer_step(self, inputs): + """ + Run one inference step. + """ + # TODO: support dygraph. + return self._execute(self.infer_program, self._get_feed(inputs, is_infer=True), self.infer_fetch_dict) + + def save_inference_model(self, inference_model_path): + """ + Save the inference model. + """ + feed_list = [var.name for var in self.infer_feed_dict.values()] + fetch_list = list(self.infer_fetch_dict.values()) + + fluid.io.save_inference_model(inference_model_path, feed_list, fetch_list, self.exe, self.infer_program) diff --git a/modules/text/text_generation/plato2_en_base/models/nsp_model.py b/modules/text/text_generation/plato2_en_base/models/nsp_model.py new file mode 100644 index 0000000000000000000000000000000000000000..8c8f2d9ad484e2bec70577320ef239fd85f3a2bd --- /dev/null +++ b/modules/text/text_generation/plato2_en_base/models/nsp_model.py @@ -0,0 +1,107 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""NSP model.""" + +import paddle.fluid as fluid +import paddle.fluid.layers as layers + +from . import register_model +from .model_base import Model +from .unified_transformer import UnifiedTransformer + + +@register_model("NSPModel") +class NSPModel(UnifiedTransformer): + """NSP model.""" + + def _get_feed_dict(self, is_infer=False): + """ + Get the feed list of the model. + + Args: + is_infer(bool): True if running inference. + + Returns: + list(Variable): The feed list. + list(str): The name of each Variable in feed list. + """ + feed_dict = {} + feed_dict["token_ids"] = layers.data(name="token_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") + feed_dict["type_ids"] = layers.data(name="type_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") + feed_dict["pos_ids"] = layers.data(name="pos_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") + + feed_dict["attention_mask"] = layers.data( + name="attention_mask", shape=[-1, self.max_seq_len, self.max_seq_len], dtype=self.dtype) + feed_dict["label_pos"] = layers.data(name="label_pos", shape=[-1, 1], dtype="int64") + + if not is_infer: + feed_dict["label"] = layers.data(name="label", shape=[-1, 1], dtype="int64") + feed_dict["tgt_label"] = layers.data(name="tgt_ids", shape=[-1, 1], dtype="int64") + feed_dict["tgt_pos"] = layers.data(name="tgt_pos", shape=[-1, 1], dtype="int64") + + feed_dict["data_id"] = layers.data(name="data_id", shape=[-1, 1], dtype="int64") + return feed_dict + + def _get_feed(self, inputs, is_infer=False): + return Model._get_feed(self, inputs, is_infer) + + def forward(self, inputs, is_infer=False): + outputs = {} + self.generation_caches = None + outputs["enc_out"], self.checkpoints = self._generation_network( + token_ids=inputs["token_ids"], + type_ids=inputs["type_ids"], + pos_ids=inputs["pos_ids"], + generation_mask=inputs["attention_mask"]) + return outputs + + def _get_metrics(self, inputs, outputs): + metrics = {} + fc_out = self._calc_logits(outputs["enc_out"], seq_pos=inputs["tgt_pos"]) + lm_loss = layers.softmax_with_cross_entropy(logits=fc_out, label=inputs["tgt_label"]) + need_cal = layers.not_equal(inputs["tgt_label"], layers.fill_constant(shape=[1], dtype="int64", value=1)) + need_cal = layers.cast(need_cal, self.dtype) + mean_lm_loss = layers.reduce_sum(lm_loss * need_cal) / (layers.reduce_sum(need_cal) + 1e-10) + + pooled_out = self._get_pooled_output(outputs["enc_out"], inputs["label_pos"]) + nsp_fc_out = layers.fc( + input=pooled_out, + size=2, + param_attr=fluid.ParamAttr(name="next_sent_fc.w_0", initializer=self.param_initializer), + bias_attr="next_sent_fc.b_0") + nsp_loss, nsp_softmax = layers.softmax_with_cross_entropy( + logits=nsp_fc_out, label=inputs["label"], return_softmax=True) + + nsp_acc = layers.accuracy(nsp_softmax, inputs["label"]) + mean_nsp_loss = layers.mean(nsp_loss) + + metrics["loss"] = mean_lm_loss + mean_nsp_loss + metrics["lm_loss"] = mean_lm_loss + metrics["nsp_loss"] = mean_nsp_loss + metrics["nsp_acc"] = nsp_acc + return metrics + + def infer(self, inputs, outputs): + pooled_out = self._get_pooled_output(outputs["enc_out"], inputs["label_pos"]) + nsp_fc_out = 
layers.fc( + input=pooled_out, + size=2, + param_attr=fluid.ParamAttr(name="next_sent_fc.w_0", initializer=self.param_initializer), + bias_attr="next_sent_fc.b_0") + scores = layers.softmax(nsp_fc_out) + predictions = {"scores": scores, "data_id": inputs["data_id"]} + return predictions + + def infer_step(self, inputs): + return Model.infer_step(self, inputs) diff --git a/modules/text/text_generation/plato2_en_base/models/optimizer.py b/modules/text/text_generation/plato2_en_base/models/optimizer.py new file mode 100644 index 0000000000000000000000000000000000000000..0381f12390a2f1fb672ed8a5ed3d815874fddb21 --- /dev/null +++ b/modules/text/text_generation/plato2_en_base/models/optimizer.py @@ -0,0 +1,37 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Optimizer.""" + +import re + +import paddle.fluid as fluid +import paddle.fluid.layers as layers + + +class AdamW(fluid.optimizer.AdamOptimizer): + """AdamW object for dygraph""" + + def __init__(self, *args, **kwargs): + weight_decay = kwargs.pop('weight_decay', None) + var_name_to_exclude = kwargs.pop('var_name_to_exclude', '.*layer_norm_scale|.*layer_norm_bias|.*b_0') + super(AdamW, self).__init__(*args, **kwargs) + self.wd = weight_decay + self.pat = re.compile(var_name_to_exclude) + + def apply_optimize(self, loss, startup_program, params_grads): + """Update params with weight decay.""" + super(AdamW, self).apply_optimize(loss, startup_program, params_grads) + for p, g in params_grads: + if not self.pat.match(p.name): + layers.assign(p * (1. - self.wd * self._learning_rate), p) diff --git a/modules/text/text_generation/plato2_en_base/models/plato.py b/modules/text/text_generation/plato2_en_base/models/plato.py new file mode 100644 index 0000000000000000000000000000000000000000..e76884664c79f903d899ad0acdf349dbdc218ad2 --- /dev/null +++ b/modules/text/text_generation/plato2_en_base/models/plato.py @@ -0,0 +1,241 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Plato model.""" + +import numpy as np +import paddle.fluid as fluid +import paddle.fluid.layers as layers + +from . 
import register_model +from .model_base import Model +from .unified_transformer import UnifiedTransformer +from .transformer_block import encoder, pre_process_layer +from plato2_en_base.utils import repeat_array_or_tensor +from plato2_en_base.utils.args import str2bool +from .generator import Generator + + +@register_model("Plato") +class Plato(UnifiedTransformer): + """Plato model.""" + + @classmethod + def add_cmdline_args(cls, parser): + """Add cmdline arguments.""" + group = UnifiedTransformer.add_cmdline_args(parser) + group.add_argument("--use_bow", type=str2bool, default=True) + group.add_argument("--use_entropy", type=str2bool, default=False) + return group + + def __init__(self, args, place): + # latent related + self.mask_id = args.mask_id + self.latent_type_size = args.latent_type_size + self.latent_emb_name = "latent_embedding" + self.use_bow = args.use_bow + self.use_entropy = args.use_entropy + + super(Plato, self).__init__(args, place) + + def _get_feed_dict(self, is_infer=False): + feed_dict = {} + feed_dict["token_ids"] = layers.data(name="token_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") + feed_dict["type_ids"] = layers.data(name="type_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") + feed_dict["pos_ids"] = layers.data(name="pos_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") + + if not is_infer: + feed_dict["recognition_mask"] = layers.data( + name="recognition_mask", shape=[-1, self.max_seq_len + 1, self.max_seq_len + 1], dtype=self.dtype) + feed_dict["generation_mask"] = layers.data( + name="generation_mask", shape=[-1, self.max_seq_len + 1, self.max_seq_len + 1], dtype=self.dtype) + + if is_infer: + feed_dict["tgt_ids"] = layers.data( + name="tgt_ids", shape=[-1, self.max_seq_len, 1], dtype="int64", lod_level=2) + feed_dict["tgt_pos"] = layers.data( + name="tgt_pos", shape=[-1, self.max_seq_len, 1], dtype="int64", lod_level=2) + feed_dict["init_score"] = layers.data(name="init_score", shape=[-1, 1], dtype="float32", lod_level=1) + feed_dict["parent_idx"] = layers.data(name="parent_idx", shape=[-1], dtype="int64") + + feed_dict["tgt_generation_mask"] = layers.data( + name="tgt_generation_mask", shape=[-1, 1, self.max_seq_len + 1], dtype="float32") + feed_dict["latent_id"] = layers.data(name="latent_id", shape=[-1, 1], dtype="int64") + else: + feed_dict["tgt_label"] = layers.data(name="tgt_label", shape=[-1, 1], dtype="int64") + feed_dict["tgt_pos"] = layers.data(name="tgt_pos", shape=[-1, 1], dtype="int64") + + if self.use_bow: + feed_dict["bow_label"] = layers.data(name="bow_label", shape=[-1, 1], dtype="int64") + feed_dict["bow_pos"] = layers.data(name="bow_pos", shape=[-1, 1], dtype="int64") + + feed_dict["data_id"] = layers.data(name="data_id", shape=[-1, 1], dtype="int64") + return feed_dict + + def _recognition_network(self, token_ids, type_ids, pos_ids, recognition_mask): + mask_id = layers.fill_constant_batch_size_like( + input=token_ids, shape=[-1, 1, 1], value=self.mask_id, dtype="int64") + mask_emb = layers.embedding( + input=mask_id, + size=[self.vocab_size, self.emb_size], + dtype=self.dtype, + param_attr=fluid.ParamAttr(name=self.token_emb_name, initializer=self.param_initializer)) + emb_out, n_head_self_attn_mask = self._gen_input( + token_ids, type_ids, pos_ids, recognition_mask, aux_emb=mask_emb) + + recognition_out, checkpoints = self._encode(emb_out, n_head_self_attn_mask) + + recognition_feat = layers.slice(input=recognition_out, axes=[1], starts=[0], ends=[1]) + recognition_feat = layers.fc( + input=recognition_feat,
size=self.hidden_size, + act="tanh", + param_attr=fluid.ParamAttr(name="recognition_fc.w_0", initializer=self.param_initializer), + bias_attr="recognition_fc.b_0") + logits = layers.fc( + input=recognition_feat, + size=self.latent_type_size, + param_attr=fluid.ParamAttr(name=self.latent_emb_name, initializer=self.param_initializer), + bias_attr="recognition_bias") + return logits, checkpoints + + def _gumbel_softmax(self, logits, tau=0.67, eps=1e-10): + u = layers.uniform_random_batch_size_like(logits, shape=[-1, self.latent_type_size], min=0.0, max=1.0) + u.stop_gradient = True + gumbel = 0.0 - layers.log(eps - layers.log(u + eps)) + y = logits + gumbel + return layers.softmax(y / tau) + + def forward(self, inputs, is_infer=False): + """ + Run model main forward. + """ + outputs = {} + if is_infer: + self.generation_caches = [{ + "k": + layers.fill_constant_batch_size_like( + input=inputs["token_ids"], shape=[-1, 0, self.d_key * self.n_head], dtype=self.dtype, value=0), + "v": + layers.fill_constant_batch_size_like( + input=inputs["token_ids"], shape=[-1, 0, self.d_value * self.n_head], dtype=self.dtype, value=0), + } for i in range(self.n_layer)] + else: + self.generation_caches = None + + latent_embeddings = layers.create_parameter( + shape=[self.emb_size, self.latent_type_size], + dtype=self.dtype, + attr=fluid.ParamAttr(name=self.latent_emb_name, initializer=self.param_initializer)) + + if is_infer: + latent_id = inputs["latent_id"] + weights = layers.one_hot(latent_id, self.latent_type_size) + else: + logits, recognition_checkpoints = self._recognition_network( + token_ids=inputs["token_ids"], + type_ids=inputs["type_ids"], + pos_ids=inputs["pos_ids"], + recognition_mask=inputs["recognition_mask"], + ) + outputs["post_probs"] = layers.softmax(logits) + weights = self._gumbel_softmax(logits) + outputs["checkpoints"] = recognition_checkpoints + + latent_emb = layers.matmul(x=weights, y=latent_embeddings, transpose_y=True) + outputs["enc_out"], generation_checkpoints = self._generation_network( + token_ids=inputs["token_ids"], + type_ids=inputs["type_ids"], + pos_ids=inputs["pos_ids"], + generation_mask=inputs["generation_mask"], + aux_emb=layers.unsqueeze(latent_emb, axes=[1]), + gather_idx=inputs.get("parent_idx", None), + ) + + if not is_infer: + outputs["checkpoints"].extend(generation_checkpoints) + return outputs + + def _calc_bow_logits(self, enc_out, checkpoints, bow_pos): + """Get the logits of generation.""" + bow_feat = layers.slice(input=enc_out, axes=[1], starts=[0], ends=[1]) + bow_feat = layers.reshape(x=bow_feat, shape=[-1, self.hidden_size]) + bow_pos = layers.cast(x=bow_pos, dtype="int32") + bow_feat = layers.gather(input=bow_feat, index=bow_pos) + + bow_trans_feat = layers.fc( + input=bow_feat, + size=self.emb_size, + act=self.hidden_act, + param_attr=fluid.ParamAttr(name="bow_trans_fc.w_0", initializer=self.param_initializer), + bias_attr=fluid.ParamAttr(name="bow_trans_fc.b_0")) + + bow_trans_feat = pre_process_layer(bow_trans_feat, self.post_cls_cmd, name="bow_trans") + + checkpoints.append(bow_trans_feat) + + if self.weight_sharing: + fc_out = layers.matmul( + x=bow_trans_feat, + y=fluid.default_main_program().global_block().var(self.token_emb_name), + transpose_y=True) + if self.cls_bias: + fc_out += layers.create_parameter( + shape=[self.vocab_size], + dtype=self.dtype, + attr=fluid.ParamAttr(name="bow_out_fc.b_0"), + is_bias=True) + else: + bow_out_bias_attr = fluid.ParamAttr(name="bow_out_fc.b_0") if self.cls_bias else False + fc_out = layers.fc( + 
input=bow_trans_feat, + size=self.vocab_size, + param_attr=fluid.ParamAttr(name="bow_out_fc.w_0", initializer=self.param_initializer), + bias_attr=bow_out_bias_attr) + return fc_out + + def _get_metrics(self, inputs, outputs): + metrics = super(Plato, self)._get_metrics(inputs, outputs) + + if self.use_bow: + fc_out = self._calc_bow_logits(outputs["enc_out"], outputs["checkpoints"], inputs["bow_pos"]) + bow_loss = layers.softmax_with_cross_entropy(logits=fc_out, label=inputs["bow_label"]) + mean_bow_loss = layers.mean(bow_loss) + metrics["token_bow_loss"] = mean_bow_loss + metrics["loss"] = metrics["loss"] + mean_bow_loss + + entropy_loss = layers.reduce_sum(outputs["post_probs"] * layers.log(outputs["post_probs"]), dim=1) + mean_entropy_loss = layers.mean(entropy_loss) + metrics["entropy_loss"] = mean_entropy_loss + if self.use_entropy: + metrics["loss"] = metrics["loss"] + mean_entropy_loss + return metrics + + def infer_step(self, inputs): + """ + Run one inference step. + """ + if self.do_generation: + batch_size = len(inputs["data_id"]) + new_bsz = batch_size * self.latent_type_size + inputs = { + name: repeat_array_or_tensor(array_or_tensor, self.place, self.latent_type_size) + for name, array_or_tensor in inputs.items() + } + # Add latent_id + inputs["latent_id"] = np.array([i for i in range(self.latent_type_size) for _ in range(batch_size)], + dtype="int64").reshape([-1, 1]) + + return super(Plato, self).infer_step(inputs) + else: + return self._execute(self.infer_program, self._get_feed(inputs, is_infer=True), self.infer_fetch_dict) diff --git a/modules/text/text_generation/plato2_en_base/models/transformer_block.py b/modules/text/text_generation/plato2_en_base/models/transformer_block.py new file mode 100644 index 0000000000000000000000000000000000000000..041306a591233d3e1bc1f1a1e11943d54414033c --- /dev/null +++ b/modules/text/text_generation/plato2_en_base/models/transformer_block.py @@ -0,0 +1,332 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Transformer block.""" + +from functools import partial + +import paddle.fluid as fluid +import paddle.fluid.layers as layers + + +def multi_head_attention(queries, + keys, + values, + attn_bias, + d_key, + d_value, + d_model, + n_head=1, + dropout_rate=0., + cache=None, + gather_idx=None, + store=False, + param_initializer=None, + name="multi_head_att"): + """ + Multi-Head Attention. Note that attn_bias is added to the logit before + computing softmax activation to mask certain selected positions so that + they will not be considered in attention weights. + """ + keys = queries if keys is None else keys + values = keys if values is None else values + + if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): + raise ValueError("Inputs: queries, keys and values should all be 3-D tensors.") + + def __compute_qkv(queries, keys, values, n_head, d_key, d_value): + """ + Add linear projection to queries, keys, and values.
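+ Each projection emits n_head * d_key (or n_head * d_value) units so the
+ result can later be split into per-head tensors by __split_heads.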
+ """ + q = layers.fc( + input=queries, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + "_query_fc.w_0", initializer=param_initializer), + bias_attr=name + "_query_fc.b_0") + k = layers.fc( + input=keys, + size=d_key * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + "_key_fc.w_0", initializer=param_initializer), + bias_attr=name + "_key_fc.b_0") + v = layers.fc( + input=values, + size=d_value * n_head, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + "_value_fc.w_0", initializer=param_initializer), + bias_attr=name + "_value_fc.b_0") + return q, k, v + + def __split_heads(x, n_head): + """ + Reshape the last dimension of inpunt tensor x so that it becomes two + dimensions and then transpose. Specifically, input a tensor with shape + [bs, max_sequence_length, n_head * hidden_dim] then output a tensor + with shape [bs, n_head, max_sequence_length, hidden_dim]. + """ + hidden_size = x.shape[-1] + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + reshaped = layers.reshape(x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) + + # permuate the dimensions into: + # [batch_size, n_head, max_sequence_len, hidden_size_per_head] + return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + + def __combine_heads(x): + """ + Transpose and then reshape the last two dimensions of inpunt tensor x + so that it becomes one dimension, which is reverse to __split_heads. + """ + if len(x.shape) == 3: return x + if len(x.shape) != 4: + raise ValueError("Input(x) should be a 4-D Tensor.") + + trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + # The value 0 in shape attr means copying the corresponding dimension + # size of the input as the output dimension size. + return layers.reshape(x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], inplace=True) + + def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): + """ + Scaled Dot-Product Attention + """ + scaled_q = layers.scale(x=q, scale=d_key**-0.5) + product = layers.matmul(x=scaled_q, y=k, transpose_y=True) + if attn_bias: + product += attn_bias + weights = layers.softmax(product, use_cudnn=True) + if dropout_rate: + weights = layers.dropout( + weights, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + out = layers.matmul(weights, v) + return out + + q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) + + if cache is not None: # use cache and concat time steps + # Since the inplace reshape in __split_heads changes the shape of k and + # v, which is the cache input for next time step, reshape the cache + # input from the previous time step first. 
+ cache_k, cache_v = cache["k"], cache["v"] + select_k = layers.gather(cache_k, index=gather_idx) + select_v = layers.gather(cache_v, index=gather_idx) + select_k = layers.reshape(select_k, shape=[0, 0, d_key * n_head]) + select_v = layers.reshape(select_v, shape=[0, 0, d_value * n_head]) + if store: + k = layers.concat([select_k, k], axis=1) + v = layers.concat([select_v, v], axis=1) + layers.assign(k, cache["k"]) + layers.assign(v, cache["v"]) + else: + #k = select_k + #v = select_v + tmp_k = layers.concat([select_k, k[:, :1]], axis=1) + tmp_v = layers.concat([select_v, v[:, :1]], axis=1) + layers.assign(tmp_k, cache["k"]) + layers.assign(tmp_v, cache["v"]) + k = layers.concat([select_k, k], axis=1) + v = layers.concat([select_v, v], axis=1) + + q = __split_heads(q, n_head) + k = __split_heads(k, n_head) + v = __split_heads(v, n_head) + + ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate) + + out = __combine_heads(ctx_multiheads) + + # Project back to the model size. + proj_out = layers.fc( + input=out, + size=d_model, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + "_output_fc.w_0", initializer=param_initializer), + bias_attr=name + "_output_fc.b_0") + return proj_out + + +def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate, hidden_act, param_initializer=None, name="ffn"): + """ + Position-wise Feed-Forward Networks. + This module consists of two linear transformations with a ReLU activation + in between, which is applied to each position separately and identically. + """ + hidden = layers.fc( + input=x, + size=d_inner_hid, + num_flatten_dims=2, + act=hidden_act, + param_attr=fluid.ParamAttr(name=name + "_fc_0.w_0", initializer=param_initializer), + bias_attr=name + "_fc_0.b_0") + if dropout_rate: + hidden = layers.dropout( + hidden, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + out = layers.fc( + input=hidden, + size=d_hid, + num_flatten_dims=2, + param_attr=fluid.ParamAttr(name=name + "_fc_1.w_0", initializer=param_initializer), + bias_attr=name + "_fc_1.b_0") + return out + + +def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., epsilon=1e-5, name=""): + """ + Add residual connection, layer normalization and dropout to the out tensor + optionally according to the value of process_cmd. + This will be used before or after multi-head attention and position-wise + feed-forward networks.
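+ For example, process_cmd "dan" applies dropout, then the residual add,
+ then layer normalization, in that order.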
+ """ + for cmd in process_cmd: + if cmd == "a": # add residual connection + out = out + prev_out if prev_out else out + elif cmd == "n": # add layer normalization + out = layers.layer_norm( + out, + begin_norm_axis=len(out.shape) - 1, + param_attr=fluid.ParamAttr(name=name + "_layer_norm_scale", initializer=fluid.initializer.Constant(1.)), + bias_attr=fluid.ParamAttr(name=name + "_layer_norm_bias", initializer=fluid.initializer.Constant(0.)), + epsilon=epsilon) + elif cmd == "d": # add dropout + if dropout_rate: + out = layers.dropout( + out, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) + return out + + +pre_process_layer = partial(pre_post_process_layer, None) +post_process_layer = pre_post_process_layer + + +def encoder_layer(input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name="", + epsilon=1e-5, + cache=None, + gather_idx=None, + store=False): + """ + The encoder layers that can be stacked to form a deep encoder. + This module consits of a multi-head (self) attention followed by + position-wise feed-forward networks and both the two components companied + with the pre_process_layer / post_process_layer to add residual connection, + layer normalization and droput. + """ + attn_output = multi_head_attention( + pre_process_layer(input, preprocess_cmd, prepostprocess_dropout, epsilon=epsilon, name=name + "_pre_att"), + None, + None, + attn_bias, + d_key, + d_value, + d_model, + n_head, + attention_dropout, + param_initializer=param_initializer, + name=name + "_multi_head_att", + cache=cache, + gather_idx=gather_idx, + store=store) + attn_output = post_process_layer( + input, attn_output, postprocess_cmd, prepostprocess_dropout, name=name + "_post_att", epsilon=epsilon) + ffd_output = positionwise_feed_forward( + pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout, epsilon=epsilon, name=name + "_pre_ffn"), + d_inner_hid, + d_model, + relu_dropout, + hidden_act, + param_initializer=param_initializer, + name=name + "_ffn") + ffd_output = post_process_layer( + attn_output, ffd_output, postprocess_cmd, prepostprocess_dropout, name=name + "_post_ffn", epsilon=epsilon) + return ffd_output, [attn_output, ffd_output] + + +def encoder(enc_input, + attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd="n", + postprocess_cmd="da", + param_initializer=None, + name="", + epsilon=1e-5, + n_layer_per_block=1, + param_share="normal", + caches=None, + gather_idx=None, + store=False): + """ + The encoder is composed of a stack of identical layers returned by calling + encoder_layer. 
+ """ + checkpoints = [] + names = [] + if param_share == "inner_share": + for _ in range(n_layer // n_layer_per_block): + for i in range(n_layer_per_block): + names.append(name + "_layer_" + str(i)) + else: + for i in range(n_layer // n_layer_per_block): + for _ in range(n_layer_per_block): + names.append(name + "_layer_" + str(i)) + + for i in range(n_layer): + enc_output, cps = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + epsilon=epsilon, + name=names[i], + cache=caches[i] if caches is not None else None, + gather_idx=gather_idx, + store=store) + checkpoints.extend(cps) + enc_input = enc_output + enc_output = pre_process_layer( + enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder", epsilon=epsilon) + + return enc_output, checkpoints diff --git a/modules/text/text_generation/plato2_en_base/models/unified_transformer.py b/modules/text/text_generation/plato2_en_base/models/unified_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..a1decd26950cc20ee6ff1857f8132ced16b50c86 --- /dev/null +++ b/modules/text/text_generation/plato2_en_base/models/unified_transformer.py @@ -0,0 +1,378 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Unified Transformer model.""" + +import numpy as np +import paddle.fluid as fluid +import paddle.fluid.layers as layers + +from . 
import register_model +from .model_base import Model +from .transformer_block import encoder, pre_process_layer +from plato2_en_base.utils.args import str2bool +from plato2_en_base.utils import repeat_array_or_tensor, slice_array_or_tensor +from .generator import Generator + + +@register_model("UnifiedTransformer") +class UnifiedTransformer(Model): + """Unified Transformer""" + + @classmethod + def add_cmdline_args(cls, parser): + """Add cmdline arguments.""" + group = Model.add_cmdline_args(parser) + group.add_argument("--max_seq_len", type=int, default=256) + group.add_argument("--weight_sharing", type=str2bool, default=True) + group.add_argument("--mem_efficient", type=str2bool, default=False) + + Generator.add_cmdline_args(parser) + return group + + def __init__(self, args, place): + self.max_seq_len = args.max_seq_len + + self.emb_size = args.emb_size or args.hidden_size + self.hidden_size = args.hidden_size + + self.n_layer = args.num_hidden_layers + self.n_head = args.num_attention_heads + self.d_key = args.get("key_size", self.hidden_size // self.n_head) + self.d_value = args.get("value_size", self.hidden_size // self.n_head) + self.inner_hidden_size = args.get("inner_hidden_size", self.hidden_size * 4) + + self.vocab_size = args.vocab_size + self.max_position_seq_len = args.max_position_embeddings + self.type_size = args.type_vocab_size + self.token_emb_name = "word_embedding" + self.type_emb_name = "sent_embedding" + self.pos_emb_name = "pos_embedding" + + self.epsilon = args.epsilon or 1e-5 + self.n_layer_per_block = args.n_layer_per_block or 1 + self.pre_encoder_cmd = args.get("pre_encoder_cmd", "nd") + self.preprocess_cmd = args.get("preprocess_cmd", "") + self.postprocess_cmd = args.get("postprocess_cmd", "dan") + self.post_cls_cmd = args.get("post_cls_cmd", "n") + self.cls_bias = args.get("cls_bias", True) + if self.hidden_size != self.emb_size: + self.emb_mapping_in = True + else: + self.emb_mapping_in = args.get("emb_mapping_in", False) + + self.hidden_act = args.hidden_act + self.prepostprocess_dropout = args.hidden_dropout_prob + self.attention_dropout = args.attention_probs_dropout_prob + self.weight_sharing = args.weight_sharing + + self.mem_efficient = args.mem_efficient + + self.dtype = "float32" + + # Initialize all weights by truncated normal initializer, and all biases + # will be initialized by constant zero by default.
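+ # (`scale` below is the standard deviation of the truncated normal
+ # distribution.)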
+ self.param_initializer = fluid.initializer.TruncatedNormal(scale=args.initializer_range) + + # task-related + self.generator = Generator(args) + self.do_generation = args.do_generation + + super(UnifiedTransformer, self).__init__(args, place) + + def _gen_input(self, token_ids, type_ids, pos_ids, input_mask, aux_emb=None): + token_emb_out = layers.embedding( + input=token_ids, + size=[self.vocab_size, self.emb_size], + dtype=self.dtype, + param_attr=fluid.ParamAttr(name=self.token_emb_name, initializer=self.param_initializer)) + type_emb_out = layers.embedding( + input=type_ids, + size=[self.type_size, self.emb_size], + dtype=self.dtype, + param_attr=fluid.ParamAttr(name=self.type_emb_name, initializer=self.param_initializer)) + pos_emb_out = layers.embedding( + input=pos_ids, + size=[self.max_position_seq_len, self.emb_size], + dtype=self.dtype, + param_attr=fluid.ParamAttr(name=self.pos_emb_name, initializer=self.param_initializer)) + emb_out = token_emb_out + type_emb_out + pos_emb_out + + # auxiliary memory embeddings + if aux_emb is not None: + emb_out = layers.concat([aux_emb, emb_out], axis=1) + + # post process of embedding + emb_out = pre_process_layer( + emb_out, self.pre_encoder_cmd, self.prepostprocess_dropout, name="pre_encoder", epsilon=self.epsilon) + if self.emb_mapping_in: + emb_out = layers.fc( + input=emb_out, + num_flatten_dims=2, + size=self.hidden_size, + param_attr=fluid.ParamAttr(name="emb_hidden_mapping", initializer=self.param_initializer), + bias_attr="emb_hidden_mapping_bias") + + # generate n-head self-attention mask + self_attn_mask = input_mask + self_attn_mask = layers.scale(x=self_attn_mask, scale=1e4, bias=-1.0, bias_after_scale=False) + n_head_self_attn_mask = layers.stack(x=[self_attn_mask] * self.n_head, axis=1) + n_head_self_attn_mask.stop_gradient = True + + return emb_out, n_head_self_attn_mask + + def _get_pooled_output(self, enc_out, pos): + enc_out = layers.reshape(x=enc_out, shape=[-1, self.hidden_size]) + pos = layers.cast(x=pos, dtype="int32") + feat = layers.gather(input=enc_out, index=pos) + + pooled_out = layers.fc( + input=feat, + size=self.hidden_size, + act="tanh", + param_attr=fluid.ParamAttr(name="pooled_fc.w_0", initializer=self.param_initializer), + bias_attr="pooled_fc.b_0") + return pooled_out + + def _generation_network(self, token_ids, type_ids, pos_ids, generation_mask, aux_emb=None, gather_idx=None): + emb_out, n_head_self_attn_mask = self._gen_input(token_ids, type_ids, pos_ids, generation_mask, aux_emb=aux_emb) + return self._encode(emb_out, n_head_self_attn_mask, self.generation_caches, gather_idx=gather_idx) + + def _encode(self, emb_out, n_head_self_attn_mask, caches=None, gather_idx=None): + return encoder( + enc_input=emb_out, + attn_bias=n_head_self_attn_mask, + n_layer=self.n_layer, + n_head=self.n_head, + d_key=self.d_key, + d_value=self.d_value, + d_model=self.hidden_size, + d_inner_hid=self.inner_hidden_size, + prepostprocess_dropout=self.prepostprocess_dropout, + attention_dropout=self.attention_dropout, + relu_dropout=0, + hidden_act=self.hidden_act, + preprocess_cmd=self.preprocess_cmd, + postprocess_cmd=self.postprocess_cmd, + param_initializer=self.param_initializer, + epsilon=self.epsilon, + n_layer_per_block=self.n_layer_per_block, + name="encoder", + caches=caches, + gather_idx=gather_idx, + store=caches is not None) + + def _gumbel_softmax(self, logits, tau=0.67, eps=1e-10): + u = layers.uniform_random_batch_size_like(logits, shape=[-1, self.latent_type_size], min=0.0, max=1.0) + u.stop_gradient = True + 
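+ # -log(-log(u)) converts the uniform noise into standard Gumbel samples;
+ # adding them to the logits and taking a temperature-scaled softmax gives
+ # a differentiable approximation of sampling a discrete latent type.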
gumbel = 0.0 - layers.log(eps - layers.log(u + eps)) + y = logits + gumbel + return layers.softmax(y / tau) + + def _get_feed_dict(self, is_infer=False): + """ + Get the feed list of the model. + + Args: + is_infer(bool): True if running inference. + + Returns: + list(Variable): The feed list. + list(str): The name of each Variable in feed list. + """ + feed_dict = {} + feed_dict["token_ids"] = layers.data(name="token_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") + feed_dict["type_ids"] = layers.data(name="type_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") + feed_dict["pos_ids"] = layers.data(name="pos_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") + + feed_dict["generation_mask"] = layers.data( + name="generation_mask", shape=[-1, self.max_seq_len, self.max_seq_len], dtype=self.dtype) + + if is_infer: + feed_dict["tgt_ids"] = layers.data( + name="tgt_ids", shape=[-1, self.max_seq_len, 1], dtype="int64", lod_level=2) + feed_dict["tgt_pos"] = layers.data( + name="tgt_pos", shape=[-1, self.max_seq_len, 1], dtype="int64", lod_level=2) + feed_dict["init_score"] = layers.data(name="init_score", shape=[-1, 1], dtype="float32", lod_level=1) + feed_dict["parent_idx"] = layers.data(name="parent_idx", shape=[-1], dtype="int64") + + feed_dict["tgt_generation_mask"] = layers.data( + name="tgt_generation_mask", shape=[-1, 1, self.max_seq_len], dtype="float32") + else: + feed_dict["tgt_label"] = layers.data(name="tgt_label", shape=[-1, 1], dtype="int64") + feed_dict["tgt_pos"] = layers.data(name="tgt_pos", shape=[-1, 1], dtype="int64") + + feed_dict["data_id"] = layers.data(name="data_id", shape=[-1, 1], dtype="int64") + return feed_dict + + def forward(self, inputs, is_infer=False): + """ + Run model main forward. + """ + outputs = {} + if is_infer: + self.generation_caches = [{ + "k": + layers.fill_constant_batch_size_like( + input=inputs["token_ids"], shape=[-1, 0, self.d_key * self.n_head], dtype=self.dtype, value=0), + "v": + layers.fill_constant_batch_size_like( + input=inputs["token_ids"], shape=[-1, 0, self.d_value * self.n_head], dtype=self.dtype, value=0), + } for i in range(self.n_layer)] + else: + self.generation_caches = None + + outputs["enc_out"], generation_checkpoints = self._generation_network( + token_ids=inputs["token_ids"], + type_ids=inputs["type_ids"], + pos_ids=inputs["pos_ids"], + generation_mask=inputs["generation_mask"], + gather_idx=inputs.get("parent_idx", None)) + + if not is_infer: + outputs["checkpoints"] = generation_checkpoints + return outputs + + def _calc_logits(self, enc_out, checkpoints=None, seq_pos=None): + """Get the logits of generation.""" + enc_out = layers.reshape(x=enc_out, shape=[-1, self.hidden_size]) + if seq_pos is not None: + seq_pos = layers.cast(x=seq_pos, dtype="int32") + seq_feat = layers.gather(input=enc_out, index=seq_pos) + else: + seq_feat = enc_out + + seq_trans_feat = layers.fc( + input=seq_feat, + size=self.emb_size, + act=self.hidden_act, + param_attr=fluid.ParamAttr(name="mask_lm_trans_fc.w_0", initializer=self.param_initializer), + bias_attr=fluid.ParamAttr(name="mask_lm_trans_fc.b_0")) + + seq_trans_feat = pre_process_layer(seq_trans_feat, self.post_cls_cmd, name="mask_lm_trans") + + if checkpoints is not None: + checkpoints.append(seq_trans_feat) + + if self.weight_sharing: + fc_out = layers.matmul( + x=seq_trans_feat, + y=fluid.default_main_program().global_block().var(self.token_emb_name), + transpose_y=True) + if self.cls_bias: + fc_out += layers.create_parameter( + shape=[self.vocab_size], + dtype=self.dtype, 
+ attr=fluid.ParamAttr(name="mask_lm_out_fc.b_0"), + is_bias=True) + else: + seq_out_bias_attr = fluid.ParamAttr(name="mask_lm_out_fc.b_0") if self.cls_bias else False + fc_out = layers.fc( + input=seq_trans_feat, + size=self.vocab_size, + param_attr=fluid.ParamAttr(name="mask_lm_out_fc.w_0", initializer=self.param_initializer), + bias_attr=seq_out_bias_attr) + return fc_out + + def _get_metrics(self, inputs, outputs): + metrics = {} + + fc_out = self._calc_logits(outputs["enc_out"], outputs["checkpoints"], inputs["tgt_pos"]) + tgt_lm_loss = layers.softmax_with_cross_entropy(logits=fc_out, label=inputs["tgt_label"]) + mean_tgt_lm_loss = layers.mean(tgt_lm_loss) + loss = mean_tgt_lm_loss + metrics["token_lm_loss"] = mean_tgt_lm_loss + + metrics["loss"] = loss + return metrics + + def _get_statistics(self, inputs, outputs): + statistics = {} + if "tgt_label" in inputs: + statistics["tokens_num"] = layers.reduce_sum( + layers.fill_constant_batch_size_like(input=inputs["tgt_label"], value=1.0, shape=[-1], dtype="int64")) + statistics["batch_size"] = layers.reduce_sum( + layers.fill_constant_batch_size_like(input=inputs["token_ids"], value=1.0, shape=[-1], dtype="int64")) + return statistics + + def get_metrics_and_statistics(self, inputs, outputs): + """ + Get metrics and statistics. + """ + metrics = self._get_metrics(inputs, outputs) + statistics = self._get_statistics(inputs, outputs) + return metrics, statistics + + def infer(self, inputs, outputs): + """ + Run model inference. + """ + if self.do_generation: + return self.generator.inference(self, inputs, outputs) + else: + raise NotImplementedError + + def _run_generation(self, inputs): + """ + Run generation. + """ + batch_size = len(inputs["data_id"]) + inputs["parent_idx"] = np.array(range(batch_size), dtype="int64") + outputs = self._execute( + self.infer_program, self._get_feed(inputs, is_infer=True), self.infer_fetch_dict, return_numpy=False) + + predictions = [] + data_id_list = np.array(outputs["data_id"]).reshape(-1).tolist() + token_ids_list = np.array(outputs["token_ids"]).squeeze(2).tolist() + seq_ids = outputs["finished_ids"] + seq_ids_np = np.array(outputs["finished_ids"]) + seq_scores_np = np.array(outputs["finished_scores"]) + for i, (data_id, token_ids) in enumerate(zip(data_id_list, token_ids_list)): + start = seq_ids.lod()[0][i] + end = seq_ids.lod()[0][i + 1] + for j in range(start, end): + sub_start = seq_ids.lod()[1][j] + sub_end = seq_ids.lod()[1][j + 1] + info = {} + info["data_id"] = data_id + info["decode_score"] = float(seq_scores_np[sub_end - 1]) + info["context_token_ids"] = token_ids + info["response_token_ids"] = seq_ids_np[sub_start:sub_end].tolist() + predictions.append(info) + return predictions + + def infer_step(self, inputs): + """ + Run one inference step. 
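+
+ When generating, inputs may be repeated num_samples times for
+ multi-sample reranking; in mem_efficient mode they are additionally
+ sliced into batch_size-sized chunks that are decoded one chunk at a time.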
+ """ + if self.do_generation: + if self.generator.num_samples: + inputs = { + name: repeat_array_or_tensor(array_or_tensor, self.place, self.generator.num_samples) + for name, array_or_tensor in inputs.items() + } + + if self.mem_efficient: + predictions = [] + for idx in range(0, len(inputs["data_id"]), self.batch_size): + part_inputs = { + name: slice_array_or_tensor(array_or_tensor, self.place, idx, idx + self.batch_size) + for name, array_or_tensor in inputs.items() + } + part_outputs = self._run_generation(part_inputs) + predictions.extend(part_outputs) + else: + predictions = self._run_generation(inputs) + return predictions + else: + return self._execute(self.infer_program, self._get_feed(inputs, is_infer=True), self.infer_fetch_dict) diff --git a/modules/text/text_generation/plato2_en_base/module.py b/modules/text/text_generation/plato2_en_base/module.py new file mode 100644 index 0000000000000000000000000000000000000000..1ad2ef9f07071572275789fdd3c3da196769692b --- /dev/null +++ b/modules/text/text_generation/plato2_en_base/module.py @@ -0,0 +1,180 @@ +# coding:utf-8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import ast +import os +import json +import sys +import argparse +import contextlib +from collections import namedtuple + +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.module.module import runnable +from paddlehub.module.nlp_module import DataFormatError +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo, serving + +import plato2_en_base.models as plato_models +from plato2_en_base.tasks.dialog_generation import DialogGeneration +from plato2_en_base.utils import check_cuda, Timer +from plato2_en_base.utils.args import parse_args + + +@moduleinfo( + name="plato2_en_base", + version="1.0.0", + summary= + "A novel pre-training model for dialogue generation, incorporated with latent discrete variables for one-to-many relationship modeling.", + author="baidu-nlp", + author_email="", + type="nlp/text_generation", +) +class Plato(hub.NLPPredictionModule): + def _initialize(self): + """ + initialize with the necessary elements + """ + if "CUDA_VISIBLE_DEVICES" not in os.environ: + raise RuntimeError("The module only support GPU. Please set the environment variable CUDA_VISIBLE_DEVICES.") + + args = self.setup_args() + self.task = DialogGeneration(args) + self.model = plato_models.create_model(args, fluid.CUDAPlace(0)) + self.Example = namedtuple("Example", ["src", "data_id"]) + self._interactive_mode = False + + def setup_args(self): + """ + Setup arguments. 
+ """ + assets_path = os.path.join(self.directory, "assets") + vocab_path = os.path.join(assets_path, "vocab.txt") + init_pretraining_params = os.path.join(assets_path, "24L", "Plato") + spm_model_file = os.path.join(assets_path, "spm.model") + nsp_inference_model_path = os.path.join(assets_path, "24L", "NSP") + config_path = os.path.join(assets_path, "24L.json") + + # ArgumentParser.parse_args use argv[1:], it will drop the first one arg, so the first one in sys.argv should be "" + sys.argv = [ + "", "--model", "Plato", "--vocab_path", + "%s" % vocab_path, "--do_lower_case", "False", "--init_pretraining_params", + "%s" % init_pretraining_params, "--spm_model_file", + "%s" % spm_model_file, "--nsp_inference_model_path", + "%s" % nsp_inference_model_path, "--ranking_score", "nsp_score", "--do_generation", "True", "--batch_size", + "1", "--config_path", + "%s" % config_path + ] + + parser = argparse.ArgumentParser() + plato_models.add_cmdline_args(parser) + DialogGeneration.add_cmdline_args(parser) + args = parse_args(parser) + + args.load(args.config_path, "Model") + args.run_infer = True # only build infer program + + return args + + @serving + def generate(self, texts): + """ + Get the robot responses of the input texts. + + Args: + texts(list or str): If not in the interactive mode, texts should be a list in which every element is the chat context separated with '\t'. + Otherwise, texts shoule be one sentence. The module can get the context automatically. + + Returns: + results(list): the robot responses. + """ + if not texts: + return [] + if self._interactive_mode: + if isinstance(texts, str): + self.context.append(texts.strip()) + texts = [" [SEP] ".join(self.context[-self.max_turn:])] + else: + raise ValueError("In the interactive mode, the input data should be a string.") + elif not isinstance(texts, list): + raise ValueError("If not in the interactive mode, the input data should be a list.") + + bot_responses = [] + for i, text in enumerate(texts): + example = self.Example(src=text.replace("\t", " [SEP] "), data_id=i) + record = self.task.reader._convert_example_to_record(example, is_infer=True) + data = self.task.reader._pad_batch_records([record], is_infer=True) + pred = self.task.infer_step(self.model, data)[0] # batch_size is 1 + bot_response = pred["response"] # ignore data_id and score + bot_responses.append(bot_response) + + if self._interactive_mode: + self.context.append(bot_responses[0].strip()) + return bot_responses + + @contextlib.contextmanager + def interactive_mode(self, max_turn=6): + """ + Enter the interactive mode. + + Args: + max_turn(int): the max dialogue turns. max_turn = 1 means the robot can only remember the last one utterance you have said. + """ + self._interactive_mode = True + self.max_turn = max_turn + self.context = [] + yield + self.context = [] + self._interactive_mode = False + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + self.parser = argparse.ArgumentParser( + description='Run the %s module.' % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, optional.") + + self.add_module_input_arg() + + args = self.parser.parse_args(argvs) + + try: + input_data = self.check_input_data(args) + except DataFormatError and RuntimeError: + self.parser.print_help() + return None + + results = self.generate(texts=input_data) + + return results + + +if __name__ == "__main__": + module = Plato() + for result in module.generate(["Hello", "Hello\thi, nice to meet you, my name is tom\tso your name is tom?"]): + print(result) + with module.interactive_mode(max_turn=3): + while True: + human_utterance = input() + robot_utterance = module.generate(human_utterance) + print("Robot: %s" % robot_utterance[0]) diff --git a/modules/text/text_generation/plato2_en_base/readers/dialog_reader.py b/modules/text/text_generation/plato2_en_base/readers/dialog_reader.py new file mode 100644 index 0000000000000000000000000000000000000000..a3af8aaef27d4d3687a854b362f3ff85a6b07b9a --- /dev/null +++ b/modules/text/text_generation/plato2_en_base/readers/dialog_reader.py @@ -0,0 +1,442 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Dialogue Reader.""" + +import csv +from collections import namedtuple +from contextlib import contextmanager +import gzip +import os + +import numpy as np +import paddle.fluid as fluid +from paddle.fluid.incubate.fleet.collective import fleet + +from plato2_en_base.utils import pad_batch_data +from plato2_en_base.utils.args import str2bool +from plato2_en_base.utils.masking import mask +import plato2_en_base.utils.tokenization as tokenization + + +class DialogReader(object): + """The implement of DialogReader.""" + + @classmethod + def add_cmdline_args(cls, parser): + """Add cmdline argurments.""" + group = parser.add_argument_group("Reader") + group.add_argument("--max_src_len", type=int, default=128) + group.add_argument("--max_tgt_len", type=int, default=128) + group.add_argument("--truncate_first_turn", type=str2bool, default=False) + group.add_argument("--file_format", type=str, default="file", choices=["file", "filelist"]) + group.add_argument("--data_format", type=str, default="raw", choices=["raw", "tokenized", "numerical"]) + group.add_argument("--in_tokens", type=str2bool, default=False) + group.add_argument("--batch_size", type=int, default=16) + group.add_argument("--continuous_position", type=str2bool, default=True) + group.add_argument("--random_seed", type=int, default=11) + group.add_argument("--sort_pool_size", type=int, default=2**16) + + group = parser.add_argument_group("Tokenizer") + group.add_argument("--tokenizer", type=str, default="SentencePieceTokenizer") + args, _ = parser.parse_known_args() + tokenizer_cls = getattr(tokenization, args.tokenizer) + tokenizer_cls.add_cmdline_args(parser) + return group + + def __init__(self, args): + tokenizer_cls = getattr(tokenization, args.tokenizer) + self.tokenizer = tokenizer_cls(args) + self.vocab = self.tokenizer.vocab + self.pad_id = args.pad_id = self.vocab["[PAD]"] + self.bos_id = args.bos_id = self.vocab["[CLS]"] + self.eos_id = args.eos_id = self.vocab["[SEP]"] + self.unk_id = args.unk_id = self.vocab["[UNK]"] + self.mask_id = args.mask_id = self.vocab["[MASK]"] + self.vocab_size = args.get("vocab_size", 0) + self.max_src_len = args.max_src_len + self.max_tgt_len = args.max_tgt_len + self.truncate_first_turn = args.truncate_first_turn + self.file_format = args.file_format + self.data_format = args.data_format + self.in_tokens = args.in_tokens + self.batch_size = args.batch_size + self.continuous_position = args.continuous_position + self.sort_pool_size = args.sort_pool_size + + # random_seed must be set for data slicing when using multi-gpu + self.global_rng = np.random.RandomState(args.random_seed) + + # training progress + self.current_example = 0 + self.current_epoch = 0 + self.num_examples = 0 + + # model related + + self.fields = ["token_ids", "type_ids", "pos_ids"] + self.num_numerical_fields = len(self.fields) + self.fields += ["tgt_start_idx", "data_id"] + self.sort_key = lambda record: [len(record.token_ids)] + + self.Record = namedtuple("Record", self.fields, defaults=(None, ) * len(self.fields)) + + self.features = {} + return + + def get_train_progress(self): + """Gets progress for training phase.""" + return self.current_epoch, self.current_file_index, self.total_file + + def _convert_example_to_record(self, example, is_infer): + # process src + src_token_ids = [] + src_pos_ids = [] + + # tokenize src + s_token_ids_list = [] + for s in example.src.split("[SEP]"): + s = tokenization.convert_to_unicode(s).strip() + + if self.data_format == "tokenized": + s_tokens = s.split(" ") + else: + s_tokens = 
self.tokenizer.tokenize(s) + + s_token_ids = self.tokenizer.convert_tokens_to_ids(s_tokens) + [self.eos_id] + s_token_ids_list.append(s_token_ids) + + # trim src + idx = len(s_token_ids_list) - 1 + total_token_num = 1 + while idx >= 0: + total_token_num += len(s_token_ids_list[idx]) + if total_token_num > self.max_src_len: + if self.truncate_first_turn and idx == 0: + truncated_ids = s_token_ids_list[idx][:self.max_src_len - total_token_num] + if len(truncated_ids) > 1: + s_token_ids_list[idx] = truncated_ids[:-1] + [self.eos_id] + idx -= 1 + break + idx -= 1 + + for i, s_token_ids in enumerate(s_token_ids_list[idx + 1:], idx + 1): + src_token_ids += s_token_ids + src_pos_ids += list(range(1, len(s_token_ids) + 1)) + + src_token_ids = [self.bos_id] + src_token_ids + src_type_ids = [0] * len(src_token_ids) + src_pos_ids = [0] + src_pos_ids + assert len(src_token_ids) == len(src_type_ids) == len(src_pos_ids), \ + "not len(src_token_ids) == len(src_type_ids) == len(src_pos_ids)" + + token_ids = src_token_ids + type_ids = src_type_ids + pos_ids = src_pos_ids + tgt_start_idx = len(token_ids) + + if not is_infer: + # process tgt + # tokenize tgt + tgt = tokenization.convert_to_unicode(example.tgt).strip() + if self.data_format == "tokenized": + tgt_tokens = tgt.split(" ") + else: + tgt_tokens = self.tokenizer.tokenize(tgt) + + tgt_token_ids = self.tokenizer.convert_tokens_to_ids(tgt_tokens) + tgt_token_ids.append(self.eos_id) + + # trim tgt + if len(tgt_token_ids) > self.max_tgt_len - 1: + tgt_token_ids = tgt_token_ids[:self.max_tgt_len - 1] + + tgt_token_ids = [self.bos_id] + tgt_token_ids + tgt_type_ids = [1] * len(tgt_token_ids) + tgt_pos_ids = list(range(1, len(tgt_token_ids) + 1)) + assert len(tgt_token_ids) == len(tgt_type_ids) == len(tgt_pos_ids), \ + "not len(tgt_token_ids) == len(tgt_type_ids) == len(tgt_pos_ids)" + + token_ids += tgt_token_ids + type_ids += tgt_type_ids + pos_ids += tgt_pos_ids + + assert len(token_ids) == len(type_ids) == len(pos_ids), \ + "not len(token_ids) == len(type_ids) == len(pos_ids)" + + if self.continuous_position: + src_pos_ids = list(range(len(src_token_ids))) + if not is_infer: + tgt_pos_ids = list(range(len(tgt_token_ids))) + pos_ids = list(range(len(token_ids))) + + field_values = {"token_ids": src_token_ids, "type_ids": src_type_ids, "pos_ids": src_pos_ids} + field_values["tgt_start_idx"] = tgt_start_idx + field_values["data_id"] = example.data_id + + record = self.Record(**field_values) + return record + + def _read_tsv(self, fp, phase, is_infer, delimiter="\t", quotechar=None): + """Reads a tab separated value file.""" + csv.field_size_limit(2**20) + reader = csv.reader(fp, delimiter=delimiter, quotechar=quotechar) + headers = next(reader) + headers.append("data_id") + Example = namedtuple("Example", headers) + + for i, line in enumerate(reader): + example = Example(*line, data_id=i) + if is_infer or phase.endswith("test"): + self.features[phase][i] = example + record = self._convert_example_to_record(example, is_infer) + yield record + + def _read_numerical_file(self, fp, delimiter=";"): + for i, line in enumerate(fp): + cols = tokenization.convert_to_unicode(line).strip().split(delimiter) + cols = list(map(lambda x: list(map(int, x.split(" "))), cols)) + if len(cols) > self.num_numerical_fields: + cols = cols[:self.num_numerical_fields] + tgt_start_idx = cols[0].index(self.bos_id, 1) + record = self.Record(*cols, tgt_start_idx=tgt_start_idx, data_id=i) + yield record + + def _read_file(self, input_file, phase, is_infer): + def __wrapper__(): + with 
open_file(input_file) as fp: + if self.data_format == "numerical": + records = self._read_numerical_file(fp) + else: + records = self._read_tsv(fp, phase, is_infer) + for record in records: + yield record + + return __wrapper__ + + def _read_files(self, filelist, phase, is_infer, shuffle_files): + input_files = open(filelist).readlines() + + def __wrapper__(): + if shuffle_files: + self.global_rng.shuffle(input_files) + + if phase == "train": + self.total_file = len(input_files) + for file_index, input_file in enumerate(input_files, 1): + if phase == "train": + self.current_file_index = file_index + self.current_file = input_file + file_reader = self._read_file(input_file.strip(), phase, is_infer) + for record in file_reader(): + yield record + + return __wrapper__ + + def _batch_reader(self, reader, phase=None, is_infer=False, sort_pool_size=2**16): + """Construct a batch reader.""" + + def update_max_lens(max_lens, record): + """Update max_lens.""" + if max_lens is None: + return self.sort_key(record) + else: + return [max(max_len, l) for max_len, l in zip(max_lens, self.sort_key(record))] + + def get_batch(reader): + """Generate batches from reader.""" + batch, max_lens = [], None + for record in reader(): + if record is None: + yield batch + batch, max_lens = [], None + continue + + self.current_example += 1 + max_lens = update_max_lens(max_lens, record) + if self.in_tokens: + to_append = (len(batch) + 1) * sum(max_lens) <= self.batch_size + else: + to_append = len(batch) < self.batch_size + if to_append: + batch.append(record) + else: + yield batch + batch, max_lens = [record], self.sort_key(record) + + if len(batch) > 0: + yield batch + + def get_sorted_batch(pool): + """Generate sorted batches from pool.""" + pool = sorted(pool, key=self.sort_key) + batches = [] + batch, max_lens = [], None + for record in pool: + self.current_example += 1 + max_lens = update_max_lens(max_lens, record) + if self.in_tokens: + to_append = (len(batch) + 1) * sum(max_lens) <= self.batch_size + else: + to_append = len(batch) < self.batch_size + if to_append: + batch.append(record) + else: + batches.append(batch) + batch, max_lens = [record], self.sort_key(record) + + if len(batch) > 0: + batches.append(batch) + self.global_rng.shuffle(batches) + + for batch in batches: + yield batch + + def __wrapper__(): + if sort_pool_size > 0: + pool = [] + for record in reader(): + pool.append(record) + if len(pool) == sort_pool_size: + for batch in get_sorted_batch(pool): + yield batch + pool = [] + if len(pool) > 0: + for batch in get_sorted_batch(pool): + yield batch + else: + for batch in get_batch(reader): + yield batch + + return __wrapper__ + + def _distributed_batch_reader(self, batch_reader, num_part, part_id, is_test=False): + def __wrapper__(): + batches = [] + for batch in batch_reader(): + batches.append(batch) + if len(batches) == num_part: + yield batches[part_id] + batches = [] + if is_test and 0 <= part_id < len(batches): + yield batches[part_id] + return + + return __wrapper__ + + def data_generator(self, + input_file=None, + reader=None, + num_epochs=1, + num_part=1, + part_id=0, + phase=None, + is_infer=False): + """Data generator.""" + + def __wrapper__(): + if is_infer or phase.endswith("test"): + self.features[phase] = {} + + nonlocal reader + if reader is None: + if self.file_format == "filelist": + reader = self._read_files(input_file, phase, is_infer, not phase.endswith("test")) + else: + if phase == "train": + self.total_file = 1 + self.current_file_index = 1 + self.current_file = 
input_file + reader = self._read_file(input_file, phase, is_infer) + + batch_reader = self._batch_reader( + reader, phase, is_infer, sort_pool_size=self.sort_pool_size if not is_infer else 0) + if phase == "train": + batch_reader = self._distributed_batch_reader(batch_reader, num_part, part_id) + elif phase.startswith("distributed"): + batch_reader = self._distributed_batch_reader(batch_reader, num_part, part_id, is_test=True) + + for epoch_index in range(num_epochs): + if phase == "train": + self.current_example = 0 + self.current_epoch = epoch_index + 1 + for batch in batch_reader(): + yield self._pad_batch_records(batch, is_infer) + + return __wrapper__ + + def _gen_self_attn_mask(self, batch_token_ids, batch_tgt_start_idx=None, is_unidirectional=True, shift_len=0): + max_len = max(map(len, batch_token_ids)) + input_mask_data = np.zeros((len(batch_token_ids), max_len + shift_len, max_len + shift_len)) + if is_unidirectional: + for index, mask_data in enumerate(input_mask_data): + start = 0 if batch_tgt_start_idx is None else batch_tgt_start_idx[index] + end = len(batch_token_ids[index]) + mask_data[:end + shift_len, :start + shift_len] = 1.0 + # Generate the lower triangular matrix using the slice of matrix + b = np.tril(np.ones([end - start, end - start]), 0) + mask_data[start + shift_len:end + shift_len, start + shift_len:end + shift_len] = b + else: + for index, token_ids in enumerate(batch_token_ids): + input_mask_data[index, :len(token_ids) + shift_len, :len(token_ids) + shift_len] = 1.0 + return input_mask_data.astype("float32") + + def _pad_batch_records(self, batch_records, is_infer): + """ + Padding batch records and construct model's inputs. + """ + batch_size = len(batch_records) + batch = {} + batch_token_ids = [record.token_ids for record in batch_records] + batch_type_ids = [record.type_ids for record in batch_records] + batch_pos_ids = [record.pos_ids for record in batch_records] + batch["token_ids"] = pad_batch_data(batch_token_ids, pad_id=self.pad_id) + batch["type_ids"] = pad_batch_data(batch_type_ids, pad_id=self.pad_id) + batch["pos_ids"] = pad_batch_data(batch_pos_ids, pad_id=self.pad_id) + + batch_tgt_start_idx = [record.tgt_start_idx for record in batch_records] + batch["generation_mask"] = self._gen_self_attn_mask(batch_token_ids, batch_tgt_start_idx=batch_tgt_start_idx) + + if is_infer: + tgt_ids = np.array([[[self.bos_id]]] * len(batch_token_ids), dtype="int64") + if self.continuous_position: + tgt_pos = np.array(batch_tgt_start_idx, dtype="int64") + else: + tgt_pos = np.zeros_like(batch_tgt_start_idx, dtype="int64") + tgt_pos = tgt_pos.reshape(-1, 1, 1) + batch["init_score"] = np.zeros_like(tgt_ids, dtype="float32").reshape(-1, 1).tolist() + batch["tgt_ids"] = tgt_ids.tolist() + batch["tgt_pos"] = tgt_pos.tolist() + + batch["tgt_generation_mask"] = batch["generation_mask"][:, 0:1, :].astype("float32") + else: + batch["tgt_label"], batch["tgt_pos"] = mask( + batch_tokens=batch_token_ids, + vocab_size=self.vocab_size, + sent_b_starts=batch_tgt_start_idx, + is_unidirectional=True) + + batch_data_id = [record.data_id for record in batch_records] + batch["data_id"] = np.array(batch_data_id).astype("int64").reshape([-1, 1]) + return batch + + +@contextmanager +def open_file(filename): + """Open file.""" + if filename.endswith(".gz"): + fp = gzip.open(filename, "rt") + else: + fp = open(filename) + yield fp + fp.close() diff --git a/modules/text/text_generation/plato2_en_base/readers/nsp_reader.py b/modules/text/text_generation/plato2_en_base/readers/nsp_reader.py 
new file mode 100644 index 0000000000000000000000000000000000000000..1914865fdf4d8b6a1f261a994e2f507fb22ee893 --- /dev/null +++ b/modules/text/text_generation/plato2_en_base/readers/nsp_reader.py @@ -0,0 +1,148 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""NSP Reader.""" + +from collections import namedtuple + +import numpy as np + +from plato2_en_base.readers.dialog_reader import DialogReader +from plato2_en_base.utils import pad_batch_data +from plato2_en_base.utils.args import str2bool +from plato2_en_base.utils.masking import mask + + +class NSPReader(DialogReader): + """NSP Reader.""" + + @classmethod + def add_cmdline_args(cls, parser): + """Add cmdline argurments.""" + group = DialogReader.add_cmdline_args(parser) + group.add_argument( + "--attention_style", type=str, default="bidirectional", choices=["bidirectional", "unidirectional"]) + group.add_argument("--mix_negative_sample", type=str2bool, default=False) + return group + + def __init__(self, args): + super(NSPReader, self).__init__(args) + self.fields.append("label") + self.Record = namedtuple("Record", self.fields, defaults=(None, ) * len(self.fields)) + + self.attention_style = args.attention_style + self.mix_negative_sample = args.mix_negative_sample + return + + def _convert_example_to_record(self, example, is_infer): + record = super(NSPReader, self)._convert_example_to_record(example, False) + if "label" in example._fields: + record = record._replace(label=int(example.label)) + return record + + def _mix_negative_sample(self, reader, neg_pool_size=2**16): + def gen_from_pool(pool): + num_samples = len(pool) + if num_samples == 1: + # only one sample: it is impossible to generate negative sample + yield pool[0]._replace(label=1) + return + self.global_rng.shuffle(pool) + for i in range(num_samples): + pool[i] = pool[i]._replace(label=1) + j = (i + 1) % num_samples + idx_i = pool[i].tgt_start_idx + idx_j = pool[j].tgt_start_idx + field_values = {} + field_values["token_ids"] = pool[i].token_ids[:idx_i] + pool[j].token_ids[idx_j:] + field_values["type_ids"] = pool[i].type_ids[:idx_i] + pool[j].type_ids[idx_j:] + field_values["pos_ids"] = list(range(len(field_values["token_ids"]))) + neg_record = self.Record(**field_values, tgt_start_idx=idx_i, data_id=-1, label=0) + pool.append(neg_record) + assert len(neg_record.token_ids) <= self.max_seq_len + self.global_rng.shuffle(pool) + for record in pool: + yield record + + def __wrapper__(): + pool = [] + for record in reader(): + pool.append(record) + if len(pool) == neg_pool_size: + for record in gen_from_pool(pool): + yield record + pool = [] + if len(pool) > 0: + for record in gen_from_pool(pool): + yield record + + return __wrapper__ + + def _batch_reader(self, reader, phase=None, is_infer=False, sort_pool_size=2**16): + if self.mix_negative_sample: + reader = self._mix_negative_sample(reader) + return super(NSPReader, self)._batch_reader( + reader, phase=phase, is_infer=is_infer, 
sort_pool_size=sort_pool_size) + + def _pad_batch_records(self, batch_records, is_infer): + """ + Padding batch records and construct model's inputs. + """ + batch = {} + batch_token_ids = [record.token_ids for record in batch_records] + batch_type_ids = [record.type_ids for record in batch_records] + batch_pos_ids = [record.pos_ids for record in batch_records] + batch_tgt_start_idx = [record.tgt_start_idx for record in batch_records] + batch_label = [record.label for record in batch_records] + + if self.attention_style == "unidirectional": + batch["token_ids"] = pad_batch_data(batch_token_ids, pad_id=self.pad_id) + batch["type_ids"] = pad_batch_data(batch_type_ids, pad_id=self.pad_id) + batch["pos_ids"] = pad_batch_data(batch_pos_ids, pad_id=self.pad_id) + tgt_label, tgt_pos, label_pos = mask( + batch_tokens=batch_token_ids, + vocab_size=self.vocab_size, + bos_id=self.bos_id, + sent_b_starts=batch_tgt_start_idx, + labels=batch_label, + is_unidirectional=True) + attention_mask = self._gen_self_attn_mask(batch_token_ids, batch_tgt_start_idx) + else: + batch_mask_token_ids, tgt_label, tgt_pos, label_pos = mask( + batch_tokens=batch_token_ids, + vocab_size=self.vocab_size, + bos_id=self.bos_id, + eos_id=self.eos_id, + mask_id=self.mask_id, + sent_b_starts=batch_tgt_start_idx, + labels=batch_label, + is_unidirectional=False) + if not is_infer: + batch_token_ids = batch_mask_token_ids + batch["token_ids"] = pad_batch_data(batch_token_ids, pad_id=self.pad_id) + batch["type_ids"] = pad_batch_data(batch_type_ids, pad_id=self.pad_id) + batch["pos_ids"] = pad_batch_data(batch_pos_ids, pad_id=self.pad_id) + attention_mask = self._gen_self_attn_mask(batch_token_ids, is_unidirectional=False) + + batch["attention_mask"] = attention_mask + batch["label_pos"] = label_pos + + if not is_infer: + batch_label = np.array(batch_label).astype("int64").reshape([-1, 1]) + batch["label"] = batch_label + batch["tgt_label"] = tgt_label + batch["tgt_pos"] = tgt_pos + + batch_data_id = [record.data_id for record in batch_records] + batch["data_id"] = np.array(batch_data_id).astype("int64").reshape([-1, 1]) + return batch diff --git a/modules/text/text_generation/plato2_en_base/readers/plato_reader.py b/modules/text/text_generation/plato2_en_base/readers/plato_reader.py new file mode 100644 index 0000000000000000000000000000000000000000..8b7aa5d54e78225546d980cbf8a084449fc9f179 --- /dev/null +++ b/modules/text/text_generation/plato2_en_base/readers/plato_reader.py @@ -0,0 +1,83 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
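The subtle part of these readers is the attention-mask layout: context positions are fully visible to every token, while response positions may only attend to earlier response tokens. The snippet below is an editorial toy re-implementation of the unidirectional mask built by `DialogReader._gen_self_attn_mask` for a single unpadded sequence, with `shift_len` omitted (the `PlatoReader` that follows passes `shift_len=1`, which reserves one extra slot, apparently for the latent embedding):

```python
import numpy as np

def toy_generation_mask(seq_len, tgt_start):
    """Unidirectional mask: full attention to context, causal over the target."""
    mask = np.zeros((seq_len, seq_len), dtype="float32")
    mask[:, :tgt_start] = 1.0  # every position sees the whole context
    n_tgt = seq_len - tgt_start
    mask[tgt_start:, tgt_start:] = np.tril(np.ones((n_tgt, n_tgt), dtype="float32"))
    return mask

print(toy_generation_mask(5, 3))
# [[1. 1. 1. 0. 0.]
#  [1. 1. 1. 0. 0.]
#  [1. 1. 1. 0. 0.]
#  [1. 1. 1. 1. 0.]
#  [1. 1. 1. 1. 1.]]
```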
+"""Plato Reader.""" + +import numpy as np + +from plato2_en_base.readers.dialog_reader import DialogReader +from plato2_en_base.utils import pad_batch_data +from plato2_en_base.utils.masking import mask + + +class PlatoReader(DialogReader): + """The implement of PlatoReader""" + + def __init__(self, args): + super(PlatoReader, self).__init__(args) + self.latent_type_size = args.latent_type_size + self.use_bow = args.use_bow + + def _pad_batch_records(self, batch_records, is_infer): + """ + Padding batch records and construct model's inputs. + """ + batch = {} + batch_token_ids = [record.token_ids for record in batch_records] + batch_type_ids = [record.type_ids for record in batch_records] + batch_pos_ids = [record.pos_ids for record in batch_records] + + batch_tgt_start_idx = [record.tgt_start_idx for record in batch_records] + + batch_size = len(batch_token_ids) + + # padding + batch["token_ids"] = pad_batch_data(batch_token_ids, pad_id=self.pad_id) + batch["type_ids"] = pad_batch_data(batch_type_ids, pad_id=self.pad_id) + batch["pos_ids"] = pad_batch_data(batch_pos_ids, pad_id=self.pad_id) + + batch["generation_mask"] = self._gen_self_attn_mask( + batch_token_ids, batch_tgt_start_idx=batch_tgt_start_idx, is_unidirectional=True, shift_len=1) + if not is_infer: + batch["recognition_mask"] = self._gen_self_attn_mask(batch_token_ids, is_unidirectional=False, shift_len=1) + + if is_infer: + tgt_ids = np.array([[[self.bos_id]]] * batch_size, dtype="int64") + if self.continuous_position: + tgt_pos = np.array(batch_tgt_start_idx, dtype="int64") + else: + tgt_pos = np.zeros_like(batch_tgt_start_idx, dtype="int64") + tgt_pos = tgt_pos.reshape(-1, 1, 1) + batch["init_score"] = np.zeros_like(tgt_ids, dtype="float32").reshape(-1, 1).tolist() + batch["tgt_ids"] = tgt_ids.tolist() + batch["tgt_pos"] = tgt_pos.tolist() + batch["parent_idx"] = np.array(range(batch_size), dtype="int32") + + batch["tgt_generation_mask"] = batch["generation_mask"][:, 0:1, :].astype("float32") + else: + mask_return_list = mask( + batch_tokens=batch_token_ids, + vocab_size=self.vocab_size, + sent_b_starts=batch_tgt_start_idx, + is_unidirectional=True, + use_latent=True, + use_bow=self.use_bow) + batch["tgt_label"] = mask_return_list[0] + batch["tgt_pos"] = mask_return_list[1] + if self.use_bow: + batch["bow_label"] = mask_return_list[2] + batch["bow_pos"] = mask_return_list[3] + + batch_data_id = [record.data_id for record in batch_records] + batch["data_id"] = np.array(batch_data_id).astype("int64").reshape([-1, 1]) + return batch diff --git a/hub_module/modules/text/text_generation/plato2_en_base/tasks/__init__.py b/modules/text/text_generation/plato2_en_base/tasks/__init__.py similarity index 100% rename from hub_module/modules/text/text_generation/plato2_en_base/tasks/__init__.py rename to modules/text/text_generation/plato2_en_base/tasks/__init__.py diff --git a/modules/text/text_generation/plato2_en_base/tasks/dialog_generation.py b/modules/text/text_generation/plato2_en_base/tasks/dialog_generation.py new file mode 100644 index 0000000000000000000000000000000000000000..179098ad11bca113b1510923031ab9178f9d0fbd --- /dev/null +++ b/modules/text/text_generation/plato2_en_base/tasks/dialog_generation.py @@ -0,0 +1,292 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Dialogue generation task."""
+
+from collections import defaultdict
+import math
+
+from plato2_en_base.readers.dialog_reader import DialogReader
+from plato2_en_base.readers.plato_reader import PlatoReader
+from plato2_en_base.tasks import register_task
+from plato2_en_base.tasks.task_base import Task
+from plato2_en_base.utils.args import str2bool
+from plato2_en_base.utils.inference import create_predictor
+
+
+def post_process_context(token_ids, reader, merge=True):
+    """Post-process the context sequence."""
+    context = []
+    utt = []
+    for tok_id in token_ids[1:]:
+        if tok_id == reader.eos_id:
+            utt = reader.tokenizer.convert_ids_to_tokens(utt)
+            if merge:
+                utt = reader.tokenizer.merge_subword(utt)
+            context.append(utt)
+            utt = []
+        else:
+            utt.append(tok_id)
+    return context
+
+
+def post_process_response(token_ids, reader, merge=True):
+    """
+    Post-process the decoded sequence. Truncate from the first <eos> and
+    remove the <bos> and <eos> tokens currently.
+    """
+    eos_pos = len(token_ids)
+    for i, tok_id in enumerate(token_ids):
+        if tok_id == reader.eos_id:
+            eos_pos = i
+            break
+    token_ids = token_ids[1:eos_pos]
+    response = reader.tokenizer.convert_ids_to_tokens(token_ids)
+    if merge:
+        response = reader.tokenizer.merge_subword(response)
+    return token_ids, response
+
+
+def get_cross_turn_repetition(context, pred_tokens, eos_idx, is_cn=False):
+    """Get cross-turn repetition."""
+    if len(pred_tokens) == 0:
+        return 1.0
+    if is_cn:
+        context = ["".join(utt) for utt in context]
+        pred_tokens = "".join(pred_tokens)
+
+    pred_tri_grams = set()
+    for i in range(len(pred_tokens) - 2):
+        tri_gram = tuple(pred_tokens[i:i + 3])
+        pred_tri_grams.add(tri_gram)
+    for utt in context:
+        for i in range(len(utt) - 2):
+            tri_gram = tuple(utt[i:i + 3])
+            if tri_gram in pred_tri_grams:
+                return 1.0
+    return 0.0
+
+
+def get_in_turn_repetition(pred, is_cn=False):
+    """Get in-turn repetition."""
+    if len(pred) == 0:
+        return 1.0
+    if isinstance(pred[0], str):
+        pred = [tok.lower() for tok in pred]
+        if is_cn:
+            pred = "".join(pred)
+    tri_grams = set()
+    for i in range(len(pred) - 2):
+        tri_gram = tuple(pred[i:i + 3])
+        if tri_gram in tri_grams:
+            return 1.0
+        tri_grams.add(tri_gram)
+    return 0.0
+
+
+def get_nsp_score_batch(nsp_predictor, predictions):
+    """
+    Get NSP scores of a batch.
+ """ + import argparse + from collections import namedtuple + + from plato2_en_base.readers.nsp_reader import NSPReader + from plato2_en_base.utils.args import parse_args + from plato2_en_base.tasks.next_sentence_prediction import NextSentencePrediction + + parser = argparse.ArgumentParser() + NextSentencePrediction.add_cmdline_args(parser) + parser.add_argument("--num_samples", type=int, default=None) + parser.add_argument("--config_path", type=str, required=True) + parser.add_argument("--mem_efficient", type=str2bool, default=False) + + args = parse_args(parser, allow_unknown=True) + args.load(args.config_path) + if not args.mem_efficient: + if args.num_samples: + args.batch_size *= args.num_samples + if args.latent_type_size: + args.batch_size *= args.latent_type_size + args.tokenized_input = True + reader = NSPReader(args) + + def __reader__(): + headers = ["src", "tgt", "data_id"] + + Example = namedtuple("Example", headers) + + for i, info in enumerate(predictions): + context = post_process_context(info["context_token_ids"], reader, merge=False) + context_tokenized_input = " [SEP] ".join(" ".join(utt) for utt in context) + _, response = post_process_response(info["response_token_ids"], reader, merge=False) + response_tokenized_input = " ".join(response) + example = Example(src=context_tokenized_input, tgt=response_tokenized_input, data_id=i) + record = reader._convert_example_to_record(example, is_infer=True) + yield record + return + + generator = reader.data_generator( + reader=__reader__, + is_infer=True, + phase="test", + ) + + steps = 0 + for data in generator(): + outputs = nsp_predictor(data) + for probs, data_id in zip(outputs[0], outputs[-1]): + data_id = data_id[0] + info = predictions[data_id] + info["nsp_score"] = float(probs[1]) + + return + + +@register_task("DialogGeneration") +class DialogGeneration(Task): + """ + Define dialogue response generation. + """ + + @classmethod + def add_cmdline_args(cls, parser): + """Add cmdline argurments.""" + group = parser.add_argument_group("Task") + group.add_argument("--do_generation", type=str2bool, default=False) + group.add_argument("--is_cn", type=str2bool, default=False) + + group.add_argument("--nsp_inference_model_path", type=str, default=None) + group.add_argument("--nsp_attention_style", type=str, default="bidirectional") + + group.add_argument("--ranking_score", type=str, default="decode_score") + + args, _ = parser.parse_known_args() + if args.model == "Plato": + PlatoReader.add_cmdline_args(parser) + else: + DialogReader.add_cmdline_args(parser) + return group + + def __init__(self, args): + super(DialogGeneration, self).__init__(args) + self.do_generation = args.do_generation + self.is_cn = args.is_cn + if args.model == "Plato": + self.reader = PlatoReader(args) + else: + self.reader = DialogReader(args) + + if args.nsp_inference_model_path: + self.nsp_predictor = create_predictor(args.nsp_inference_model_path, args.is_distributed) + self.nsp_attention_style = args.nsp_attention_style + else: + self.nsp_predictor = None + + self.ranking_score = args.ranking_score + self.max_dec_len = args.max_dec_len + return + + def _post_process_generation_output(self, predictions): + """ + Post process generation output. + + Calculate repetion, reranking. 
+ """ + for info in predictions: + tokens = post_process_context(info["context_token_ids"], self.reader) + pred_token_ids, pred_tokens = post_process_response(info["response_token_ids"], self.reader) + info["context"] = " [SEP] ".join(" ".join(u) for u in tokens) + info["response"] = " ".join(pred_tokens) + info["num_token"] = len(pred_token_ids) + info["cross_turn_repetition"] = get_cross_turn_repetition(tokens, pred_tokens, self.reader.eos_id, + self.is_cn) + info["in_turn_repetition"] = max( + get_in_turn_repetition(pred_tokens, self.is_cn), get_in_turn_repetition(pred_token_ids)) + if self.nsp_predictor is not None: + get_nsp_score_batch(self.nsp_predictor, predictions) + + group = defaultdict(list) + for info in predictions: + group[info["data_id"]].append(info) + + predictions = [] + for data_id in group: + infos = group[data_id] + for info in infos: + info["score"] = info[self.ranking_score] + if self.max_dec_len is not None and info["num_token"] >= self.max_dec_len: # not ending + info["score"] -= 1e3 + elif info["cross_turn_repetition"] > 0: + info["score"] -= 1e3 + elif info["in_turn_repetition"] > 0: + info["score"] -= 1e3 + infos = sorted(infos, key=lambda info: -info["score"]) + pred = infos[0] + keep_attr = ["data_id", "score", "response"] + pred = {k: pred[k] for k in keep_attr} + predictions.append(pred) + return predictions + + def _post_process_scoring_output(self, predictions): + raise NotImplementedError + + def _post_process_infer_output(self, predictions): + if self.do_generation: + return self._post_process_generation_output(predictions) + else: + return self._post_process_scoring_output(predictions) + + def merge_mertrics_and_statistics(self, outputs, part_outputs): + """ + Merge two evaulation output. + """ + if outputs is None: + return part_outputs + + if part_outputs is None: + return outputs + + batch_size = outputs.pop("batch_size") + tokens_num = outputs.pop("tokens_num") + part_batch_size = part_outputs.pop("batch_size") + part_tokens_num = part_outputs.pop("tokens_num") + + new_outputs = {"batch_size": batch_size + part_batch_size, "tokens_num": tokens_num + part_tokens_num} + for k in outputs: + if k.startswith("token_"): + new_outputs[k] = ( + outputs[k] * tokens_num + part_outputs[k] * part_tokens_num) / new_outputs["tokens_num"] + else: + new_outputs[k] = ( + outputs[k] * batch_size + part_outputs[k] * part_batch_size) / new_outputs["batch_size"] + return new_outputs + + def get_metrics(self, outputs): + """ + Get metrics. + """ + if outputs is None: + raise ValueError("metrics is None") + outputs = dict(outputs) + outputs.pop("batch_size", None) + outputs.pop("tokens_num", None) + metrics = {} + for k in outputs: + if k.startswith("token_"): + metrics[k[6:]] = outputs[k] + else: + metrics[k] = outputs[k] + if k == "token_lm_loss": + metrics["ppl"] = math.exp(outputs[k]) + return metrics diff --git a/modules/text/text_generation/plato2_en_base/tasks/next_sentence_prediction.py b/modules/text/text_generation/plato2_en_base/tasks/next_sentence_prediction.py new file mode 100644 index 0000000000000000000000000000000000000000..65ff60b3deac704a957e260713c6ee4e87b908a1 --- /dev/null +++ b/modules/text/text_generation/plato2_en_base/tasks/next_sentence_prediction.py @@ -0,0 +1,44 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Next sentence prediction task."""
+
+from plato2_en_base.readers.nsp_reader import NSPReader
+from plato2_en_base.tasks import register_task
+from plato2_en_base.tasks.task_base import Task
+from plato2_en_base.utils.args import str2bool
+
+
+@register_task("NextSentencePrediction")
+class NextSentencePrediction(Task):
+    """
+    Define the next sentence prediction task.
+    """
+
+    @classmethod
+    def add_cmdline_args(cls, parser):
+        """Add cmdline arguments."""
+        group = NSPReader.add_cmdline_args(parser)
+        return group
+
+    def __init__(self, args):
+        super(NextSentencePrediction, self).__init__(args)
+        self.reader = NSPReader(args)
+        return
+
+    def _post_process_infer_output(self, predictions):
+        predictions = [{
+            "data_id": data_id.tolist()[0],
+            "score": score.tolist()[1]
+        } for data_id, score in zip(predictions["data_id"], predictions["scores"])]
+        return predictions
diff --git a/modules/text/text_generation/plato2_en_base/tasks/task_base.py b/modules/text/text_generation/plato2_en_base/tasks/task_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..201b9ce6311e45f3200102ced8f28d83c5becde0
--- /dev/null
+++ b/modules/text/text_generation/plato2_en_base/tasks/task_base.py
@@ -0,0 +1,86 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Task base."""
+
+from abc import abstractmethod, ABC
+
+from plato2_en_base.models.model_base import Model
+
+
+class Task(ABC):
+    """
+    Basic task.
+    """
+
+    def __init__(self, args):
+        return
+
+    def train_step(self, model: Model, inputs):
+        """Run one training step."""
+        outputs = model.train_step(inputs)
+        outputs = {k: v.tolist()[0] for k, v in outputs.items()}
+        return outputs
+
+    def eval_step(self, model: Model, inputs):
+        """Run one evaluation step."""
+        outputs = model.eval_step(inputs)
+        outputs = {k: v.tolist()[0] for k, v in outputs.items()}
+        return outputs
+
+    def infer_step(self, model: Model, inputs):
+        """Run one inference step."""
+        predictions = model.infer_step(inputs)
+        outputs = self._post_process_infer_output(predictions)
+        return outputs
+
+    def _post_process_infer_output(self, predictions):
+        """
+        Post-process inference output.
+        """
+        return predictions
+
+    def merge_mertrics_and_statistics(self, outputs, part_outputs):
+        """
+        Merge metrics and statistics.
+ """ + if outputs is None: + return part_outputs + + if part_outputs is None: + return outputs + + batch_size = outputs.pop("batch_size") + part_batch_size = part_outputs.pop("batch_size") + + new_outputs = { + "batch_size": batch_size + part_batch_size, + } + for k in outputs: + new_outputs[k] = (outputs[k] * batch_size + part_outputs[k] * part_batch_size) / new_outputs["batch_size"] + return new_outputs + + def get_metrics(self, outputs): + """ + Get metrics. + """ + if outputs is None: + raise ValueError("metrics is None") + outputs = dict(outputs) + # pop statistics + outputs.pop("batch_size", None) + return outputs + + def get_data_loader(self, model, *args, is_infer=False, **kwargs): + generator = self.reader.data_generator(*args, is_infer=is_infer, **kwargs) + return model.get_data_loader(generator, is_infer) diff --git a/modules/text/text_generation/plato2_en_base/utils/__init__.py b/modules/text/text_generation/plato2_en_base/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4068cbd6fe3a2b818b4a435fe79738837a4f812f --- /dev/null +++ b/modules/text/text_generation/plato2_en_base/utils/__init__.py @@ -0,0 +1,173 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Utils.""" + +from itertools import chain +import os +import time +import sys + +import numpy as np +import paddle.fluid as fluid + + +def to_lodtensor(data, place): + """Convert data to LoDTensor.""" + if place is None: + return data + lengths = [] + while isinstance(data[0], list): + lengths.append(list(map(len, data))) + data = [x for xs in data for x in xs] + if isinstance(data[0], float): + data = np.array(data, dtype="float32") + else: + data = np.array(data, dtype="int64") + data_tensor = fluid.LoDTensor() + data_tensor.set(data, place) + data_tensor.set_recursive_sequence_lengths(lengths) + return data_tensor + + +def pad_batch_data(insts, pad_id=0): + """Pad the instances to the max sequence length in batch. 
""" + max_len = max(map(len, insts)) + inst_data = np.array([list(inst) + [pad_id] * (max_len - len(inst)) for inst in insts]) + return inst_data.astype("int64").reshape([-1, max_len, 1]) + + +def convert_lodtensor_to_list(tensor): + data = np.array(tensor) + recursive_sequence_lengths = tensor.recursive_sequence_lengths() + recursive_sequence_lengths.reverse() + for i, lengths in enumerate(recursive_sequence_lengths): + shift = 0 + new_data = [] + for j, l in enumerate(lengths): + new_data.append(data[shift:shift + l]) + shift += l + data = new_data + return data + + +def concatenate_lodtensors(tensors, place): + """Concatenate LoD tensors.""" + data = [] + recursive_sequence_lengths = [] + for tensor in tensors: + data.append(np.array(tensor)) + recursive_sequence_lengths.append(tensor.recursive_sequence_lengths()) + data = np.concatenate(data, axis=0) + recursive_sequence_lengths = [sum(lens, []) for lens in zip(*recursive_sequence_lengths)] + data_tensor = fluid.LoDTensor() + data_tensor.set(data, place) + data_tensor.set_recursive_sequence_lengths(recursive_sequence_lengths) + assert data_tensor.has_valid_recursive_sequence_lengths() + return data_tensor + + +def repeat_array_or_tensor(array_or_tensor, place, times): + """Repeate numpy array or LoD tensor.""" + if isinstance(array_or_tensor, fluid.LoDTensor): + data = [np.array(array_or_tensor)] * times + recursive_sequence_lengths = [array_or_tensor.recursive_sequence_lengths()] * times + data = np.concatenate(data, axis=0) + recursive_sequence_lengths = [sum(lens, []) for lens in zip(*recursive_sequence_lengths)] + data_tensor = fluid.LoDTensor() + data_tensor.set(data, place) + data_tensor.set_recursive_sequence_lengths(recursive_sequence_lengths) + assert data_tensor.has_valid_recursive_sequence_lengths() + return data_tensor + elif isinstance(array_or_tensor, list): + return list(chain(*([array_or_tensor] * times))) + else: + return np.concatenate([array_or_tensor] * times, axis=0) + + +def slice_array_or_tensor(array_or_tensor, place, begin, end): + """Repeate numpy array or LoD tensor.""" + if isinstance(array_or_tensor, fluid.LoDTensor): + data = convert_lodtensor_to_list(array_or_tensor) + data = data[begin:end] + return to_lodtensor(data, place) + else: + return array_or_tensor[begin:end] + + +def init_checkpoint(exe, init_checkpoint_path, main_program): + """Initialize from checkpoint.""" + assert os.path.exists(init_checkpoint_path), "[%s] cann't be found." % init_checkpoint_path + + def existed_persitables(var): + """Whether var is a persistables.""" + if not fluid.io.is_persistable(var): + return False + return os.path.exists(os.path.join(init_checkpoint_path, var.name)) + + fluid.io.load_vars(exe, init_checkpoint_path, main_program=main_program, predicate=existed_persitables) + print(f"Load model from {init_checkpoint_path}") + + +def init_pretraining_params(exe, pretraining_params_path, main_program): + """Only initialize parameters.""" + assert os.path.exists(pretraining_params_path), "[%s] cann't be found." 
% pretraining_params_path + + def existed_params(var): + """Whether var is a parameter.""" + if not isinstance(var, fluid.framework.Parameter): + return False + return os.path.exists(os.path.join(pretraining_params_path, var.name)) + + fluid.io.load_vars(exe, pretraining_params_path, main_program=main_program, predicate=existed_params) + print(f"Load pretraining parameters from {pretraining_params_path}.") + + return + + +class Timer(object): + def __init__(self): + self._pass_time = 0 + self._start_time = None + return + + def start(self): + self._start_time = time.time() + + def pause(self): + self._pass_time += time.time() - self._start_time + self._start_time = None + + def reset(self): + self._pass_time = 0 + + @property + def pass_time(self): + if self._start_time is None: + return self._pass_time + else: + return self._pass_time + time.time() - self._start_time + + +ERROR_MESSAGE = "\nYou can not set use_cuda = True in the model because you are using paddlepaddle-cpu.\n \ + Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_cuda = False to run models on CPU.\n" + + +def check_cuda(use_cuda, err=ERROR_MESSAGE): + """Check CUDA.""" + try: + if use_cuda and not fluid.is_compiled_with_cuda(): + print(err) + sys.exit(1) + except Exception as e: + pass diff --git a/hub_module/modules/text/text_generation/plato2_en_base/utils/args.py b/modules/text/text_generation/plato2_en_base/utils/args.py similarity index 100% rename from hub_module/modules/text/text_generation/plato2_en_base/utils/args.py rename to modules/text/text_generation/plato2_en_base/utils/args.py diff --git a/modules/text/text_generation/plato2_en_base/utils/inference.py b/modules/text/text_generation/plato2_en_base/utils/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..f21b0162533dd0748ecae6cf6cbf792003c9ec13 --- /dev/null +++ b/modules/text/text_generation/plato2_en_base/utils/inference.py @@ -0,0 +1,42 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
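As a quick sanity check on the helpers above, this editorial sketch reproduces what `pad_batch_data` returns for a ragged batch: instances padded with `pad_id` to the batch maximum length, reshaped to the `[batch_size, max_len, 1]` int64 layout the model inputs expect:

```python
import numpy as np

insts = [[5, 6], [7, 8, 9]]
pad_id = 0
max_len = max(map(len, insts))  # 3
padded = np.array([list(inst) + [pad_id] * (max_len - len(inst)) for inst in insts])
padded = padded.astype("int64").reshape([-1, max_len, 1])
print(padded.shape)     # (2, 3, 1)
print(padded[:, :, 0])  # [[5 6 0]
                        #  [7 8 9]]
```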
+"""Inference utils.""" + +import os + +import paddle.fluid as fluid + + +def create_predictor(inference_model_path, is_distributed=False): + """Create predictor.""" + if is_distributed: + dev_count = fluid.core.get_cuda_device_count() + gpu_id = int(os.getenv("FLAGS_selected_gpus")) + else: + dev_count = 1 + gpu_id = 0 + + place = fluid.CUDAPlace(gpu_id) + exe = fluid.Executor(place) + + scope = fluid.Scope() + with fluid.scope_guard(scope): + inference_prog, feed_target_names, fetch_targets = fluid.io.load_inference_model(inference_model_path, exe) + + def __predict__(inputs): + with fluid.scope_guard(scope): + outputs = exe.run(inference_prog, feed=inputs, fetch_list=fetch_targets, return_numpy=True) + return outputs + + return __predict__ diff --git a/modules/text/text_generation/plato2_en_base/utils/masking.py b/modules/text/text_generation/plato2_en_base/utils/masking.py new file mode 100644 index 0000000000000000000000000000000000000000..a4fb1eb78d716854d35c11bea67180e196ef5124 --- /dev/null +++ b/modules/text/text_generation/plato2_en_base/utils/masking.py @@ -0,0 +1,122 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Reader utils.""" + +import numpy as np + +import plato2_en_base.utils + + +def mask(batch_tokens, + vocab_size, + bos_id=1, + eos_id=2, + mask_id=3, + sent_b_starts=None, + labels=None, + is_unidirectional=False, + use_latent=False, + use_bow=False): + """ + Add mask for batch_tokens, return out, mask_label, mask_pos; + Note: mask_pos responding the batch_tokens after padded; + """ + batch_tokens = np.copy(batch_tokens) + max_len = max(map(len, batch_tokens)) + mask_label = [] + mask_pos = [] + if labels is not None: + label_pos = [] + + if is_unidirectional: + # unidirectional language model + if use_latent: + max_len += 1 + shift_len = 1 + else: + shift_len = 0 + for sent_index, sent in enumerate(batch_tokens): + sent_b_index = sent_b_starts[sent_index] if sent_b_starts is not None else 0 + need_cal = True + if labels is not None: + label_pos.append(sent_index * max_len + len(sent) - 1 + shift_len) + if labels[sent_index] == 0: + need_cal = False + mask_label.extend(sent[sent_b_index + 1:]) + mask_pos.extend([sent_index * max_len + i + shift_len for i in range(sent_b_index, len(sent) - 1)]) + mask_label = np.array(mask_label).astype("int64").reshape([-1, 1]) + mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1]) + return_list = [mask_label, mask_pos] + + # latent related (bow label and pos) + if use_latent and use_bow: + bow_label = [] + bow_pos = [] + for sent_index, sent in enumerate(batch_tokens): + sent_b_index = sent_b_starts[sent_index] if sent_b_starts is not None else 0 + + def __filter__(tok_id): + # TODO: exclude [EOS] from bow loss + return True + + bow_pos.extend([sent_index for i in range(sent_b_index + 1, len(sent)) if __filter__(sent[i])]) + bow_label.extend([sent[i] for i in range(sent_b_index + 1, len(sent)) if __filter__(sent[i])]) + bow_label = 
np.array(bow_label).astype("int64").reshape([-1, 1]) + bow_pos = np.array(bow_pos).astype("int64").reshape([-1, 1]) + return_list += [bow_label, bow_pos] + else: + # bidirectional mask language model + total_token_num = sum(map(len, batch_tokens)) + prob_mask = np.random.rand(total_token_num) + # TODO: fix replace_ids, include [UNK] + replace_ids = np.random.randint(3, high=vocab_size, size=total_token_num) + prob_index = 0 + for sent_index, sent in enumerate(batch_tokens): + # add pair label position + if labels is not None: + label_pos.append(sent_index * max_len) + + # add mask label and position + for token_index, token in enumerate(sent): + if token == eos_id or token == bos_id: + continue + prob = prob_mask[prob_index + token_index] + if prob > 0.15: + continue + elif 0.03 < prob <= 0.15: + # mask + mask_label.append(sent[token_index]) + sent[token_index] = mask_id + mask_pos.append(sent_index * max_len + token_index) + elif 0.015 < prob <= 0.03: + # random replace + mask_label.append(sent[token_index]) + sent[token_index] = replace_ids[prob_index + token_index] + mask_pos.append(sent_index * max_len + token_index) + else: + # keep the original token + mask_label.append(sent[token_index]) + mask_pos.append(sent_index * max_len + token_index) + + prob_index += len(sent) + + mask_label = np.array(mask_label).astype("int64").reshape([-1, 1]) + mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1]) + return_list = [batch_tokens, mask_label, mask_pos] + + if labels is not None: + label_pos = np.array(label_pos).astype("int64").reshape([-1, 1]) + assert len(labels) == len(label_pos) + return_list.append(label_pos) + return return_list diff --git a/hub_module/modules/text/text_generation/plato2_en_base/utils/tokenization.py b/modules/text/text_generation/plato2_en_base/utils/tokenization.py similarity index 100% rename from hub_module/modules/text/text_generation/plato2_en_base/utils/tokenization.py rename to modules/text/text_generation/plato2_en_base/utils/tokenization.py diff --git a/hub_module/modules/text/text_generation/plato2_en_large/README.md b/modules/text/text_generation/plato2_en_large/README.md similarity index 100% rename from hub_module/modules/text/text_generation/plato2_en_large/README.md rename to modules/text/text_generation/plato2_en_large/README.md diff --git a/hub_module/modules/text/text_generation/plato2_en_base/__init__.py b/modules/text/text_generation/plato2_en_large/__init__.py similarity index 100% rename from hub_module/modules/text/text_generation/plato2_en_base/__init__.py rename to modules/text/text_generation/plato2_en_large/__init__.py diff --git a/modules/text/text_generation/plato2_en_large/models/__init__.py b/modules/text/text_generation/plato2_en_large/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..353c85a26ac4e8c479319abfbfff038261701fc7 --- /dev/null +++ b/modules/text/text_generation/plato2_en_large/models/__init__.py @@ -0,0 +1,65 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Define model.""" + +from plato2_en_large.models.model_base import Model + +MODEL_REGISTRY = {} + +__all__ = ["MODEL_REGISTRY", "register_model", "create_model", "add_cmdline_args"] + + +def register_model(name): + """ + Register a new model class. + """ + + def __wrapped__(cls): + if name in MODEL_REGISTRY: + raise ValueError(f"Cannot register duplicate model ({name})") + if not issubclass(cls, Model): + raise ValueError(f"Model ({name}: {cls.__name__}) must extend Model") + MODEL_REGISTRY[name] = cls + return cls + + return __wrapped__ + + +def create_model(args, place) -> Model: + """ + Create a model. + """ + return MODEL_REGISTRY[args.model](args, place) + + +def add_cmdline_args(parser): + """ Add cmdline argument of Model. """ + group = parser.add_argument_group("Model") + + # Model + group.add_argument("--model", type=str, required=True) + + # Config + group.add_argument("--config_path", type=str, required=True) + + # Model related. + args, _ = parser.parse_known_args() + if args.model not in MODEL_REGISTRY: + raise ValueError(f"Unknown model type: {args.model}") + MODEL_REGISTRY[args.model].add_cmdline_args(parser) + return group + + +import plato2_en_large.models.nsp_model +import plato2_en_large.models.plato diff --git a/modules/text/text_generation/plato2_en_large/models/generator.py b/modules/text/text_generation/plato2_en_large/models/generator.py new file mode 100644 index 0000000000000000000000000000000000000000..0117f2f7f0a0fa62a51d1cdb4ec52ada32903452 --- /dev/null +++ b/modules/text/text_generation/plato2_en_large/models/generator.py @@ -0,0 +1,268 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Generator class""" + +import numpy as np +import paddle.fluid.layers as layers + +from plato2_en_large.utils.args import str2bool + + +class Generator(object): + """ + Generator class + + Use generator in inference phase. + """ + + @classmethod + def add_cmdline_args(cls, parser): + """Add cmdline argurments.""" + group = parser.add_argument_group("Generator") + group.add_argument("--min_dec_len", type=int, default=1) + group.add_argument("--max_dec_len", type=int, default=64) + + group.add_argument( + "--decoding_strategy", + type=str, + default="topk_sampling", + choices=["beam_search", "topk_sampling", "topp_sampling"]) + group.add_argument("--temperature", type=float, default=1.) 
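+        # Example invocations (flag values are illustrative, not tuned
+        # recommendations):
+        #   --decoding_strategy topk_sampling --topk 10 --temperature 1.0
+        #   --decoding_strategy topp_sampling --topp 0.9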
+ group.add_argument("--ignore_unk", type=str2bool, default=True) + + # multi sampling + group.add_argument("--num_samples", type=int, default=None) + + # top-k sampling + group.add_argument("--topk", type=int, default=10) + + # top-p sampling + group.add_argument("--topp", type=float, default=0.9) + + # beam search + group.add_argument("--beam_size", type=int, default=10) + group.add_argument("--length_average", type=str2bool, default=True) + group.add_argument("--length_penalty", type=float, default=0.0) + + return group + + def __init__(self, args): + self.min_dec_len = args.min_dec_len + self.max_dec_len = args.max_dec_len + self.eos_id = args.eos_id + self.unk_id = args.unk_id + self.mask_id = args.mask_id + self.vocab_size = args.vocab_size + + # model related + + # basic settings + self.decoding_strategy = args.decoding_strategy + self.ignore_unk = args.ignore_unk + self.continuous_position = args.continuous_position + self.temperature = args.temperature + + # reranking + self.num_samples = args.num_samples + + # top-k sampling + self.topk = args.topk + + # top-p sampling + self.topp = args.topp + + # beam search + self.beam_size = args.beam_size + self.length_penalty = args.length_penalty + self.length_average = args.length_average + return + + def inference(self, model, inputs, outputs): + """ + Run inference. + + Args: + inputs(dict): Its key is input name(str) and its value is a Variable. + model(object): A generate model. Need to implement `_generation_network` and `_calc_logits`. + + Returns: + dict(str:Variable): Its key is output name(str) and its value is a Variable. + """ + # prepare while loop + max_len = layers.fill_constant(shape=[1], dtype="int64", value=self.max_dec_len, force_cpu=True) + min_len = layers.fill_constant(shape=[1], dtype="int64", value=self.min_dec_len, force_cpu=True) + step_idx = layers.fill_constant(shape=[1], dtype="int64", value=0, force_cpu=True) + + ids = layers.array_write(layers.reshape(inputs["tgt_ids"], (-1, 1)), step_idx) + pos_biases = layers.array_write(layers.reshape(inputs["tgt_pos"], (-1, 1)), step_idx) + scores = layers.array_write(inputs["init_score"], step_idx) + tgt_generation_mask = layers.array_write(inputs["tgt_generation_mask"], step_idx) + parent_idx = inputs["parent_idx"] + + if self.decoding_strategy == "beam_search": + beam_size = self.beam_size + else: + beam_size = 1 + + eos_penalty = np.zeros(self.vocab_size, dtype="float32") + eos_penalty[self.eos_id] = -1e9 + eos_penalty = layers.assign(eos_penalty) + + token_penalty = np.zeros(self.vocab_size, dtype="float32") + token_penalty[self.unk_id] = -1e9 + if self.mask_id >= 0: + token_penalty[self.mask_id] = -1e9 + token_penalty = layers.assign(token_penalty) + + # start while loop + cond = layers.less_than(x=step_idx, y=max_len) + while_op = layers.While(cond) + with while_op.block(): + pre_ids = layers.array_read(array=ids, i=step_idx) + pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True) + pre_scores = layers.array_read(array=scores, i=step_idx) + pos_bias = layers.array_read(array=pos_biases, i=step_idx) + pos_bias = layers.gather(input=pos_bias, index=parent_idx) + + tmp_tgt_generation_mask = layers.array_read(tgt_generation_mask, i=step_idx) + dtype = tmp_tgt_generation_mask.dtype + + append_mask = layers.fill_constant_batch_size_like(input=pre_ids, value=1.0, shape=[-1, 1, 1], dtype=dtype) + tmp_tgt_generation_mask = layers.concat([tmp_tgt_generation_mask, append_mask], axis=2) + pre_mask = tmp_tgt_generation_mask = layers.gather(input=tmp_tgt_generation_mask, 
index=parent_idx) + + pre_sent = layers.fill_constant_batch_size_like( + input=pre_mask, value=1, shape=[-1, 1, 1], dtype=pre_ids.dtype) + + if self.continuous_position: + pre_pos = layers.elementwise_mul( + x=layers.fill_constant_batch_size_like( + input=pre_mask, value=1, shape=[-1, 1, 1], dtype=pre_ids.dtype), + y=step_idx, + axis=0) + pos_bias + else: + pre_pos = layers.elementwise_mul( + x=layers.fill_constant_batch_size_like( + input=pre_mask, value=1, shape=[-1, 1, 1], dtype=pre_ids.dtype), + y=step_idx, + axis=0) + + dec_out, _ = model._generation_network( + token_ids=pre_ids, + type_ids=pre_sent, + pos_ids=pre_pos, + generation_mask=tmp_tgt_generation_mask, + gather_idx=parent_idx) + logits = model._calc_logits(dec_out) + + # ignore unk and mask token + if self.ignore_unk: + logits = layers.elementwise_add(logits, token_penalty, axis=1) + + # min dec length + min_len_cond = layers.less_than(x=step_idx, y=min_len) + + def min_len_penalty(): + """Plus minimum length penalty.""" + return layers.elementwise_add(logits, eos_penalty, axis=1) + + def no_penalty(): + """No penalty.""" + return logits + + logits = layers.case([(min_len_cond, min_len_penalty)], default=no_penalty) + + # get probs + probs = layers.softmax(logits / self.temperature) + + if self.decoding_strategy == "beam_search": + topk_scores, topk_indices = layers.topk(input=probs, k=beam_size) + else: + if self.decoding_strategy.startswith("sampling"): + sampling_ids = layers.sampling_id(probs, dtype="int") + elif self.decoding_strategy.startswith("topk_sampling"): + topk_probs, _ = layers.topk(input=probs, k=self.topk) + ge_cond = layers.cast( + layers.greater_equal(probs, layers.unsqueeze(topk_probs[:, -1], [1])), "float32") + old_probs = probs + probs = probs * ge_cond / layers.reduce_sum(topk_probs, dim=-1, keep_dim=True) + sampling_ids = layers.sampling_id(probs, dtype="int") + probs = old_probs + elif self.decoding_strategy.startswith("topp_sampling"): + sorted_probs, sorted_idx = layers.argsort(probs, descending=True) + cum_sorted_probs = layers.cumsum(sorted_probs, axis=1, exclusive=True) + lt_cond = layers.cast( + layers.less_than( + cum_sorted_probs, + layers.fill_constant_batch_size_like(cum_sorted_probs, cum_sorted_probs.shape, + cum_sorted_probs.dtype, self.topp)), "float32") + old_probs = probs + candidate_probs = sorted_probs * lt_cond + probs = candidate_probs / layers.reduce_sum(candidate_probs, dim=-1, keep_dim=True) + sampling_ids = layers.sampling_id(probs, dtype="int") + sampling_ids = layers.index_sample(sorted_idx, layers.unsqueeze(sampling_ids, [1])) + sampling_ids = layers.squeeze(sampling_ids, [1]) + probs = old_probs + else: + raise ValueError(self.decoding_strategy) + + sampling_scores = layers.one_hot(layers.unsqueeze(sampling_ids, [1]), probs.shape[1]) + sampling_scores = sampling_scores * probs - (1 - sampling_scores) * 1e3 + topk_scores, topk_indices = layers.topk(input=sampling_scores, k=1) + + pre_len = layers.cast(step_idx, "float32") + layers.increment(x=step_idx, value=1.0, in_place=True) + cur_len = layers.cast(step_idx, "float32") + + # update scores + if self.length_average: + accu_scores = layers.elementwise_add( + x=layers.log(topk_scores), y=pre_scores * pre_len, axis=0) / cur_len + elif self.length_penalty > 0: + pre_lp = layers.pow((5 + pre_len) / 6, self.length_penalty) + cur_lp = layers.pow((5 + cur_len) / 6, self.length_penalty) + accu_scores = layers.elementwise_add(x=layers.log(topk_scores), y=pre_scores * pre_lp, axis=0) / cur_lp + else: + accu_scores = 
layers.elementwise_add(x=layers.log(topk_scores), y=pre_scores, axis=0) + topk_indices = layers.lod_reset(topk_indices, pre_ids) + accu_scores = layers.lod_reset(accu_scores, pre_ids) + selected_ids, selected_scores, gather_idx = layers.beam_search( + pre_ids=pre_ids, + pre_scores=pre_scores, + ids=topk_indices, + scores=accu_scores, + beam_size=beam_size, + end_id=self.eos_id, + return_parent_idx=True) + + layers.array_write(selected_ids, i=step_idx, array=ids) + layers.array_write(selected_scores, i=step_idx, array=scores) + layers.array_write(pre_mask, i=step_idx, array=tgt_generation_mask) + layers.array_write(pos_bias, i=step_idx, array=pos_biases) + + layers.assign(gather_idx, parent_idx) + + length_cond = layers.less_than(x=step_idx, y=max_len) + finish_cond = layers.logical_not(layers.is_empty(x=selected_ids)) + layers.logical_and(x=length_cond, y=finish_cond, out=cond) + + finished_ids, finished_scores = layers.beam_search_decode(ids, scores, beam_size=beam_size, end_id=self.eos_id) + + predictions = { + "finished_ids": finished_ids, + "finished_scores": finished_scores, + "token_ids": inputs["token_ids"], + "data_id": inputs["data_id"] + } + return predictions diff --git a/modules/text/text_generation/plato2_en_large/models/model_base.py b/modules/text/text_generation/plato2_en_large/models/model_base.py new file mode 100644 index 0000000000000000000000000000000000000000..6edcadda85d579cd87053a4c765462587d4feee6 --- /dev/null +++ b/modules/text/text_generation/plato2_en_large/models/model_base.py @@ -0,0 +1,288 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Model base.""" + +from abc import abstractmethod, ABC + +import paddle.fluid as fluid +from paddle.fluid.incubate.fleet.collective import fleet, DistributedStrategy +import paddle.fluid.incubate.fleet.base.role_maker as role_maker +import paddle.fluid.layers as layers + +from plato2_en_large.models.optimizer import AdamW +from plato2_en_large.utils import init_pretraining_params, init_checkpoint, to_lodtensor +from plato2_en_large.utils.args import str2bool + + +class Model(ABC): + """ + Basic model wrapper for paddle. 
+ """ + + @classmethod + def add_cmdline_args(cls, parser): + """Add cmdline argurments.""" + group = parser.add_argument_group("Model") + # Init checkpoint + group.add_argument("--init_checkpoint", type=str, default="") + group.add_argument("--init_pretraining_params", type=str, default="") + + # Optimizer + group.add_argument("-lr", "--learning_rate", type=float, default=1e-5, help="The learning rate for optimizer.") + group.add_argument("--warmup_steps", type=int, default=0, help="The warmup steps.") + group.add_argument("--weight_decay", type=float, default=0.0, help="The weight decay for optimizer.") + group.add_argument("--max_grad_norm", type=float, default=.1, help="The maximum norm of gradient.") + + group.add_argument("--use_recompute", type=str2bool, default=False) + group.add_argument("--use_amp", type=str2bool, default=False) + group.add_argument("--amp_loss_scaling", type=float, default=12800) + return group + + def __init__(self, args, place): + self.place = place + self.exe = fluid.Executor(place) + + self.init_checkpoint = args.init_checkpoint + self.init_pretraining_params = args.init_pretraining_params + + self.learning_rate = args.learning_rate + self.warmup_steps = args.warmup_steps + self.weight_decay = args.weight_decay + self.max_grad_norm = args.max_grad_norm + + self.is_distributed = args.is_distributed + self.use_recompute = args.use_recompute + self.use_amp = args.use_amp + self.amp_loss_scaling = args.amp_loss_scaling + self.run_infer = args.get("run_infer", False) + self.batch_size = args.get("batch_size", 1) + self._build_programs() + return + + def _build_programs(self): + """ + Build programs. + + Build train_program, eval_program and inference_program. Only use in static graph mode. + """ + if self.run_infer: + self.startup_program = fluid.Program() + # build infer program + self.infer_program = fluid.Program() + with fluid.program_guard(self.infer_program, self.startup_program): + with fluid.unique_name.guard(): + self.infer_feed_dict = inputs = self._get_feed_dict(is_infer=True) + outputs = self.forward(inputs, is_infer=True) + predictions = self.infer(inputs, outputs) + self.infer_fetch_dict = predictions + self.infer_program = self.infer_program.clone(for_test=True) + + self.program = self.infer_program + else: + if self.is_distributed: + exec_strategy = fluid.ExecutionStrategy() + exec_strategy.use_experimental_executor = True + exec_strategy.num_threads = 4 + exec_strategy.num_iteration_per_drop_scope = 1 + + dist_strategy = DistributedStrategy() + dist_strategy.exec_strategy = exec_strategy + dist_strategy.nccl_comm_num = 1 + dist_strategy.fuse_all_reduce_ops = True + if self.use_recompute: + dist_strategy.forward_recompute = True + dist_strategy.enable_sequential_execution = True + if self.use_amp: + dist_strategy.use_amp = True + dist_strategy.amp_loss_scaling = self.amp_loss_scaling + self.dist_strategy = dist_strategy + + self.startup_program = fluid.Program() + # build train program + self.train_program = fluid.Program() + with fluid.program_guard(self.train_program, self.startup_program): + with fluid.unique_name.guard(): + self.feed_dict = inputs = self._get_feed_dict() + outputs = self.forward(inputs) + if self.is_distributed and self.use_recompute: + self.dist_strategy.recompute_checkpoints = outputs["checkpoints"] + metrics, statistics = self.get_metrics_and_statistics(inputs, outputs) + + # build eval program + self.eval_program = self.train_program.clone(for_test=True) + self.eval_fetch_dict = {**metrics, **statistics} + + scheduled_lr = 
self.optimize(metrics)
+                    metrics["scheduled_lr"] = scheduled_lr
+                    self.train_fetch_dict = metrics
+
+            self.program = self.train_program
+            if self.is_distributed:
+                self.train_program = fleet.main_program
+
+        self.exe.run(self.startup_program)
+        if self.init_pretraining_params != "":
+            init_pretraining_params(self.exe, self.init_pretraining_params, self.program)
+        elif self.init_checkpoint != "":
+            init_checkpoint(self.exe, self.init_checkpoint, self.program)
+        return
+
+    def load(self, model_dir, is_checkpoint=False):
+        """
+        Load persistables or parameters.
+        """
+        # TODO: support dygraph.
+        if is_checkpoint:
+            init_checkpoint(self.exe, model_dir, self.program)
+        else:
+            init_pretraining_params(self.exe, model_dir, self.program)
+        return
+
+    def save(self, model_dir, is_checkpoint=False):
+        """
+        Save persistables or parameters.
+        """
+        # TODO: support dygraph.
+        if is_checkpoint:
+            fluid.io.save_persistables(self.exe, model_dir, self.program)
+        else:
+            fluid.io.save_params(self.exe, model_dir, self.program)
+        return
+
+    @abstractmethod
+    def _get_feed_dict(self, is_infer=False):
+        """
+        Return the input feed dict.
+        """
+        pass
+
+    def _get_feed(self, inputs, is_infer=False):
+        """
+        Convert `inputs` into the model's feed data format.
+        """
+        if isinstance(inputs, list):
+            # return the list directly; this format is used in `get_data_loader`.
+            return inputs
+        for k in inputs:
+            if isinstance(inputs[k], list):
+                inputs[k] = to_lodtensor(inputs[k], self.place)
+        return inputs
+
+    def get_data_loader(self, generator=None, is_infer=False):
+        """
+        Return DataLoader.
+
+        If generator is not `None`, the data loader sets it as the batch generator.
+        """
+        # TODO: support dygraph.
+        if is_infer:
+            feed_name_list, feed_list = zip(*self.infer_feed_dict.items())
+        else:
+            feed_name_list, feed_list = zip(*self.feed_dict.items())
+        loader = fluid.io.DataLoader.from_generator(
+            feed_list=feed_list, capacity=64, use_double_buffer=True, iterable=True)
+        if generator is not None:
+
+            def __wrapper__():
+                for batch in generator():
+                    batch = self._get_feed(batch)
+                    batch = [batch[name] for name in feed_name_list if name in batch]
+                    yield batch
+
+            loader.set_batch_generator(__wrapper__, self.place)
+        return loader
+
+    @abstractmethod
+    def forward(self, inputs, is_infer=False):
+        """
+        Run model main forward.
+        """
+        pass
+
+    @abstractmethod
+    def get_metrics_and_statistics(self, inputs, outputs):
+        """
+        Get metrics and statistics.
+        """
+        pass
+
+    @abstractmethod
+    def infer(self, inputs, outputs):
+        """
+        Run model inference.
+        """
+        pass
+
+    def optimize(self, metrics):
+        """
+        Optimize the model by metrics (mainly `metrics["loss"]`).
+        """
+        # TODO: support dygraph
+        if self.warmup_steps > 0:
+            scheduled_lr = layers.learning_rate_scheduler.noam_decay(1 / (self.warmup_steps * (self.learning_rate**2)),
+                                                                     self.warmup_steps)
+        else:
+            scheduled_lr = layers.create_global_var(
+                name=fluid.unique_name.generate("learning_rate"),
+                shape=[1],
+                value=self.learning_rate,
+                dtype="float32",
+                persistable=True)
+        grad_clip = fluid.clip.GradientClipByGlobalNorm(self.max_grad_norm)
+
+        self.optimizer = AdamW(learning_rate=scheduled_lr, grad_clip=grad_clip, weight_decay=self.weight_decay)
+
+        if self.is_distributed:
+            self.optimizer = fleet.distributed_optimizer(self.optimizer, strategy=self.dist_strategy)
+
+        self.optimizer.minimize(metrics["loss"])
+        return scheduled_lr
+
+    def _execute(self, program, feed, fetch_dict, **kwargs):
+        """
+        Execute program.
+ """ + fetch_list = [var.name for var in fetch_dict.values()] + fetch_vars = self.exe.run(program, feed, fetch_list, **kwargs) + return dict(zip(fetch_dict.keys(), fetch_vars)) + + def train_step(self, inputs): + """ + Run one training step. + """ + # TODO: support dygraph. + return self._execute(self.train_program, self._get_feed(inputs), self.train_fetch_dict, use_program_cache=True) + + def eval_step(self, inputs): + """ + Run one evaluation step. + """ + # TODO: support dygraph. + return self._execute(self.eval_program, self._get_feed(inputs), self.eval_fetch_dict) + + def infer_step(self, inputs): + """ + Run one inference step. + """ + # TODO: support dygraph. + return self._execute(self.infer_program, self._get_feed(inputs, is_infer=True), self.infer_fetch_dict) + + def save_inference_model(self, inference_model_path): + """ + Save the inference model. + """ + feed_list = [var.name for var in self.infer_feed_dict.values()] + fetch_list = list(self.infer_fetch_dict.values()) + + fluid.io.save_inference_model(inference_model_path, feed_list, fetch_list, self.exe, self.infer_program) diff --git a/modules/text/text_generation/plato2_en_large/models/nsp_model.py b/modules/text/text_generation/plato2_en_large/models/nsp_model.py new file mode 100644 index 0000000000000000000000000000000000000000..8c8f2d9ad484e2bec70577320ef239fd85f3a2bd --- /dev/null +++ b/modules/text/text_generation/plato2_en_large/models/nsp_model.py @@ -0,0 +1,107 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""NSP model.""" + +import paddle.fluid as fluid +import paddle.fluid.layers as layers + +from . import register_model +from .model_base import Model +from .unified_transformer import UnifiedTransformer + + +@register_model("NSPModel") +class NSPModel(UnifiedTransformer): + """NSP model.""" + + def _get_feed_dict(self, is_infer=False): + """ + Get the feed list of the model. + + Args: + is_infer(bool): True if running inference. + + Returns: + list(Variable): The feed list. + list(str): The name of each Variable in feed list. 
+        """
+        feed_dict = {}
+        feed_dict["token_ids"] = layers.data(name="token_ids", shape=[-1, self.max_seq_len, 1], dtype="int64")
+        feed_dict["type_ids"] = layers.data(name="type_ids", shape=[-1, self.max_seq_len, 1], dtype="int64")
+        feed_dict["pos_ids"] = layers.data(name="pos_ids", shape=[-1, self.max_seq_len, 1], dtype="int64")
+
+        feed_dict["attention_mask"] = layers.data(
+            name="attention_mask", shape=[-1, self.max_seq_len, self.max_seq_len], dtype=self.dtype)
+        feed_dict["label_pos"] = layers.data(name="label_pos", shape=[-1, 1], dtype="int64")
+
+        if not is_infer:
+            feed_dict["label"] = layers.data(name="label", shape=[-1, 1], dtype="int64")
+            feed_dict["tgt_label"] = layers.data(name="tgt_ids", shape=[-1, 1], dtype="int64")
+            feed_dict["tgt_pos"] = layers.data(name="tgt_pos", shape=[-1, 1], dtype="int64")
+
+        feed_dict["data_id"] = layers.data(name="data_id", shape=[-1, 1], dtype="int64")
+        return feed_dict
+
+    def _get_feed(self, inputs, is_infer=False):
+        return Model._get_feed(self, inputs, is_infer)
+
+    def forward(self, inputs, is_infer=False):
+        outputs = {}
+        self.generation_caches = None
+        outputs["enc_out"], self.checkpoints = self._generation_network(
+            token_ids=inputs["token_ids"],
+            type_ids=inputs["type_ids"],
+            pos_ids=inputs["pos_ids"],
+            generation_mask=inputs["attention_mask"])
+        return outputs
+
+    def _get_metrics(self, inputs, outputs):
+        metrics = {}
+        fc_out = self._calc_logits(outputs["enc_out"], inputs["tgt_pos"])
+        # the LM labels are the target token ids, not the target positions
+        lm_loss = layers.softmax_with_cross_entropy(logits=fc_out, label=inputs["tgt_label"])
+        need_cal = layers.not_equal(inputs["tgt_label"], layers.fill_constant(shape=[1], dtype="int64", value=1))
+        need_cal = layers.cast(need_cal, self.dtype)
+        mean_lm_loss = layers.reduce_sum(lm_loss * need_cal) / (layers.reduce_sum(need_cal) + 1e-10)
+
+        pooled_out = self._get_pooled_output(outputs["enc_out"], inputs["label_pos"])
+        nsp_fc_out = layers.fc(
+            input=pooled_out,
+            size=2,
+            param_attr=fluid.ParamAttr(name="next_sent_fc.w_0", initializer=self.param_initializer),
+            bias_attr="next_sent_fc.b_0")
+        nsp_loss, nsp_softmax = layers.softmax_with_cross_entropy(
+            logits=nsp_fc_out, label=inputs["label"], return_softmax=True)
+
+        nsp_acc = layers.accuracy(nsp_softmax, inputs["label"])
+        mean_nsp_loss = layers.mean(nsp_loss)
+
+        metrics["loss"] = mean_lm_loss + mean_nsp_loss
+        metrics["lm_loss"] = mean_lm_loss
+        metrics["nsp_loss"] = mean_nsp_loss
+        metrics["nsp_acc"] = nsp_acc
+        return metrics
+
+    def infer(self, inputs, outputs):
+        pooled_out = self._get_pooled_output(outputs["enc_out"], inputs["label_pos"])
+        nsp_fc_out = layers.fc(
+            input=pooled_out,
+            size=2,
+            param_attr=fluid.ParamAttr(name="next_sent_fc.w_0", initializer=self.param_initializer),
+            bias_attr="next_sent_fc.b_0")
+        scores = layers.softmax(nsp_fc_out)
+        predictions = {"scores": scores, "data_id": inputs["data_id"]}
+        return predictions
+
+    def infer_step(self, inputs):
+        return Model.infer_step(self, inputs)
diff --git a/modules/text/text_generation/plato2_en_large/models/optimizer.py b/modules/text/text_generation/plato2_en_large/models/optimizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..0381f12390a2f1fb672ed8a5ed3d815874fddb21
--- /dev/null
+++ b/modules/text/text_generation/plato2_en_large/models/optimizer.py
@@ -0,0 +1,37 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Optimizer.""" + +import re + +import paddle.fluid as fluid +import paddle.fluid.layers as layers + + +class AdamW(fluid.optimizer.AdamOptimizer): + """AdamW object for dygraph""" + + def __init__(self, *args, **kwargs): + weight_decay = kwargs.pop('weight_decay', None) + var_name_to_exclude = kwargs.pop('var_name_to_exclude', '.*layer_norm_scale|.*layer_norm_bias|.*b_0') + super(AdamW, self).__init__(*args, **kwargs) + self.wd = weight_decay + self.pat = re.compile(var_name_to_exclude) + + def apply_optimize(self, loss, startup_program, params_grads): + """Update params with weight decay.""" + super(AdamW, self).apply_optimize(loss, startup_program, params_grads) + for p, g in params_grads: + if not self.pat.match(p.name): + layers.assign(p * (1. - self.wd * self._learning_rate), p) diff --git a/modules/text/text_generation/plato2_en_large/models/plato.py b/modules/text/text_generation/plato2_en_large/models/plato.py new file mode 100644 index 0000000000000000000000000000000000000000..987a42d7c708e020557ce29f80873ceb65fc0fae --- /dev/null +++ b/modules/text/text_generation/plato2_en_large/models/plato.py @@ -0,0 +1,241 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Plato model.""" + +import numpy as np +import paddle.fluid as fluid +import paddle.fluid.layers as layers + +from . 
import register_model +from .model_base import Model +from .unified_transformer import UnifiedTransformer +from .transformer_block import encoder, pre_process_layer +from plato2_en_large.utils import repeat_array_or_tensor +from plato2_en_large.utils.args import str2bool +from .generator import Generator + + +@register_model("Plato") +class Plato(UnifiedTransformer): + """Plato model.""" + + @classmethod + def add_cmdline_args(cls, parser): + """Add cmdline argurments.""" + group = UnifiedTransformer.add_cmdline_args(parser) + group.add_argument("--use_bow", type=str2bool, default=True) + group.add_argument("--use_entropy", type=str2bool, default=False) + return group + + def __init__(self, args, place): + # latent related + self.mask_id = args.mask_id + self.latent_type_size = args.latent_type_size + self.latent_emb_name = "latent_embedding" + self.use_bow = args.use_bow + self.use_entropy = args.use_entropy + + super(Plato, self).__init__(args, place) + + def _get_feed_dict(self, is_infer=False): + feed_dict = {} + feed_dict["token_ids"] = layers.data(name="token_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") + feed_dict["type_ids"] = layers.data(name="type_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") + feed_dict["pos_ids"] = layers.data(name="pos_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") + + if not is_infer: + feed_dict["recognition_mask"] = layers.data( + name="recognition_mask", shape=[-1, self.max_seq_len + 1, self.max_seq_len + 1], dtype=self.dtype) + feed_dict["generation_mask"] = layers.data( + name="generation_mask", shape=[-1, self.max_seq_len + 1, self.max_seq_len + 1], dtype=self.dtype) + + if is_infer: + feed_dict["tgt_ids"] = layers.data( + name="tgt_ids", shape=[-1, self.max_seq_len, 1], dtype="int64", lod_level=2) + feed_dict["tgt_pos"] = layers.data( + name="tgt_pos", shape=[-1, self.max_seq_len, 1], dtype="int64", lod_level=2) + feed_dict["init_score"] = layers.data(name="init_score", shape=[-1, 1], dtype="float32", lod_level=1) + feed_dict["parent_idx"] = layers.data(name="parent_idx", shape=[-1], dtype="int64") + + feed_dict["tgt_generation_mask"] = layers.data( + name="tgt_generation_mask", shape=[-1, 1, self.max_seq_len + 1], dtype="float32") + feed_dict["latent_id"] = layers.data(name="latent_id", shape=[-1, 1], dtype="int64") + else: + feed_dict["tgt_label"] = layers.data(name="tgt_label", shape=[-1, 1], dtype="int64") + feed_dict["tgt_pos"] = layers.data(name="tgt_pos", shape=[-1, 1], dtype="int64") + + if self.use_bow: + feed_dict["bow_label"] = layers.data(name="bow_label", shape=[-1, 1], dtype="int64") + feed_dict["bow_pos"] = layers.data(name="bow_pos", shape=[-1, 1], dtype="int64") + + feed_dict["data_id"] = layers.data(name="data_id", shape=[-1, 1], dtype="int64") + return feed_dict + + def _recognition_network(self, token_ids, type_ids, pos_ids, recognition_mask): + mask_id = layers.fill_constant_batch_size_like( + input=token_ids, shape=[-1, 1, 1], value=self.mask_id, dtype="int64") + mask_emb = layers.embedding( + input=mask_id, + size=[self.vocab_size, self.emb_size], + dtype=self.dtype, + param_attr=fluid.ParamAttr(name=self.token_emb_name, initializer=self.param_initializer)) + emb_out, n_head_self_attn_mask = self._gen_input( + token_ids, type_ids, pos_ids, recognition_mask, aux_emb=mask_emb) + + recognition_out, checkpoints = self._encode(emb_out, n_head_self_attn_mask) + + recognition_feat = layers.slice(input=recognition_out, axes=[1], starts=[0], ends=[1]) + recognition_feat = layers.fc( + input=recognition_feat, + 
size=self.hidden_size, + act="tanh", + param_attr=fluid.ParamAttr(name="recognition_fc.w_0", initializer=self.param_initializer), + bias_attr="recognition_fc.b_0") + logits = layers.fc( + input=recognition_feat, + size=self.latent_type_size, + param_attr=fluid.ParamAttr(name=self.latent_emb_name, initializer=self.param_initializer), + bias_attr="recognition_bias") + return logits, checkpoints + + def _gumbel_softmax(self, logits, tau=0.67, eps=1e-10): + u = layers.uniform_random_batch_size_like(logits, shape=[-1, self.latent_type_size], min=0.0, max=1.0) + u.stop_gradient = True + gumbel = 0.0 - layers.log(eps - layers.log(u + eps)) + y = logits + gumbel + return layers.softmax(y / tau) + + def forward(self, inputs, is_infer=False): + """ + Run model main forward. + """ + outputs = {} + if is_infer: + self.generation_caches = [{ + "k": + layers.fill_constant_batch_size_like( + input=inputs["token_ids"], shape=[-1, 0, self.d_key * self.n_head], dtype=self.dtype, value=0), + "v": + layers.fill_constant_batch_size_like( + input=inputs["token_ids"], shape=[-1, 0, self.d_value * self.n_head], dtype=self.dtype, value=0), + } for i in range(self.n_layer)] + else: + self.generation_caches = None + + latent_embeddings = layers.create_parameter( + shape=[self.emb_size, self.latent_type_size], + dtype=self.dtype, + attr=fluid.ParamAttr(name=self.latent_emb_name, initializer=self.param_initializer)) + + if is_infer: + latent_id = inputs["latent_id"] + weights = layers.one_hot(latent_id, self.latent_type_size) + else: + logits, recognition_checkpoints = self._recognition_network( + token_ids=inputs["token_ids"], + type_ids=inputs["type_ids"], + pos_ids=inputs["pos_ids"], + recognition_mask=inputs["recognition_mask"], + ) + outputs["post_probs"] = layers.softmax(logits) + weights = self._gumbel_softmax(logits) + outputs["checkpoints"] = recognition_checkpoints + + latent_emb = layers.matmul(x=weights, y=latent_embeddings, transpose_y=True) + outputs["enc_out"], generation_checkpoints = self._generation_network( + token_ids=inputs["token_ids"], + type_ids=inputs["type_ids"], + pos_ids=inputs["pos_ids"], + generation_mask=inputs["generation_mask"], + aux_emb=layers.unsqueeze(latent_emb, axes=[1]), + gather_idx=inputs.get("parent_idx", None), + ) + + if not is_infer: + outputs["checkpoints"].extend(generation_checkpoints) + return outputs + + def _calc_bow_logits(self, enc_out, checkpoints, bow_pos): + """Get the logits of generation.""" + bow_feat = layers.slice(input=enc_out, axes=[1], starts=[0], ends=[1]) + bow_feat = layers.reshape(x=bow_feat, shape=[-1, self.hidden_size]) + bow_pos = layers.cast(x=bow_pos, dtype="int32") + bow_feat = layers.gather(input=bow_feat, index=bow_pos) + + bow_trans_feat = layers.fc( + input=bow_feat, + size=self.emb_size, + act=self.hidden_act, + param_attr=fluid.ParamAttr(name="bow_trans_fc.w_0", initializer=self.param_initializer), + bias_attr=fluid.ParamAttr(name="bow_trans_fc.b_0")) + + bow_trans_feat = pre_process_layer(bow_trans_feat, self.post_cls_cmd, name="bow_trans") + + checkpoints.append(bow_trans_feat) + + if self.weight_sharing: + fc_out = layers.matmul( + x=bow_trans_feat, + y=fluid.default_main_program().global_block().var(self.token_emb_name), + transpose_y=True) + if self.cls_bias: + fc_out += layers.create_parameter( + shape=[self.vocab_size], + dtype=self.dtype, + attr=fluid.ParamAttr(name="bow_out_fc.b_0"), + is_bias=True) + else: + bow_out_bias_attr = fluid.ParamAttr(name="bow_out_fc.b_0") if self.cls_bias else False + fc_out = layers.fc( + 
input=bow_trans_feat, + size=self.vocab_size, + param_attr=fluid.ParamAttr(name="bow_out_fc.w_0", initializer=self.param_initializer), + bias_attr=bow_out_bias_attr) + return fc_out + + def _get_metrics(self, inputs, outputs): + metrics = super(Plato, self)._get_metrics(inputs, outputs) + + if self.use_bow: + fc_out = self._calc_bow_logits(outputs["enc_out"], outputs["checkpoints"], inputs["bow_pos"]) + bow_loss = layers.softmax_with_cross_entropy(logits=fc_out, label=inputs["bow_label"]) + mean_bow_loss = layers.mean(bow_loss) + metrics["token_bow_loss"] = mean_bow_loss + metrics["loss"] = metrics["loss"] + mean_bow_loss + + entropy_loss = layers.reduce_sum(outputs["post_probs"] * layers.log(outputs["post_probs"]), dim=1) + mean_entropy_loss = layers.mean(entropy_loss) + metrics["entropy_loss"] = mean_entropy_loss + if self.use_entropy: + metrics["loss"] = metrics["loss"] + mean_entropy_loss + return metrics + + def infer_step(self, inputs): + """ + Run one inference step. + """ + if self.do_generation: + batch_size = len(inputs["data_id"]) + new_bsz = batch_size * self.latent_type_size + inputs = { + name: repeat_array_or_tensor(array_or_tensor, self.place, self.latent_type_size) + for name, array_or_tensor in inputs.items() + } + # Add latent_id + inputs["latent_id"] = np.array([i for i in range(self.latent_type_size) for _ in range(batch_size)], + dtype="int64").reshape([-1, 1]) + + return super(Plato, self).infer_step(inputs) + else: + return self._execute(self.infer_program, self._get_feed(inputs, is_infer=True), self.infer_fetch_dict) diff --git a/modules/text/text_generation/plato2_en_large/models/transformer_block.py b/modules/text/text_generation/plato2_en_large/models/transformer_block.py new file mode 100644 index 0000000000000000000000000000000000000000..041306a591233d3e1bc1f1a1e11943d54414033c --- /dev/null +++ b/modules/text/text_generation/plato2_en_large/models/transformer_block.py @@ -0,0 +1,332 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Transformer block.""" + +from functools import partial + +import paddle.fluid as fluid +import paddle.fluid.layers as layers + + +def multi_head_attention(queries, + keys, + values, + attn_bias, + d_key, + d_value, + d_model, + n_head=1, + dropout_rate=0., + cache=None, + gather_idx=None, + store=False, + param_initializer=None, + name="multi_head_att"): + """ + Multi-Head Attention. Note that attn_bias is added to the logit before + computing softmax activiation to mask certain selected positions so that + they will not considered in attention weights. + """ + keys = queries if keys is None else keys + values = keys if values is None else values + + if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): + raise ValueError("Inputs: quries, keys and values should all be 3-D tensors.") + + def __compute_qkv(queries, keys, values, n_head, d_key, d_value): + """ + Add linear projection to queries, keys, and values. 
diff --git a/modules/text/text_generation/plato2_en_large/models/transformer_block.py b/modules/text/text_generation/plato2_en_large/models/transformer_block.py
new file mode 100644
index 0000000000000000000000000000000000000000..041306a591233d3e1bc1f1a1e11943d54414033c
--- /dev/null
+++ b/modules/text/text_generation/plato2_en_large/models/transformer_block.py
@@ -0,0 +1,332 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Transformer block."""
+
+from functools import partial
+
+import paddle.fluid as fluid
+import paddle.fluid.layers as layers
+
+
+def multi_head_attention(queries,
+                         keys,
+                         values,
+                         attn_bias,
+                         d_key,
+                         d_value,
+                         d_model,
+                         n_head=1,
+                         dropout_rate=0.,
+                         cache=None,
+                         gather_idx=None,
+                         store=False,
+                         param_initializer=None,
+                         name="multi_head_att"):
+    """
+    Multi-Head Attention. Note that attn_bias is added to the logit before
+    computing the softmax activation to mask certain selected positions so that
+    they will not be considered in attention weights.
+    """
+    keys = queries if keys is None else keys
+    values = keys if values is None else values
+
+    if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
+        raise ValueError("Inputs: queries, keys and values should all be 3-D tensors.")
+
+    def __compute_qkv(queries, keys, values, n_head, d_key, d_value):
+        """
+        Add linear projection to queries, keys, and values.
+        """
+        q = layers.fc(
+            input=queries,
+            size=d_key * n_head,
+            num_flatten_dims=2,
+            param_attr=fluid.ParamAttr(name=name + "_query_fc.w_0", initializer=param_initializer),
+            bias_attr=name + "_query_fc.b_0")
+        k = layers.fc(
+            input=keys,
+            size=d_key * n_head,
+            num_flatten_dims=2,
+            param_attr=fluid.ParamAttr(name=name + "_key_fc.w_0", initializer=param_initializer),
+            bias_attr=name + "_key_fc.b_0")
+        v = layers.fc(
+            input=values,
+            size=d_value * n_head,
+            num_flatten_dims=2,
+            param_attr=fluid.ParamAttr(name=name + "_value_fc.w_0", initializer=param_initializer),
+            bias_attr=name + "_value_fc.b_0")
+        return q, k, v
+
+    def __split_heads(x, n_head):
+        """
+        Reshape the last dimension of input tensor x so that it becomes two
+        dimensions and then transpose. Specifically, input a tensor with shape
+        [bs, max_sequence_length, n_head * hidden_dim] then output a tensor
+        with shape [bs, n_head, max_sequence_length, hidden_dim].
+        """
+        hidden_size = x.shape[-1]
+        # The value 0 in shape attr means copying the corresponding dimension
+        # size of the input as the output dimension size.
+        reshaped = layers.reshape(x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True)
+
+        # permute the dimensions into:
+        # [batch_size, n_head, max_sequence_len, hidden_size_per_head]
+        return layers.transpose(x=reshaped, perm=[0, 2, 1, 3])
+
+    def __combine_heads(x):
+        """
+        Transpose and then reshape the last two dimensions of input tensor x
+        so that it becomes one dimension, which is reverse to __split_heads.
+        """
+        if len(x.shape) == 3: return x
+        if len(x.shape) != 4:
+            raise ValueError("Input(x) should be a 4-D Tensor.")
+
+        trans_x = layers.transpose(x, perm=[0, 2, 1, 3])
+        # The value 0 in shape attr means copying the corresponding dimension
+        # size of the input as the output dimension size.
+        return layers.reshape(x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], inplace=True)
+
+    def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate):
+        """
+        Scaled Dot-Product Attention
+        """
+        scaled_q = layers.scale(x=q, scale=d_key**-0.5)
+        product = layers.matmul(x=scaled_q, y=k, transpose_y=True)
+        if attn_bias:
+            product += attn_bias
+        weights = layers.softmax(product, use_cudnn=True)
+        if dropout_rate:
+            weights = layers.dropout(
+                weights, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+        out = layers.matmul(weights, v)
+        return out
+
+    q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value)
+
+    if cache is not None:  # use cache and concat time steps
+        # Since the inplace reshape in __split_heads changes the shape of k and
+        # v, which is the cache input for next time step, reshape the cache
+        # input from the previous time step first.
+        cache_k, cache_v = cache["k"], cache["v"]
+        select_k = layers.gather(cache_k, index=gather_idx)
+        select_v = layers.gather(cache_v, index=gather_idx)
+        select_k = layers.reshape(select_k, shape=[0, 0, d_key * n_head])
+        select_v = layers.reshape(select_v, shape=[0, 0, d_value * n_head])
+        if store:
+            k = layers.concat([select_k, k], axis=1)
+            v = layers.concat([select_v, v], axis=1)
+            layers.assign(k, cache["k"])
+            layers.assign(v, cache["v"])
+        else:
+            #k = select_k
+            #v = select_v
+            tmp_k = layers.concat([select_k, k[:, :1]], axis=1)
+            tmp_v = layers.concat([select_v, v[:, :1]], axis=1)
+            layers.assign(tmp_k, cache["k"])
+            layers.assign(tmp_v, cache["v"])
+            k = layers.concat([select_k, k], axis=1)
+            v = layers.concat([select_v, v], axis=1)
+
+    q = __split_heads(q, n_head)
+    k = __split_heads(k, n_head)
+    v = __split_heads(v, n_head)
+
+    ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate)
+
+    out = __combine_heads(ctx_multiheads)
+
+    # Project back to the model size.
+    proj_out = layers.fc(
+        input=out,
+        size=d_model,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + "_output_fc.w_0", initializer=param_initializer),
+        bias_attr=name + "_output_fc.b_0")
+    return proj_out
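During incremental decoding, `multi_head_attention` carries the keys and values of earlier positions in `cache["k"]` / `cache["v"]`, and regathers the cache rows by `gather_idx` (the `parent_idx` from beam search) before appending the current step, since beam search may reorder hypotheses between steps. A toy numpy sketch of this select-then-append update, with illustrative names and sizes:

```python
import numpy as np

# cached keys for 3 beams, 2 already-decoded steps, hidden width 4
cache_k = np.arange(24, dtype="float32").reshape([3, 2, 4])
parent_idx = np.array([0, 0, 2])             # beams 0 and 1 both descend from old beam 0
new_k = np.ones([3, 1, 4], dtype="float32")  # keys computed at the current step

select_k = cache_k[parent_idx]                       # reorder history to follow surviving beams
cache_k = np.concatenate([select_k, new_k], axis=1)  # append the current step
assert cache_k.shape == (3, 3, 4)
```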
+
+
+def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate, hidden_act, param_initializer=None, name="ffn"):
+    """
+    Position-wise Feed-Forward Networks.
+    This module consists of two linear transformations with a ReLU activation
+    in between, which is applied to each position separately and identically.
+    """
+    hidden = layers.fc(
+        input=x,
+        size=d_inner_hid,
+        num_flatten_dims=2,
+        act=hidden_act,
+        param_attr=fluid.ParamAttr(name=name + "_fc_0.w_0", initializer=param_initializer),
+        bias_attr=name + "_fc_0.b_0")
+    if dropout_rate:
+        hidden = layers.dropout(
+            hidden, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    out = layers.fc(
+        input=hidden,
+        size=d_hid,
+        num_flatten_dims=2,
+        param_attr=fluid.ParamAttr(name=name + "_fc_1.w_0", initializer=param_initializer),
+        bias_attr=name + "_fc_1.b_0")
+    return out
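`pre_post_process_layer`, defined next, interprets a short command string: `"a"` adds the residual connection, `"n"` applies layer normalization over the last axis, and `"d"` applies dropout, so `postprocess_cmd="da"` means dropout followed by residual add. A small numpy sketch of the `"n"` step with the default `epsilon=1e-5` (a standalone illustration, not the module's implementation):

```python
import numpy as np

def layer_norm(x, scale, bias, epsilon=1e-5):
    """Normalize over the last axis, then apply learned scale and bias,
    mirroring what the "n" command does via layers.layer_norm."""
    mean = x.mean(axis=-1, keepdims=True)
    var = x.var(axis=-1, keepdims=True)
    return (x - mean) / np.sqrt(var + epsilon) * scale + bias

x = np.array([[1.0, 2.0, 3.0]])
print(layer_norm(x, scale=np.ones(3), bias=np.zeros(3)).round(3))
# -> [[-1.225  0.     1.225]]
```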
+
+
+def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., epsilon=1e-5, name=""):
+    """
+    Add residual connection, layer normalization and dropout to the out tensor
+    optionally according to the value of process_cmd.
+    This will be used before or after multi-head attention and position-wise
+    feed-forward networks.
+    """
+    for cmd in process_cmd:
+        if cmd == "a":  # add residual connection
+            out = out + prev_out if prev_out else out
+        elif cmd == "n":  # add layer normalization
+            out = layers.layer_norm(
+                out,
+                begin_norm_axis=len(out.shape) - 1,
+                param_attr=fluid.ParamAttr(name=name + "_layer_norm_scale", initializer=fluid.initializer.Constant(1.)),
+                bias_attr=fluid.ParamAttr(name=name + "_layer_norm_bias", initializer=fluid.initializer.Constant(0.)),
+                epsilon=epsilon)
+        elif cmd == "d":  # add dropout
+            if dropout_rate:
+                out = layers.dropout(
+                    out, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False)
+    return out
+
+
+pre_process_layer = partial(pre_post_process_layer, None)
+post_process_layer = pre_post_process_layer
+
+
+def encoder_layer(input,
+                  attn_bias,
+                  n_head,
+                  d_key,
+                  d_value,
+                  d_model,
+                  d_inner_hid,
+                  prepostprocess_dropout,
+                  attention_dropout,
+                  relu_dropout,
+                  hidden_act,
+                  preprocess_cmd="n",
+                  postprocess_cmd="da",
+                  param_initializer=None,
+                  name="",
+                  epsilon=1e-5,
+                  cache=None,
+                  gather_idx=None,
+                  store=False):
+    """
+    The encoder layers that can be stacked to form a deep encoder.
+    This module consists of a multi-head (self) attention followed by
+    position-wise feed-forward networks, with both components accompanied
+    by pre_process_layer / post_process_layer to add residual connection,
+    layer normalization and dropout.
+    """
+    attn_output = multi_head_attention(
+        pre_process_layer(input, preprocess_cmd, prepostprocess_dropout, epsilon=epsilon, name=name + "_pre_att"),
+        None,
+        None,
+        attn_bias,
+        d_key,
+        d_value,
+        d_model,
+        n_head,
+        attention_dropout,
+        param_initializer=param_initializer,
+        name=name + "_multi_head_att",
+        cache=cache,
+        gather_idx=gather_idx,
+        store=store)
+    attn_output = post_process_layer(
+        input, attn_output, postprocess_cmd, prepostprocess_dropout, name=name + "_post_att", epsilon=epsilon)
+    ffd_output = positionwise_feed_forward(
+        pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout, epsilon=epsilon, name=name + "_pre_ffn"),
+        d_inner_hid,
+        d_model,
+        relu_dropout,
+        hidden_act,
+        param_initializer=param_initializer,
+        name=name + "_ffn")
+    ffd_output = post_process_layer(
+        attn_output, ffd_output, postprocess_cmd, prepostprocess_dropout, name=name + "_post_ffn", epsilon=epsilon)
+    return ffd_output, [attn_output, ffd_output]
+
+
+def encoder(enc_input,
+            attn_bias,
+            n_layer,
+            n_head,
+            d_key,
+            d_value,
+            d_model,
+            d_inner_hid,
+            prepostprocess_dropout,
+            attention_dropout,
+            relu_dropout,
+            hidden_act,
+            preprocess_cmd="n",
+            postprocess_cmd="da",
+            param_initializer=None,
+            name="",
+            epsilon=1e-5,
+            n_layer_per_block=1,
+            param_share="normal",
+            caches=None,
+            gather_idx=None,
+            store=False):
+    """
+    The encoder is composed of a stack of identical layers returned by calling
+    encoder_layer.
+ """ + checkpoints = [] + names = [] + if param_share == "inner_share": + for _ in range(n_layer // n_layer_per_block): + for i in range(n_layer_per_block): + names.append(name + "_layer_" + str(i)) + else: + for i in range(n_layer // n_layer_per_block): + for _ in range(n_layer_per_block): + names.append(name + "_layer_" + str(i)) + + for i in range(n_layer): + enc_output, cps = encoder_layer( + enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + hidden_act, + preprocess_cmd, + postprocess_cmd, + param_initializer=param_initializer, + epsilon=epsilon, + name=names[i], + cache=caches[i] if caches is not None else None, + gather_idx=gather_idx, + store=store) + checkpoints.extend(cps) + enc_input = enc_output + enc_output = pre_process_layer( + enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder", epsilon=epsilon) + + return enc_output, checkpoints diff --git a/modules/text/text_generation/plato2_en_large/models/unified_transformer.py b/modules/text/text_generation/plato2_en_large/models/unified_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..c4ebf115119ca0f7c0feab23ae94afe5d5aaa20f --- /dev/null +++ b/modules/text/text_generation/plato2_en_large/models/unified_transformer.py @@ -0,0 +1,378 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Unified Transformer model.""" + +import numpy as np +import paddle.fluid as fluid +import paddle.fluid.layers as layers + +from . 
 import register_model
+from .model_base import Model
+from .transformer_block import encoder, pre_process_layer
+from plato2_en_large.utils.args import str2bool
+from plato2_en_large.utils import repeat_array_or_tensor, slice_array_or_tensor
+from .generator import Generator
+
+
+@register_model("UnifiedTransformer")
+class UnifiedTransformer(Model):
+    """Unified Transformer"""
+
+    @classmethod
+    def add_cmdline_args(cls, parser):
+        """Add cmdline arguments."""
+        group = Model.add_cmdline_args(parser)
+        group.add_argument("--max_seq_len", type=int, default=256)
+        group.add_argument("--weight_sharing", type=str2bool, default=True)
+        group.add_argument("--mem_efficient", type=str2bool, default=False)
+
+        Generator.add_cmdline_args(parser)
+        return group
+
+    def __init__(self, args, place):
+        self.max_seq_len = args.max_seq_len
+
+        self.emb_size = args.emb_size or args.hidden_size
+        self.hidden_size = args.hidden_size
+
+        self.n_layer = args.num_hidden_layers
+        self.n_head = args.num_attention_heads
+        self.d_key = args.get("key_size", self.hidden_size // self.n_head)
+        self.d_value = args.get("value_size", self.hidden_size // self.n_head)
+        self.inner_hidden_size = args.get("inner_hidden_size", self.hidden_size * 4)
+
+        self.vocab_size = args.vocab_size
+        self.max_position_seq_len = args.max_position_embeddings
+        self.type_size = args.type_vocab_size
+        self.token_emb_name = "word_embedding"
+        self.type_emb_name = "sent_embedding"
+        self.pos_emb_name = "pos_embedding"
+
+        self.epsilon = args.epsilon or 1e-5
+        self.n_layer_per_block = args.n_layer_per_block or 1
+        self.pre_encoder_cmd = args.get("pre_encoder_cmd", "nd")
+        self.preprocess_cmd = args.get("preprocess_cmd", "")
+        self.postprocess_cmd = args.get("postprocess_cmd", "dan")
+        self.post_cls_cmd = args.get("post_cls_cmd", "n")
+        self.cls_bias = args.get("cls_bias", True)
+        if self.hidden_size != self.emb_size:
+            self.emb_mapping_in = True
+        else:
+            self.emb_mapping_in = args.get("emb_mapping_in", False)
+
+        self.hidden_act = args.hidden_act
+        self.prepostprocess_dropout = args.hidden_dropout_prob
+        self.attention_dropout = args.attention_probs_dropout_prob
+        self.weight_sharing = args.weight_sharing
+
+        self.mem_efficient = args.mem_efficient
+
+        self.dtype = "float32"
+
+        # Initialize all weights by the truncated normal initializer, and all biases
+        # will be initialized by constant zero by default.
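+        # Note: with weight_sharing enabled, _calc_logits reuses the
+        # "word_embedding" matrix as the output projection (tied input/output
+        # embeddings), so vocab logits come from a matmul with the embedding table.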
+ self.param_initializer = fluid.initializer.TruncatedNormal(scale=args.initializer_range) + + # task-related + self.generator = Generator(args) + self.do_generation = args.do_generation + + super(UnifiedTransformer, self).__init__(args, place) + + def _gen_input(self, token_ids, type_ids, pos_ids, input_mask, aux_emb=None): + token_emb_out = layers.embedding( + input=token_ids, + size=[self.vocab_size, self.emb_size], + dtype=self.dtype, + param_attr=fluid.ParamAttr(name=self.token_emb_name, initializer=self.param_initializer)) + type_emb_out = layers.embedding( + input=type_ids, + size=[self.type_size, self.emb_size], + dtype=self.dtype, + param_attr=fluid.ParamAttr(name=self.type_emb_name, initializer=self.param_initializer)) + pos_emb_out = layers.embedding( + input=pos_ids, + size=[self.max_position_seq_len, self.emb_size], + dtype=self.dtype, + param_attr=fluid.ParamAttr(name=self.pos_emb_name, initializer=self.param_initializer)) + emb_out = token_emb_out + type_emb_out + pos_emb_out + + # auxiliary memory embeddings + if aux_emb is not None: + emb_out = layers.concat([aux_emb, emb_out], axis=1) + + # post process of embedding + emb_out = pre_process_layer( + emb_out, self.pre_encoder_cmd, self.prepostprocess_dropout, name="pre_encoder", epsilon=self.epsilon) + if self.emb_mapping_in: + emb_out = layers.fc( + input=emb_out, + num_flatten_dims=2, + size=self.hidden_size, + param_attr=fluid.ParamAttr(name="emb_hidden_mapping", initializer=self.param_initializer), + bias_attr="emb_hidden_mapping_bias") + + # generate n-head self-attention mask + self_attn_mask = input_mask + self_attn_mask = layers.scale(x=self_attn_mask, scale=1e4, bias=-1.0, bias_after_scale=False) + n_head_self_attn_mask = layers.stack(x=[self_attn_mask] * self.n_head, axis=1) + n_head_self_attn_mask.stop_gradient = True + + return emb_out, n_head_self_attn_mask + + def _get_pooled_output(self, enc_out, pos): + enc_out = layers.reshape(x=enc_out, shape=[-1, self.hidden_size]) + pos = layers.cast(x=pos, dtype="int32") + feat = layers.gather(input=enc_out, index=pos) + + pooled_out = layers.fc( + input=feat, + size=self.hidden_size, + act="tanh", + param_attr=fluid.ParamAttr(name="pooled_fc.w_0", initializer=self.param_initializer), + bias_attr="pooled_fc.b_0") + return pooled_out + + def _generation_network(self, token_ids, type_ids, pos_ids, generation_mask, aux_emb=None, gather_idx=None): + emb_out, n_head_self_attn_mask = self._gen_input(token_ids, type_ids, pos_ids, generation_mask, aux_emb=aux_emb) + return self._encode(emb_out, n_head_self_attn_mask, self.generation_caches, gather_idx=gather_idx) + + def _encode(self, emb_out, n_head_self_attn_mask, caches=None, gather_idx=None): + return encoder( + enc_input=emb_out, + attn_bias=n_head_self_attn_mask, + n_layer=self.n_layer, + n_head=self.n_head, + d_key=self.d_key, + d_value=self.d_value, + d_model=self.hidden_size, + d_inner_hid=self.inner_hidden_size, + prepostprocess_dropout=self.prepostprocess_dropout, + attention_dropout=self.attention_dropout, + relu_dropout=0, + hidden_act=self.hidden_act, + preprocess_cmd=self.preprocess_cmd, + postprocess_cmd=self.postprocess_cmd, + param_initializer=self.param_initializer, + epsilon=self.epsilon, + n_layer_per_block=self.n_layer_per_block, + name="encoder", + caches=caches, + gather_idx=gather_idx, + store=caches is not None) + + def _gumbel_softmax(self, logits, tau=0.67, eps=1e-10): + u = layers.uniform_random_batch_size_like(logits, shape=[-1, self.latent_type_size], min=0.0, max=1.0) + u.stop_gradient = True + 
gumbel = 0.0 - layers.log(eps - layers.log(u + eps)) + y = logits + gumbel + return layers.softmax(y / tau) + + def _get_feed_dict(self, is_infer=False): + """ + Get the feed list of the model. + + Args: + is_infer(bool): True if running inference. + + Returns: + list(Variable): The feed list. + list(str): The name of each Variable in feed list. + """ + feed_dict = {} + feed_dict["token_ids"] = layers.data(name="token_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") + feed_dict["type_ids"] = layers.data(name="type_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") + feed_dict["pos_ids"] = layers.data(name="pos_ids", shape=[-1, self.max_seq_len, 1], dtype="int64") + + feed_dict["generation_mask"] = layers.data( + name="generation_mask", shape=[-1, self.max_seq_len, self.max_seq_len], dtype=self.dtype) + + if is_infer: + feed_dict["tgt_ids"] = layers.data( + name="tgt_ids", shape=[-1, self.max_seq_len, 1], dtype="int64", lod_level=2) + feed_dict["tgt_pos"] = layers.data( + name="tgt_pos", shape=[-1, self.max_seq_len, 1], dtype="int64", lod_level=2) + feed_dict["init_score"] = layers.data(name="init_score", shape=[-1, 1], dtype="float32", lod_level=1) + feed_dict["parent_idx"] = layers.data(name="parent_idx", shape=[-1], dtype="int64") + + feed_dict["tgt_generation_mask"] = layers.data( + name="tgt_generation_mask", shape=[-1, 1, self.max_seq_len], dtype="float32") + else: + feed_dict["tgt_label"] = layers.data(name="tgt_label", shape=[-1, 1], dtype="int64") + feed_dict["tgt_pos"] = layers.data(name="tgt_pos", shape=[-1, 1], dtype="int64") + + feed_dict["data_id"] = layers.data(name="data_id", shape=[-1, 1], dtype="int64") + return feed_dict + + def forward(self, inputs, is_infer=False): + """ + Run model main forward. + """ + outputs = {} + if is_infer: + self.generation_caches = [{ + "k": + layers.fill_constant_batch_size_like( + input=inputs["token_ids"], shape=[-1, 0, self.d_key * self.n_head], dtype=self.dtype, value=0), + "v": + layers.fill_constant_batch_size_like( + input=inputs["token_ids"], shape=[-1, 0, self.d_value * self.n_head], dtype=self.dtype, value=0), + } for i in range(self.n_layer)] + else: + self.generation_caches = None + + outputs["enc_out"], generation_checkpoints = self._generation_network( + token_ids=inputs["token_ids"], + type_ids=inputs["type_ids"], + pos_ids=inputs["pos_ids"], + generation_mask=inputs["generation_mask"], + gather_idx=inputs.get("parent_idx", None)) + + if not is_infer: + outputs["checkpoints"] = generation_checkpoints + return outputs + + def _calc_logits(self, enc_out, checkpoints=None, seq_pos=None): + """Get the logits of generation.""" + enc_out = layers.reshape(x=enc_out, shape=[-1, self.hidden_size]) + if seq_pos is not None: + seq_pos = layers.cast(x=seq_pos, dtype="int32") + seq_feat = layers.gather(input=enc_out, index=seq_pos) + else: + seq_feat = enc_out + + seq_trans_feat = layers.fc( + input=seq_feat, + size=self.emb_size, + act=self.hidden_act, + param_attr=fluid.ParamAttr(name="mask_lm_trans_fc.w_0", initializer=self.param_initializer), + bias_attr=fluid.ParamAttr(name="mask_lm_trans_fc.b_0")) + + seq_trans_feat = pre_process_layer(seq_trans_feat, self.post_cls_cmd, name="mask_lm_trans") + + if checkpoints is not None: + checkpoints.append(seq_trans_feat) + + if self.weight_sharing: + fc_out = layers.matmul( + x=seq_trans_feat, + y=fluid.default_main_program().global_block().var(self.token_emb_name), + transpose_y=True) + if self.cls_bias: + fc_out += layers.create_parameter( + shape=[self.vocab_size], + dtype=self.dtype, 
+ attr=fluid.ParamAttr(name="mask_lm_out_fc.b_0"), + is_bias=True) + else: + seq_out_bias_attr = fluid.ParamAttr(name="mask_lm_out_fc.b_0") if self.cls_bias else False + fc_out = layers.fc( + input=seq_trans_feat, + size=self.vocab_size, + param_attr=fluid.ParamAttr(name="mask_lm_out_fc.w_0", initializer=self.param_initializer), + bias_attr=seq_out_bias_attr) + return fc_out + + def _get_metrics(self, inputs, outputs): + metrics = {} + + fc_out = self._calc_logits(outputs["enc_out"], outputs["checkpoints"], inputs["tgt_pos"]) + tgt_lm_loss = layers.softmax_with_cross_entropy(logits=fc_out, label=inputs["tgt_label"]) + mean_tgt_lm_loss = layers.mean(tgt_lm_loss) + loss = mean_tgt_lm_loss + metrics["token_lm_loss"] = mean_tgt_lm_loss + + metrics["loss"] = loss + return metrics + + def _get_statistics(self, inputs, outputs): + statistics = {} + if "tgt_label" in inputs: + statistics["tokens_num"] = layers.reduce_sum( + layers.fill_constant_batch_size_like(input=inputs["tgt_label"], value=1.0, shape=[-1], dtype="int64")) + statistics["batch_size"] = layers.reduce_sum( + layers.fill_constant_batch_size_like(input=inputs["token_ids"], value=1.0, shape=[-1], dtype="int64")) + return statistics + + def get_metrics_and_statistics(self, inputs, outputs): + """ + Get metrics and statistics. + """ + metrics = self._get_metrics(inputs, outputs) + statistics = self._get_statistics(inputs, outputs) + return metrics, statistics + + def infer(self, inputs, outputs): + """ + Run model inference. + """ + if self.do_generation: + return self.generator.inference(self, inputs, outputs) + else: + raise NotImplementedError + + def _run_generation(self, inputs): + """ + Run generation. + """ + batch_size = len(inputs["data_id"]) + inputs["parent_idx"] = np.array(range(batch_size), dtype="int64") + outputs = self._execute( + self.infer_program, self._get_feed(inputs, is_infer=True), self.infer_fetch_dict, return_numpy=False) + + predictions = [] + data_id_list = np.array(outputs["data_id"]).reshape(-1).tolist() + token_ids_list = np.array(outputs["token_ids"]).squeeze(2).tolist() + seq_ids = outputs["finished_ids"] + seq_ids_np = np.array(outputs["finished_ids"]) + seq_scores_np = np.array(outputs["finished_scores"]) + for i, (data_id, token_ids) in enumerate(zip(data_id_list, token_ids_list)): + start = seq_ids.lod()[0][i] + end = seq_ids.lod()[0][i + 1] + for j in range(start, end): + sub_start = seq_ids.lod()[1][j] + sub_end = seq_ids.lod()[1][j + 1] + info = {} + info["data_id"] = data_id + info["decode_score"] = float(seq_scores_np[sub_end - 1]) + info["context_token_ids"] = token_ids + info["response_token_ids"] = seq_ids_np[sub_start:sub_end].tolist() + predictions.append(info) + return predictions + + def infer_step(self, inputs): + """ + Run one inference step. 
+ """ + if self.do_generation: + if self.generator.num_samples: + inputs = { + name: repeat_array_or_tensor(array_or_tensor, self.place, self.generator.num_samples) + for name, array_or_tensor in inputs.items() + } + + if self.mem_efficient: + predictions = [] + for idx in range(0, len(inputs["data_id"]), self.batch_size): + part_inputs = { + name: slice_array_or_tensor(array_or_tensor, self.place, idx, idx + self.batch_size) + for name, array_or_tensor in inputs.items() + } + part_outputs = self._run_generation(part_inputs) + predictions.extend(part_outputs) + else: + predictions = self._run_generation(inputs) + return predictions + else: + return self._execute(self.infer_program, self._get_feed(inputs, is_infer=True), self.infer_fetch_dict) diff --git a/modules/text/text_generation/plato2_en_large/module.py b/modules/text/text_generation/plato2_en_large/module.py new file mode 100644 index 0000000000000000000000000000000000000000..61e684db14fa5f23b3c2270098a1da9f2278735f --- /dev/null +++ b/modules/text/text_generation/plato2_en_large/module.py @@ -0,0 +1,180 @@ +# coding:utf-8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import ast +import os +import json +import sys +import argparse +import contextlib +from collections import namedtuple + +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.module.module import runnable +from paddlehub.module.nlp_module import DataFormatError +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo, serving + +import plato2_en_large.models as plato_models +from plato2_en_large.tasks.dialog_generation import DialogGeneration +from plato2_en_large.utils import check_cuda, Timer +from plato2_en_large.utils.args import parse_args + + +@moduleinfo( + name="plato2_en_large", + version="1.0.0", + summary= + "A novel pre-training model for dialogue generation, incorporated with latent discrete variables for one-to-many relationship modeling.", + author="baidu-nlp", + author_email="", + type="nlp/text_generation", +) +class Plato(hub.NLPPredictionModule): + def _initialize(self): + """ + initialize with the necessary elements + """ + if "CUDA_VISIBLE_DEVICES" not in os.environ: + raise RuntimeError("The module only support GPU. Please set the environment variable CUDA_VISIBLE_DEVICES.") + + args = self.setup_args() + self.task = DialogGeneration(args) + self.model = plato_models.create_model(args, fluid.CUDAPlace(0)) + self.Example = namedtuple("Example", ["src", "data_id"]) + self._interactive_mode = False + + def setup_args(self): + """ + Setup arguments. 
+ """ + assets_path = os.path.join(self.directory, "assets") + vocab_path = os.path.join(assets_path, "vocab.txt") + init_pretraining_params = os.path.join(assets_path, "32L", "Plato") + spm_model_file = os.path.join(assets_path, "spm.model") + nsp_inference_model_path = os.path.join(assets_path, "32L", "NSP") + config_path = os.path.join(assets_path, "32L.json") + + # ArgumentParser.parse_args use argv[1:], it will drop the first one arg, so the first one in sys.argv should be "" + sys.argv = [ + "", "--model", "Plato", "--vocab_path", + "%s" % vocab_path, "--do_lower_case", "False", "--init_pretraining_params", + "%s" % init_pretraining_params, "--spm_model_file", + "%s" % spm_model_file, "--nsp_inference_model_path", + "%s" % nsp_inference_model_path, "--ranking_score", "nsp_score", "--do_generation", "True", "--batch_size", + "1", "--config_path", + "%s" % config_path + ] + + parser = argparse.ArgumentParser() + plato_models.add_cmdline_args(parser) + DialogGeneration.add_cmdline_args(parser) + args = parse_args(parser) + + args.load(args.config_path, "Model") + args.run_infer = True # only build infer program + + return args + + @serving + def generate(self, texts): + """ + Get the robot responses of the input texts. + + Args: + texts(list or str): If not in the interactive mode, texts should be a list in which every element is the chat context separated with '\t'. + Otherwise, texts shoule be one sentence. The module can get the context automatically. + + Returns: + results(list): the robot responses. + """ + if not texts: + return [] + if self._interactive_mode: + if isinstance(texts, str): + self.context.append(texts.strip()) + texts = [" [SEP] ".join(self.context[-self.max_turn:])] + else: + raise ValueError("In the interactive mode, the input data should be a string.") + elif not isinstance(texts, list): + raise ValueError("If not in the interactive mode, the input data should be a list.") + + bot_responses = [] + for i, text in enumerate(texts): + example = self.Example(src=text.replace("\t", " [SEP] "), data_id=i) + record = self.task.reader._convert_example_to_record(example, is_infer=True) + data = self.task.reader._pad_batch_records([record], is_infer=True) + pred = self.task.infer_step(self.model, data)[0] # batch_size is 1 + bot_response = pred["response"] # ignore data_id and score + bot_responses.append(bot_response) + + if self._interactive_mode: + self.context.append(bot_responses[0].strip()) + return bot_responses + + @contextlib.contextmanager + def interactive_mode(self, max_turn=6): + """ + Enter the interactive mode. + + Args: + max_turn(int): the max dialogue turns. max_turn = 1 means the robot can only remember the last one utterance you have said. + """ + self._interactive_mode = True + self.max_turn = max_turn + self.context = [] + yield + self.context = [] + self._interactive_mode = False + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + self.parser = argparse.ArgumentParser( + description='Run the %s module.' % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, optional.") + + self.add_module_input_arg() + + args = self.parser.parse_args(argvs) + + try: + input_data = self.check_input_data(args) + except DataFormatError and RuntimeError: + self.parser.print_help() + return None + + results = self.generate(texts=input_data) + + return results + + +if __name__ == "__main__": + module = Plato() + for result in module.generate(["Hello", "Hello\thi, nice to meet you, my name is tom\tso your name is tom?"]): + print(result) + with module.interactive_mode(max_turn=3): + while True: + human_utterance = input() + robot_utterance = module.generate(human_utterance) + print("Robot: %s" % robot_utterance[0]) diff --git a/modules/text/text_generation/plato2_en_large/readers/dialog_reader.py b/modules/text/text_generation/plato2_en_large/readers/dialog_reader.py new file mode 100644 index 0000000000000000000000000000000000000000..73be362f596f459f8295e13428b8eb1749117baf --- /dev/null +++ b/modules/text/text_generation/plato2_en_large/readers/dialog_reader.py @@ -0,0 +1,442 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Dialogue Reader.""" + +import csv +from collections import namedtuple +from contextlib import contextmanager +import gzip +import os + +import numpy as np +import paddle.fluid as fluid +from paddle.fluid.incubate.fleet.collective import fleet + +from plato2_en_large.utils import pad_batch_data +from plato2_en_large.utils.args import str2bool +from plato2_en_large.utils.masking import mask +import plato2_en_large.utils.tokenization as tokenization + + +class DialogReader(object): + """The implement of DialogReader.""" + + @classmethod + def add_cmdline_args(cls, parser): + """Add cmdline argurments.""" + group = parser.add_argument_group("Reader") + group.add_argument("--max_src_len", type=int, default=128) + group.add_argument("--max_tgt_len", type=int, default=128) + group.add_argument("--truncate_first_turn", type=str2bool, default=False) + group.add_argument("--file_format", type=str, default="file", choices=["file", "filelist"]) + group.add_argument("--data_format", type=str, default="raw", choices=["raw", "tokenized", "numerical"]) + group.add_argument("--in_tokens", type=str2bool, default=False) + group.add_argument("--batch_size", type=int, default=16) + group.add_argument("--continuous_position", type=str2bool, default=True) + group.add_argument("--random_seed", type=int, default=11) + group.add_argument("--sort_pool_size", type=int, default=2**16) + + group = parser.add_argument_group("Tokenizer") + group.add_argument("--tokenizer", type=str, default="SentencePieceTokenizer") + args, _ = parser.parse_known_args() + tokenizer_cls = getattr(tokenization, args.tokenizer) + tokenizer_cls.add_cmdline_args(parser) + return group + + def __init__(self, args): + tokenizer_cls = getattr(tokenization, args.tokenizer) + self.tokenizer = tokenizer_cls(args) + self.vocab = self.tokenizer.vocab + self.pad_id = args.pad_id = self.vocab["[PAD]"] + self.bos_id = args.bos_id = self.vocab["[CLS]"] + self.eos_id = args.eos_id = self.vocab["[SEP]"] + self.unk_id = args.unk_id = self.vocab["[UNK]"] + self.mask_id = args.mask_id = self.vocab["[MASK]"] + self.vocab_size = args.get("vocab_size", 0) + self.max_src_len = args.max_src_len + self.max_tgt_len = args.max_tgt_len + self.truncate_first_turn = args.truncate_first_turn + self.file_format = args.file_format + self.data_format = args.data_format + self.in_tokens = args.in_tokens + self.batch_size = args.batch_size + self.continuous_position = args.continuous_position + self.sort_pool_size = args.sort_pool_size + + # random_seed must be set for data slicing when using multi-gpu + self.global_rng = np.random.RandomState(args.random_seed) + + # training progress + self.current_example = 0 + self.current_epoch = 0 + self.num_examples = 0 + + # model related + + self.fields = ["token_ids", "type_ids", "pos_ids"] + self.num_numerical_fields = len(self.fields) + self.fields += ["tgt_start_idx", "data_id"] + self.sort_key = lambda record: [len(record.token_ids)] + + self.Record = namedtuple("Record", self.fields, defaults=(None, ) * len(self.fields)) + + self.features = {} + return + + def get_train_progress(self): + """Gets progress for training phase.""" + return self.current_epoch, self.current_file_index, self.total_file + + def _convert_example_to_record(self, example, is_infer): + # process src + src_token_ids = [] + src_pos_ids = [] + + # tokenize src + s_token_ids_list = [] + for s in example.src.split("[SEP]"): + s = tokenization.convert_to_unicode(s).strip() + + if self.data_format == "tokenized": + s_tokens = s.split(" ") + else: + s_tokens = 
self.tokenizer.tokenize(s) + + s_token_ids = self.tokenizer.convert_tokens_to_ids(s_tokens) + [self.eos_id] + s_token_ids_list.append(s_token_ids) + + # trim src + idx = len(s_token_ids_list) - 1 + total_token_num = 1 + while idx >= 0: + total_token_num += len(s_token_ids_list[idx]) + if total_token_num > self.max_src_len: + if self.truncate_first_turn and idx == 0: + truncated_ids = s_token_ids_list[idx][:self.max_src_len - total_token_num] + if len(truncated_ids) > 1: + s_token_ids_list[idx] = truncated_ids[:-1] + [self.eos_id] + idx -= 1 + break + idx -= 1 + + for i, s_token_ids in enumerate(s_token_ids_list[idx + 1:], idx + 1): + src_token_ids += s_token_ids + src_pos_ids += list(range(1, len(s_token_ids) + 1)) + + src_token_ids = [self.bos_id] + src_token_ids + src_type_ids = [0] * len(src_token_ids) + src_pos_ids = [0] + src_pos_ids + assert len(src_token_ids) == len(src_type_ids) == len(src_pos_ids), \ + "not len(src_token_ids) == len(src_type_ids) == len(src_pos_ids)" + + token_ids = src_token_ids + type_ids = src_type_ids + pos_ids = src_pos_ids + tgt_start_idx = len(token_ids) + + if not is_infer: + # process tgt + # tokenize tgt + tgt = tokenization.convert_to_unicode(example.tgt).strip() + if self.data_format == "tokenized": + tgt_tokens = tgt.split(" ") + else: + tgt_tokens = self.tokenizer.tokenize(tgt) + + tgt_token_ids = self.tokenizer.convert_tokens_to_ids(tgt_tokens) + tgt_token_ids.append(self.eos_id) + + # trim tgt + if len(tgt_token_ids) > self.max_tgt_len - 1: + tgt_token_ids = tgt_token_ids[:self.max_tgt_len - 1] + + tgt_token_ids = [self.bos_id] + tgt_token_ids + tgt_type_ids = [1] * len(tgt_token_ids) + tgt_pos_ids = list(range(1, len(tgt_token_ids) + 1)) + assert len(tgt_token_ids) == len(tgt_type_ids) == len(tgt_pos_ids), \ + "not len(tgt_token_ids) == len(tgt_type_ids) == len(tgt_pos_ids)" + + token_ids += tgt_token_ids + type_ids += tgt_type_ids + pos_ids += tgt_pos_ids + + assert len(token_ids) == len(type_ids) == len(pos_ids), \ + "not len(token_ids) == len(type_ids) == len(pos_ids)" + + if self.continuous_position: + src_pos_ids = list(range(len(src_token_ids))) + if not is_infer: + tgt_pos_ids = list(range(len(tgt_token_ids))) + pos_ids = list(range(len(token_ids))) + + field_values = {"token_ids": src_token_ids, "type_ids": src_type_ids, "pos_ids": src_pos_ids} + field_values["tgt_start_idx"] = tgt_start_idx + field_values["data_id"] = example.data_id + + record = self.Record(**field_values) + return record + + def _read_tsv(self, fp, phase, is_infer, delimiter="\t", quotechar=None): + """Reads a tab separated value file.""" + csv.field_size_limit(2**20) + reader = csv.reader(fp, delimiter=delimiter, quotechar=quotechar) + headers = next(reader) + headers.append("data_id") + Example = namedtuple("Example", headers) + + for i, line in enumerate(reader): + example = Example(*line, data_id=i) + if is_infer or phase.endswith("test"): + self.features[phase][i] = example + record = self._convert_example_to_record(example, is_infer) + yield record + + def _read_numerical_file(self, fp, delimiter=";"): + for i, line in enumerate(fp): + cols = tokenization.convert_to_unicode(line).strip().split(delimiter) + cols = list(map(lambda x: list(map(int, x.split(" "))), cols)) + if len(cols) > self.num_numerical_fields: + cols = cols[:self.num_numerical_fields] + tgt_start_idx = cols[0].index(self.bos_id, 1) + record = self.Record(*cols, tgt_start_idx=tgt_start_idx, data_id=i) + yield record + + def _read_file(self, input_file, phase, is_infer): + def __wrapper__(): + with 
open_file(input_file) as fp: + if self.data_format == "numerical": + records = self._read_numerical_file(fp) + else: + records = self._read_tsv(fp, phase, is_infer) + for record in records: + yield record + + return __wrapper__ + + def _read_files(self, filelist, phase, is_infer, shuffle_files): + input_files = open(filelist).readlines() + + def __wrapper__(): + if shuffle_files: + self.global_rng.shuffle(input_files) + + if phase == "train": + self.total_file = len(input_files) + for file_index, input_file in enumerate(input_files, 1): + if phase == "train": + self.current_file_index = file_index + self.current_file = input_file + file_reader = self._read_file(input_file.strip(), phase, is_infer) + for record in file_reader(): + yield record + + return __wrapper__ + + def _batch_reader(self, reader, phase=None, is_infer=False, sort_pool_size=2**16): + """Construct a batch reader.""" + + def update_max_lens(max_lens, record): + """Update max_lens.""" + if max_lens is None: + return self.sort_key(record) + else: + return [max(max_len, l) for max_len, l in zip(max_lens, self.sort_key(record))] + + def get_batch(reader): + """Generate batches from reader.""" + batch, max_lens = [], None + for record in reader(): + if record is None: + yield batch + batch, max_lens = [], None + continue + + self.current_example += 1 + max_lens = update_max_lens(max_lens, record) + if self.in_tokens: + to_append = (len(batch) + 1) * sum(max_lens) <= self.batch_size + else: + to_append = len(batch) < self.batch_size + if to_append: + batch.append(record) + else: + yield batch + batch, max_lens = [record], self.sort_key(record) + + if len(batch) > 0: + yield batch + + def get_sorted_batch(pool): + """Generate sorted batches from pool.""" + pool = sorted(pool, key=self.sort_key) + batches = [] + batch, max_lens = [], None + for record in pool: + self.current_example += 1 + max_lens = update_max_lens(max_lens, record) + if self.in_tokens: + to_append = (len(batch) + 1) * sum(max_lens) <= self.batch_size + else: + to_append = len(batch) < self.batch_size + if to_append: + batch.append(record) + else: + batches.append(batch) + batch, max_lens = [record], self.sort_key(record) + + if len(batch) > 0: + batches.append(batch) + self.global_rng.shuffle(batches) + + for batch in batches: + yield batch + + def __wrapper__(): + if sort_pool_size > 0: + pool = [] + for record in reader(): + pool.append(record) + if len(pool) == sort_pool_size: + for batch in get_sorted_batch(pool): + yield batch + pool = [] + if len(pool) > 0: + for batch in get_sorted_batch(pool): + yield batch + else: + for batch in get_batch(reader): + yield batch + + return __wrapper__ + + def _distributed_batch_reader(self, batch_reader, num_part, part_id, is_test=False): + def __wrapper__(): + batches = [] + for batch in batch_reader(): + batches.append(batch) + if len(batches) == num_part: + yield batches[part_id] + batches = [] + if is_test and 0 <= part_id < len(batches): + yield batches[part_id] + return + + return __wrapper__ + + def data_generator(self, + input_file=None, + reader=None, + num_epochs=1, + num_part=1, + part_id=0, + phase=None, + is_infer=False): + """Data generator.""" + + def __wrapper__(): + if is_infer or phase.endswith("test"): + self.features[phase] = {} + + nonlocal reader + if reader is None: + if self.file_format == "filelist": + reader = self._read_files(input_file, phase, is_infer, not phase.endswith("test")) + else: + if phase == "train": + self.total_file = 1 + self.current_file_index = 1 + self.current_file = 
input_file + reader = self._read_file(input_file, phase, is_infer) + + batch_reader = self._batch_reader( + reader, phase, is_infer, sort_pool_size=self.sort_pool_size if not is_infer else 0) + if phase == "train": + batch_reader = self._distributed_batch_reader(batch_reader, num_part, part_id) + elif phase.startswith("distributed"): + batch_reader = self._distributed_batch_reader(batch_reader, num_part, part_id, is_test=True) + + for epoch_index in range(num_epochs): + if phase == "train": + self.current_example = 0 + self.current_epoch = epoch_index + 1 + for batch in batch_reader(): + yield self._pad_batch_records(batch, is_infer) + + return __wrapper__ + + def _gen_self_attn_mask(self, batch_token_ids, batch_tgt_start_idx=None, is_unidirectional=True, shift_len=0): + max_len = max(map(len, batch_token_ids)) + input_mask_data = np.zeros((len(batch_token_ids), max_len + shift_len, max_len + shift_len)) + if is_unidirectional: + for index, mask_data in enumerate(input_mask_data): + start = 0 if batch_tgt_start_idx is None else batch_tgt_start_idx[index] + end = len(batch_token_ids[index]) + mask_data[:end + shift_len, :start + shift_len] = 1.0 + # Generate the lower triangular matrix using the slice of matrix + b = np.tril(np.ones([end - start, end - start]), 0) + mask_data[start + shift_len:end + shift_len, start + shift_len:end + shift_len] = b + else: + for index, token_ids in enumerate(batch_token_ids): + input_mask_data[index, :len(token_ids) + shift_len, :len(token_ids) + shift_len] = 1.0 + return input_mask_data.astype("float32") + + def _pad_batch_records(self, batch_records, is_infer): + """ + Padding batch records and construct model's inputs. + """ + batch_size = len(batch_records) + batch = {} + batch_token_ids = [record.token_ids for record in batch_records] + batch_type_ids = [record.type_ids for record in batch_records] + batch_pos_ids = [record.pos_ids for record in batch_records] + batch["token_ids"] = pad_batch_data(batch_token_ids, pad_id=self.pad_id) + batch["type_ids"] = pad_batch_data(batch_type_ids, pad_id=self.pad_id) + batch["pos_ids"] = pad_batch_data(batch_pos_ids, pad_id=self.pad_id) + + batch_tgt_start_idx = [record.tgt_start_idx for record in batch_records] + batch["generation_mask"] = self._gen_self_attn_mask(batch_token_ids, batch_tgt_start_idx=batch_tgt_start_idx) + + if is_infer: + tgt_ids = np.array([[[self.bos_id]]] * len(batch_token_ids), dtype="int64") + if self.continuous_position: + tgt_pos = np.array(batch_tgt_start_idx, dtype="int64") + else: + tgt_pos = np.zeros_like(batch_tgt_start_idx, dtype="int64") + tgt_pos = tgt_pos.reshape(-1, 1, 1) + batch["init_score"] = np.zeros_like(tgt_ids, dtype="float32").reshape(-1, 1).tolist() + batch["tgt_ids"] = tgt_ids.tolist() + batch["tgt_pos"] = tgt_pos.tolist() + + batch["tgt_generation_mask"] = batch["generation_mask"][:, 0:1, :].astype("float32") + else: + batch["tgt_label"], batch["tgt_pos"] = mask( + batch_tokens=batch_token_ids, + vocab_size=self.vocab_size, + sent_b_starts=batch_tgt_start_idx, + is_unidirectional=True) + + batch_data_id = [record.data_id for record in batch_records] + batch["data_id"] = np.array(batch_data_id).astype("int64").reshape([-1, 1]) + return batch + + +@contextmanager +def open_file(filename): + """Open file.""" + if filename.endswith(".gz"): + fp = gzip.open(filename, "rt") + else: + fp = open(filename) + yield fp + fp.close() diff --git a/modules/text/text_generation/plato2_en_large/readers/nsp_reader.py b/modules/text/text_generation/plato2_en_large/readers/nsp_reader.py 
new file mode 100644
index 0000000000000000000000000000000000000000..8258ed3065461235dd0e1a6e683a22578d3e47d0
--- /dev/null
+++ b/modules/text/text_generation/plato2_en_large/readers/nsp_reader.py
@@ -0,0 +1,148 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""NSP Reader."""
+
+from collections import namedtuple
+
+import numpy as np
+
+from plato2_en_large.readers.dialog_reader import DialogReader
+from plato2_en_large.utils import pad_batch_data
+from plato2_en_large.utils.args import str2bool
+from plato2_en_large.utils.masking import mask
+
+
+class NSPReader(DialogReader):
+    """NSP Reader."""
+
+    @classmethod
+    def add_cmdline_args(cls, parser):
+        """Add cmdline arguments."""
+        group = DialogReader.add_cmdline_args(parser)
+        group.add_argument(
+            "--attention_style", type=str, default="bidirectional", choices=["bidirectional", "unidirectional"])
+        group.add_argument("--mix_negative_sample", type=str2bool, default=False)
+        return group
+
+    def __init__(self, args):
+        super(NSPReader, self).__init__(args)
+        self.fields.append("label")
+        self.Record = namedtuple("Record", self.fields, defaults=(None, ) * len(self.fields))
+
+        self.attention_style = args.attention_style
+        self.mix_negative_sample = args.mix_negative_sample
+        return
+
+    def _convert_example_to_record(self, example, is_infer):
+        record = super(NSPReader, self)._convert_example_to_record(example, False)
+        if "label" in example._fields:
+            record = record._replace(label=int(example.label))
+        return record
+
+    def _mix_negative_sample(self, reader, neg_pool_size=2**16):
+        def gen_from_pool(pool):
+            num_samples = len(pool)
+            if num_samples == 1:
+                # only one sample: it is impossible to generate a negative sample
+                yield pool[0]._replace(label=1)
+                return
+            self.global_rng.shuffle(pool)
+            for i in range(num_samples):
+                pool[i] = pool[i]._replace(label=1)
+                j = (i + 1) % num_samples
+                idx_i = pool[i].tgt_start_idx
+                idx_j = pool[j].tgt_start_idx
+                field_values = {}
+                field_values["token_ids"] = pool[i].token_ids[:idx_i] + pool[j].token_ids[idx_j:]
+                field_values["type_ids"] = pool[i].type_ids[:idx_i] + pool[j].type_ids[idx_j:]
+                field_values["pos_ids"] = list(range(len(field_values["token_ids"])))
+                neg_record = self.Record(**field_values, tgt_start_idx=idx_i, data_id=-1, label=0)
+                pool.append(neg_record)
+                assert len(neg_record.token_ids) <= self.max_seq_len
+            self.global_rng.shuffle(pool)
+            for record in pool:
+                yield record
+
+        def __wrapper__():
+            pool = []
+            for record in reader():
+                pool.append(record)
+                if len(pool) == neg_pool_size:
+                    for record in gen_from_pool(pool):
+                        yield record
+                    pool = []
+            if len(pool) > 0:
+                for record in gen_from_pool(pool):
+                    yield record
+
+        return __wrapper__
+
+    def _batch_reader(self, reader, phase=None, is_infer=False, sort_pool_size=2**16):
+        if self.mix_negative_sample:
+            reader = self._mix_negative_sample(reader)
+        return super(NSPReader, self)._batch_reader(
+            reader, phase=phase, is_infer=is_infer,
sort_pool_size=sort_pool_size) + + def _pad_batch_records(self, batch_records, is_infer): + """ + Padding batch records and construct model's inputs. + """ + batch = {} + batch_token_ids = [record.token_ids for record in batch_records] + batch_type_ids = [record.type_ids for record in batch_records] + batch_pos_ids = [record.pos_ids for record in batch_records] + batch_tgt_start_idx = [record.tgt_start_idx for record in batch_records] + batch_label = [record.label for record in batch_records] + + if self.attention_style == "unidirectional": + batch["token_ids"] = pad_batch_data(batch_token_ids, pad_id=self.pad_id) + batch["type_ids"] = pad_batch_data(batch_type_ids, pad_id=self.pad_id) + batch["pos_ids"] = pad_batch_data(batch_pos_ids, pad_id=self.pad_id) + tgt_label, tgt_pos, label_pos = mask( + batch_tokens=batch_token_ids, + vocab_size=self.vocab_size, + bos_id=self.bos_id, + sent_b_starts=batch_tgt_start_idx, + labels=batch_label, + is_unidirectional=True) + attention_mask = self._gen_self_attn_mask(batch_token_ids, batch_tgt_start_idx) + else: + batch_mask_token_ids, tgt_label, tgt_pos, label_pos = mask( + batch_tokens=batch_token_ids, + vocab_size=self.vocab_size, + bos_id=self.bos_id, + eos_id=self.eos_id, + mask_id=self.mask_id, + sent_b_starts=batch_tgt_start_idx, + labels=batch_label, + is_unidirectional=False) + if not is_infer: + batch_token_ids = batch_mask_token_ids + batch["token_ids"] = pad_batch_data(batch_token_ids, pad_id=self.pad_id) + batch["type_ids"] = pad_batch_data(batch_type_ids, pad_id=self.pad_id) + batch["pos_ids"] = pad_batch_data(batch_pos_ids, pad_id=self.pad_id) + attention_mask = self._gen_self_attn_mask(batch_token_ids, is_unidirectional=False) + + batch["attention_mask"] = attention_mask + batch["label_pos"] = label_pos + + if not is_infer: + batch_label = np.array(batch_label).astype("int64").reshape([-1, 1]) + batch["label"] = batch_label + batch["tgt_label"] = tgt_label + batch["tgt_pos"] = tgt_pos + + batch_data_id = [record.data_id for record in batch_records] + batch["data_id"] = np.array(batch_data_id).astype("int64").reshape([-1, 1]) + return batch diff --git a/modules/text/text_generation/plato2_en_large/readers/plato_reader.py b/modules/text/text_generation/plato2_en_large/readers/plato_reader.py new file mode 100644 index 0000000000000000000000000000000000000000..8502f817b277fec855305dbff60b9aa48e8d6cef --- /dev/null +++ b/modules/text/text_generation/plato2_en_large/readers/plato_reader.py @@ -0,0 +1,83 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
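For reference, a small standalone sketch of the negative sampling performed by `NSPReader._mix_negative_sample` above: each label-1 record keeps its own context but, for the label-0 counterpart, takes the response segment of the next record in the shuffled pool (toy token ids, illustrative only):

```python
from collections import namedtuple

Record = namedtuple("Record", ["token_ids", "tgt_start_idx", "label"])

# Toy pool: token_ids = context + response; tgt_start_idx marks the response start.
pool = [
    Record(token_ids=[1, 11, 12, 2, 21, 22, 2], tgt_start_idx=4, label=1),
    Record(token_ids=[1, 31, 2, 41, 42, 2], tgt_start_idx=3, label=1),
]

negatives = []
for i, rec in enumerate(pool):
    other = pool[(i + 1) % len(pool)]  # rotate: pair context i with response i+1
    mixed_ids = rec.token_ids[:rec.tgt_start_idx] + other.token_ids[other.tgt_start_idx:]
    negatives.append(Record(token_ids=mixed_ids, tgt_start_idx=rec.tgt_start_idx, label=0))

print(negatives[0].token_ids)  # [1, 11, 12, 2, 41, 42, 2]: context 0 + response 1
```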
+"""Plato Reader.""" + +import numpy as np + +from plato2_en_large.readers.dialog_reader import DialogReader +from plato2_en_large.utils import pad_batch_data +from plato2_en_large.utils.masking import mask + + +class PlatoReader(DialogReader): + """The implement of PlatoReader""" + + def __init__(self, args): + super(PlatoReader, self).__init__(args) + self.latent_type_size = args.latent_type_size + self.use_bow = args.use_bow + + def _pad_batch_records(self, batch_records, is_infer): + """ + Padding batch records and construct model's inputs. + """ + batch = {} + batch_token_ids = [record.token_ids for record in batch_records] + batch_type_ids = [record.type_ids for record in batch_records] + batch_pos_ids = [record.pos_ids for record in batch_records] + + batch_tgt_start_idx = [record.tgt_start_idx for record in batch_records] + + batch_size = len(batch_token_ids) + + # padding + batch["token_ids"] = pad_batch_data(batch_token_ids, pad_id=self.pad_id) + batch["type_ids"] = pad_batch_data(batch_type_ids, pad_id=self.pad_id) + batch["pos_ids"] = pad_batch_data(batch_pos_ids, pad_id=self.pad_id) + + batch["generation_mask"] = self._gen_self_attn_mask( + batch_token_ids, batch_tgt_start_idx=batch_tgt_start_idx, is_unidirectional=True, shift_len=1) + if not is_infer: + batch["recognition_mask"] = self._gen_self_attn_mask(batch_token_ids, is_unidirectional=False, shift_len=1) + + if is_infer: + tgt_ids = np.array([[[self.bos_id]]] * batch_size, dtype="int64") + if self.continuous_position: + tgt_pos = np.array(batch_tgt_start_idx, dtype="int64") + else: + tgt_pos = np.zeros_like(batch_tgt_start_idx, dtype="int64") + tgt_pos = tgt_pos.reshape(-1, 1, 1) + batch["init_score"] = np.zeros_like(tgt_ids, dtype="float32").reshape(-1, 1).tolist() + batch["tgt_ids"] = tgt_ids.tolist() + batch["tgt_pos"] = tgt_pos.tolist() + batch["parent_idx"] = np.array(range(batch_size), dtype="int32") + + batch["tgt_generation_mask"] = batch["generation_mask"][:, 0:1, :].astype("float32") + else: + mask_return_list = mask( + batch_tokens=batch_token_ids, + vocab_size=self.vocab_size, + sent_b_starts=batch_tgt_start_idx, + is_unidirectional=True, + use_latent=True, + use_bow=self.use_bow) + batch["tgt_label"] = mask_return_list[0] + batch["tgt_pos"] = mask_return_list[1] + if self.use_bow: + batch["bow_label"] = mask_return_list[2] + batch["bow_pos"] = mask_return_list[3] + + batch_data_id = [record.data_id for record in batch_records] + batch["data_id"] = np.array(batch_data_id).astype("int64").reshape([-1, 1]) + return batch diff --git a/hub_module/modules/text/text_generation/plato2_en_large/tasks/__init__.py b/modules/text/text_generation/plato2_en_large/tasks/__init__.py similarity index 100% rename from hub_module/modules/text/text_generation/plato2_en_large/tasks/__init__.py rename to modules/text/text_generation/plato2_en_large/tasks/__init__.py diff --git a/modules/text/text_generation/plato2_en_large/tasks/dialog_generation.py b/modules/text/text_generation/plato2_en_large/tasks/dialog_generation.py new file mode 100644 index 0000000000000000000000000000000000000000..5f425846df486ffd74278ada469eb8108f54f3c1 --- /dev/null +++ b/modules/text/text_generation/plato2_en_large/tasks/dialog_generation.py @@ -0,0 +1,292 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Dialogue generation task."""
+
+from collections import defaultdict
+import math
+
+from plato2_en_large.readers.dialog_reader import DialogReader
+from plato2_en_large.readers.plato_reader import PlatoReader
+from plato2_en_large.tasks import register_task
+from plato2_en_large.tasks.task_base import Task
+from plato2_en_large.utils.args import str2bool
+from plato2_en_large.utils.inference import create_predictor
+
+
+def post_process_context(token_ids, reader, merge=True):
+    """Post-process the context sequence."""
+    context = []
+    utt = []
+    for tok_id in token_ids[1:]:
+        if tok_id == reader.eos_id:
+            utt = reader.tokenizer.convert_ids_to_tokens(utt)
+            if merge:
+                utt = reader.tokenizer.merge_subword(utt)
+            context.append(utt)
+            utt = []
+        else:
+            utt.append(tok_id)
+    return context
+
+
+def post_process_response(token_ids, reader, merge=True):
+    """
+    Post-process the decoded sequence. Truncate from the first
+    <eos> and remove the <bos> and <eos> tokens currently.
+    """
+    eos_pos = len(token_ids)
+    for i, tok_id in enumerate(token_ids):
+        if tok_id == reader.eos_id:
+            eos_pos = i
+            break
+    token_ids = token_ids[1:eos_pos]
+    response = reader.tokenizer.convert_ids_to_tokens(token_ids)
+    if merge:
+        response = reader.tokenizer.merge_subword(response)
+    return token_ids, response
+
+
+def get_cross_turn_repetition(context, pred_tokens, eos_idx, is_cn=False):
+    """Get cross-turn repetition."""
+    if len(pred_tokens) == 0:
+        return 1.0
+    if is_cn:
+        context = ["".join(utt) for utt in context]
+        pred_tokens = "".join(pred_tokens)
+
+    pred_tri_grams = set()
+    for i in range(len(pred_tokens) - 2):
+        tri_gram = tuple(pred_tokens[i:i + 3])
+        pred_tri_grams.add(tri_gram)
+    for utt in context:
+        for i in range(len(utt) - 2):
+            tri_gram = tuple(utt[i:i + 3])
+            if tri_gram in pred_tri_grams:
+                return 1.0
+    return 0.0
+
+
+def get_in_turn_repetition(pred, is_cn=False):
+    """Get in-turn repetition."""
+    if len(pred) == 0:
+        return 1.0
+    if isinstance(pred[0], str):
+        pred = [tok.lower() for tok in pred]
+        if is_cn:
+            pred = "".join(pred)
+    tri_grams = set()
+    for i in range(len(pred) - 2):
+        tri_gram = tuple(pred[i:i + 3])
+        if tri_gram in tri_grams:
+            return 1.0
+        tri_grams.add(tri_gram)
+    return 0.0
+
+
+def get_nsp_score_batch(nsp_predictor, predictions):
+    """
+    Get NSP scores of a batch.
+ """ + import argparse + from collections import namedtuple + + from plato2_en_large.readers.nsp_reader import NSPReader + from plato2_en_large.utils.args import parse_args + from plato2_en_large.tasks.next_sentence_prediction import NextSentencePrediction + + parser = argparse.ArgumentParser() + NextSentencePrediction.add_cmdline_args(parser) + parser.add_argument("--num_samples", type=int, default=None) + parser.add_argument("--config_path", type=str, required=True) + parser.add_argument("--mem_efficient", type=str2bool, default=False) + + args = parse_args(parser, allow_unknown=True) + args.load(args.config_path) + if not args.mem_efficient: + if args.num_samples: + args.batch_size *= args.num_samples + if args.latent_type_size: + args.batch_size *= args.latent_type_size + args.tokenized_input = True + reader = NSPReader(args) + + def __reader__(): + headers = ["src", "tgt", "data_id"] + + Example = namedtuple("Example", headers) + + for i, info in enumerate(predictions): + context = post_process_context(info["context_token_ids"], reader, merge=False) + context_tokenized_input = " [SEP] ".join(" ".join(utt) for utt in context) + _, response = post_process_response(info["response_token_ids"], reader, merge=False) + response_tokenized_input = " ".join(response) + example = Example(src=context_tokenized_input, tgt=response_tokenized_input, data_id=i) + record = reader._convert_example_to_record(example, is_infer=True) + yield record + return + + generator = reader.data_generator( + reader=__reader__, + is_infer=True, + phase="test", + ) + + steps = 0 + for data in generator(): + outputs = nsp_predictor(data) + for probs, data_id in zip(outputs[0], outputs[-1]): + data_id = data_id[0] + info = predictions[data_id] + info["nsp_score"] = float(probs[1]) + + return + + +@register_task("DialogGeneration") +class DialogGeneration(Task): + """ + Define dialogue response generation. + """ + + @classmethod + def add_cmdline_args(cls, parser): + """Add cmdline argurments.""" + group = parser.add_argument_group("Task") + group.add_argument("--do_generation", type=str2bool, default=False) + group.add_argument("--is_cn", type=str2bool, default=False) + + group.add_argument("--nsp_inference_model_path", type=str, default=None) + group.add_argument("--nsp_attention_style", type=str, default="bidirectional") + + group.add_argument("--ranking_score", type=str, default="decode_score") + + args, _ = parser.parse_known_args() + if args.model == "Plato": + PlatoReader.add_cmdline_args(parser) + else: + DialogReader.add_cmdline_args(parser) + return group + + def __init__(self, args): + super(DialogGeneration, self).__init__(args) + self.do_generation = args.do_generation + self.is_cn = args.is_cn + if args.model == "Plato": + self.reader = PlatoReader(args) + else: + self.reader = DialogReader(args) + + if args.nsp_inference_model_path: + self.nsp_predictor = create_predictor(args.nsp_inference_model_path, args.is_distributed) + self.nsp_attention_style = args.nsp_attention_style + else: + self.nsp_predictor = None + + self.ranking_score = args.ranking_score + self.max_dec_len = args.max_dec_len + return + + def _post_process_generation_output(self, predictions): + """ + Post process generation output. + + Calculate repetion, reranking. 
+ """ + for info in predictions: + tokens = post_process_context(info["context_token_ids"], self.reader) + pred_token_ids, pred_tokens = post_process_response(info["response_token_ids"], self.reader) + info["context"] = " [SEP] ".join(" ".join(u) for u in tokens) + info["response"] = " ".join(pred_tokens) + info["num_token"] = len(pred_token_ids) + info["cross_turn_repetition"] = get_cross_turn_repetition(tokens, pred_tokens, self.reader.eos_id, + self.is_cn) + info["in_turn_repetition"] = max( + get_in_turn_repetition(pred_tokens, self.is_cn), get_in_turn_repetition(pred_token_ids)) + if self.nsp_predictor is not None: + get_nsp_score_batch(self.nsp_predictor, predictions) + + group = defaultdict(list) + for info in predictions: + group[info["data_id"]].append(info) + + predictions = [] + for data_id in group: + infos = group[data_id] + for info in infos: + info["score"] = info[self.ranking_score] + if self.max_dec_len is not None and info["num_token"] >= self.max_dec_len: # not ending + info["score"] -= 1e3 + elif info["cross_turn_repetition"] > 0: + info["score"] -= 1e3 + elif info["in_turn_repetition"] > 0: + info["score"] -= 1e3 + infos = sorted(infos, key=lambda info: -info["score"]) + pred = infos[0] + keep_attr = ["data_id", "score", "response"] + pred = {k: pred[k] for k in keep_attr} + predictions.append(pred) + return predictions + + def _post_process_scoring_output(self, predictions): + raise NotImplementedError + + def _post_process_infer_output(self, predictions): + if self.do_generation: + return self._post_process_generation_output(predictions) + else: + return self._post_process_scoring_output(predictions) + + def merge_mertrics_and_statistics(self, outputs, part_outputs): + """ + Merge two evaulation output. + """ + if outputs is None: + return part_outputs + + if part_outputs is None: + return outputs + + batch_size = outputs.pop("batch_size") + tokens_num = outputs.pop("tokens_num") + part_batch_size = part_outputs.pop("batch_size") + part_tokens_num = part_outputs.pop("tokens_num") + + new_outputs = {"batch_size": batch_size + part_batch_size, "tokens_num": tokens_num + part_tokens_num} + for k in outputs: + if k.startswith("token_"): + new_outputs[k] = ( + outputs[k] * tokens_num + part_outputs[k] * part_tokens_num) / new_outputs["tokens_num"] + else: + new_outputs[k] = ( + outputs[k] * batch_size + part_outputs[k] * part_batch_size) / new_outputs["batch_size"] + return new_outputs + + def get_metrics(self, outputs): + """ + Get metrics. + """ + if outputs is None: + raise ValueError("metrics is None") + outputs = dict(outputs) + outputs.pop("batch_size", None) + outputs.pop("tokens_num", None) + metrics = {} + for k in outputs: + if k.startswith("token_"): + metrics[k[6:]] = outputs[k] + else: + metrics[k] = outputs[k] + if k == "token_lm_loss": + metrics["ppl"] = math.exp(outputs[k]) + return metrics diff --git a/modules/text/text_generation/plato2_en_large/tasks/next_sentence_prediction.py b/modules/text/text_generation/plato2_en_large/tasks/next_sentence_prediction.py new file mode 100644 index 0000000000000000000000000000000000000000..0f0d65d4156974e3dab92be21ae46f2f1da8f919 --- /dev/null +++ b/modules/text/text_generation/plato2_en_large/tasks/next_sentence_prediction.py @@ -0,0 +1,44 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Next sentence prediction task."""
+
+from plato2_en_large.readers.nsp_reader import NSPReader
+from plato2_en_large.tasks import register_task
+from plato2_en_large.tasks.task_base import Task
+from plato2_en_large.utils.args import str2bool
+
+
+@register_task("NextSentencePrediction")
+class NextSentencePrediction(Task):
+    """
+    Define the next sentence prediction task.
+    """
+
+    @classmethod
+    def add_cmdline_args(cls, parser):
+        """Add cmdline arguments."""
+        group = NSPReader.add_cmdline_args(parser)
+        return group
+
+    def __init__(self, args):
+        super(NextSentencePrediction, self).__init__(args)
+        self.reader = NSPReader(args)
+        return
+
+    def _post_process_infer_output(self, predictions):
+        predictions = [{
+            "data_id": data_id.tolist()[0],
+            "score": score.tolist()[1]
+        } for data_id, score in zip(predictions["data_id"], predictions["scores"])]
+        return predictions
diff --git a/modules/text/text_generation/plato2_en_large/tasks/task_base.py b/modules/text/text_generation/plato2_en_large/tasks/task_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc4bbe44fca94ef7d0325aeb2986c435a0e40641
--- /dev/null
+++ b/modules/text/text_generation/plato2_en_large/tasks/task_base.py
@@ -0,0 +1,86 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Task base."""
+
+from abc import abstractmethod, ABC
+
+from plato2_en_large.models.model_base import Model
+
+
+class Task(ABC):
+    """
+    Basic task.
+    """
+
+    def __init__(self, args):
+        return
+
+    def train_step(self, model: Model, inputs):
+        """Run one training step."""
+        outputs = model.train_step(inputs)
+        outputs = {k: v.tolist()[0] for k, v in outputs.items()}
+        return outputs
+
+    def eval_step(self, model: Model, inputs):
+        """Run one evaluation step."""
+        outputs = model.eval_step(inputs)
+        outputs = {k: v.tolist()[0] for k, v in outputs.items()}
+        return outputs
+
+    def infer_step(self, model: Model, inputs):
+        """Run one inference step."""
+        predictions = model.infer_step(inputs)
+        outputs = self._post_process_infer_output(predictions)
+        return outputs
+
+    def _post_process_infer_output(self, predictions):
+        """
+        Post-process inference output.
+        """
+        return predictions
+
+    def merge_mertrics_and_statistics(self, outputs, part_outputs):
+        """
+        Merge metrics and statistics.
+ """ + if outputs is None: + return part_outputs + + if part_outputs is None: + return outputs + + batch_size = outputs.pop("batch_size") + part_batch_size = part_outputs.pop("batch_size") + + new_outputs = { + "batch_size": batch_size + part_batch_size, + } + for k in outputs: + new_outputs[k] = (outputs[k] * batch_size + part_outputs[k] * part_batch_size) / new_outputs["batch_size"] + return new_outputs + + def get_metrics(self, outputs): + """ + Get metrics. + """ + if outputs is None: + raise ValueError("metrics is None") + outputs = dict(outputs) + # pop statistics + outputs.pop("batch_size", None) + return outputs + + def get_data_loader(self, model, *args, is_infer=False, **kwargs): + generator = self.reader.data_generator(*args, is_infer=is_infer, **kwargs) + return model.get_data_loader(generator, is_infer) diff --git a/modules/text/text_generation/plato2_en_large/utils/__init__.py b/modules/text/text_generation/plato2_en_large/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4068cbd6fe3a2b818b4a435fe79738837a4f812f --- /dev/null +++ b/modules/text/text_generation/plato2_en_large/utils/__init__.py @@ -0,0 +1,173 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Utils.""" + +from itertools import chain +import os +import time +import sys + +import numpy as np +import paddle.fluid as fluid + + +def to_lodtensor(data, place): + """Convert data to LoDTensor.""" + if place is None: + return data + lengths = [] + while isinstance(data[0], list): + lengths.append(list(map(len, data))) + data = [x for xs in data for x in xs] + if isinstance(data[0], float): + data = np.array(data, dtype="float32") + else: + data = np.array(data, dtype="int64") + data_tensor = fluid.LoDTensor() + data_tensor.set(data, place) + data_tensor.set_recursive_sequence_lengths(lengths) + return data_tensor + + +def pad_batch_data(insts, pad_id=0): + """Pad the instances to the max sequence length in batch. 
""" + max_len = max(map(len, insts)) + inst_data = np.array([list(inst) + [pad_id] * (max_len - len(inst)) for inst in insts]) + return inst_data.astype("int64").reshape([-1, max_len, 1]) + + +def convert_lodtensor_to_list(tensor): + data = np.array(tensor) + recursive_sequence_lengths = tensor.recursive_sequence_lengths() + recursive_sequence_lengths.reverse() + for i, lengths in enumerate(recursive_sequence_lengths): + shift = 0 + new_data = [] + for j, l in enumerate(lengths): + new_data.append(data[shift:shift + l]) + shift += l + data = new_data + return data + + +def concatenate_lodtensors(tensors, place): + """Concatenate LoD tensors.""" + data = [] + recursive_sequence_lengths = [] + for tensor in tensors: + data.append(np.array(tensor)) + recursive_sequence_lengths.append(tensor.recursive_sequence_lengths()) + data = np.concatenate(data, axis=0) + recursive_sequence_lengths = [sum(lens, []) for lens in zip(*recursive_sequence_lengths)] + data_tensor = fluid.LoDTensor() + data_tensor.set(data, place) + data_tensor.set_recursive_sequence_lengths(recursive_sequence_lengths) + assert data_tensor.has_valid_recursive_sequence_lengths() + return data_tensor + + +def repeat_array_or_tensor(array_or_tensor, place, times): + """Repeate numpy array or LoD tensor.""" + if isinstance(array_or_tensor, fluid.LoDTensor): + data = [np.array(array_or_tensor)] * times + recursive_sequence_lengths = [array_or_tensor.recursive_sequence_lengths()] * times + data = np.concatenate(data, axis=0) + recursive_sequence_lengths = [sum(lens, []) for lens in zip(*recursive_sequence_lengths)] + data_tensor = fluid.LoDTensor() + data_tensor.set(data, place) + data_tensor.set_recursive_sequence_lengths(recursive_sequence_lengths) + assert data_tensor.has_valid_recursive_sequence_lengths() + return data_tensor + elif isinstance(array_or_tensor, list): + return list(chain(*([array_or_tensor] * times))) + else: + return np.concatenate([array_or_tensor] * times, axis=0) + + +def slice_array_or_tensor(array_or_tensor, place, begin, end): + """Repeate numpy array or LoD tensor.""" + if isinstance(array_or_tensor, fluid.LoDTensor): + data = convert_lodtensor_to_list(array_or_tensor) + data = data[begin:end] + return to_lodtensor(data, place) + else: + return array_or_tensor[begin:end] + + +def init_checkpoint(exe, init_checkpoint_path, main_program): + """Initialize from checkpoint.""" + assert os.path.exists(init_checkpoint_path), "[%s] cann't be found." % init_checkpoint_path + + def existed_persitables(var): + """Whether var is a persistables.""" + if not fluid.io.is_persistable(var): + return False + return os.path.exists(os.path.join(init_checkpoint_path, var.name)) + + fluid.io.load_vars(exe, init_checkpoint_path, main_program=main_program, predicate=existed_persitables) + print(f"Load model from {init_checkpoint_path}") + + +def init_pretraining_params(exe, pretraining_params_path, main_program): + """Only initialize parameters.""" + assert os.path.exists(pretraining_params_path), "[%s] cann't be found." 
% pretraining_params_path + + def existed_params(var): + """Whether var is a parameter.""" + if not isinstance(var, fluid.framework.Parameter): + return False + return os.path.exists(os.path.join(pretraining_params_path, var.name)) + + fluid.io.load_vars(exe, pretraining_params_path, main_program=main_program, predicate=existed_params) + print(f"Load pretraining parameters from {pretraining_params_path}.") + + return + + +class Timer(object): + def __init__(self): + self._pass_time = 0 + self._start_time = None + return + + def start(self): + self._start_time = time.time() + + def pause(self): + self._pass_time += time.time() - self._start_time + self._start_time = None + + def reset(self): + self._pass_time = 0 + + @property + def pass_time(self): + if self._start_time is None: + return self._pass_time + else: + return self._pass_time + time.time() - self._start_time + + +ERROR_MESSAGE = "\nYou can not set use_cuda = True in the model because you are using paddlepaddle-cpu.\n \ + Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_cuda = False to run models on CPU.\n" + + +def check_cuda(use_cuda, err=ERROR_MESSAGE): + """Check CUDA.""" + try: + if use_cuda and not fluid.is_compiled_with_cuda(): + print(err) + sys.exit(1) + except Exception as e: + pass diff --git a/hub_module/modules/text/text_generation/plato2_en_large/utils/args.py b/modules/text/text_generation/plato2_en_large/utils/args.py similarity index 100% rename from hub_module/modules/text/text_generation/plato2_en_large/utils/args.py rename to modules/text/text_generation/plato2_en_large/utils/args.py diff --git a/modules/text/text_generation/plato2_en_large/utils/inference.py b/modules/text/text_generation/plato2_en_large/utils/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..f21b0162533dd0748ecae6cf6cbf792003c9ec13 --- /dev/null +++ b/modules/text/text_generation/plato2_en_large/utils/inference.py @@ -0,0 +1,42 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
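For reference, a toy run of `pad_batch_data` from `utils/__init__.py` above: instances are padded to the longest sequence in the batch and reshaped to `[batch_size, max_len, 1]` (the ids and `pad_id=0` below are illustrative):

```python
import numpy as np

insts = [[101, 7, 102], [101, 7, 8, 9, 102]]  # two toy token-id sequences
pad_id = 0

max_len = max(map(len, insts))
inst_data = np.array([list(inst) + [pad_id] * (max_len - len(inst)) for inst in insts])
padded = inst_data.astype("int64").reshape([-1, max_len, 1])

print(padded.shape)     # (2, 5, 1)
print(padded[0, :, 0])  # [101   7 102   0   0]
```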
+"""Inference utils.""" + +import os + +import paddle.fluid as fluid + + +def create_predictor(inference_model_path, is_distributed=False): + """Create predictor.""" + if is_distributed: + dev_count = fluid.core.get_cuda_device_count() + gpu_id = int(os.getenv("FLAGS_selected_gpus")) + else: + dev_count = 1 + gpu_id = 0 + + place = fluid.CUDAPlace(gpu_id) + exe = fluid.Executor(place) + + scope = fluid.Scope() + with fluid.scope_guard(scope): + inference_prog, feed_target_names, fetch_targets = fluid.io.load_inference_model(inference_model_path, exe) + + def __predict__(inputs): + with fluid.scope_guard(scope): + outputs = exe.run(inference_prog, feed=inputs, fetch_list=fetch_targets, return_numpy=True) + return outputs + + return __predict__ diff --git a/modules/text/text_generation/plato2_en_large/utils/masking.py b/modules/text/text_generation/plato2_en_large/utils/masking.py new file mode 100644 index 0000000000000000000000000000000000000000..c9c40bec58916e4aae5e4c9fee4c638cb99eff1d --- /dev/null +++ b/modules/text/text_generation/plato2_en_large/utils/masking.py @@ -0,0 +1,122 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Reader utils.""" + +import numpy as np + +import plato2_en_large.utils + + +def mask(batch_tokens, + vocab_size, + bos_id=1, + eos_id=2, + mask_id=3, + sent_b_starts=None, + labels=None, + is_unidirectional=False, + use_latent=False, + use_bow=False): + """ + Add mask for batch_tokens, return out, mask_label, mask_pos; + Note: mask_pos responding the batch_tokens after padded; + """ + batch_tokens = np.copy(batch_tokens) + max_len = max(map(len, batch_tokens)) + mask_label = [] + mask_pos = [] + if labels is not None: + label_pos = [] + + if is_unidirectional: + # unidirectional language model + if use_latent: + max_len += 1 + shift_len = 1 + else: + shift_len = 0 + for sent_index, sent in enumerate(batch_tokens): + sent_b_index = sent_b_starts[sent_index] if sent_b_starts is not None else 0 + need_cal = True + if labels is not None: + label_pos.append(sent_index * max_len + len(sent) - 1 + shift_len) + if labels[sent_index] == 0: + need_cal = False + mask_label.extend(sent[sent_b_index + 1:]) + mask_pos.extend([sent_index * max_len + i + shift_len for i in range(sent_b_index, len(sent) - 1)]) + mask_label = np.array(mask_label).astype("int64").reshape([-1, 1]) + mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1]) + return_list = [mask_label, mask_pos] + + # latent related (bow label and pos) + if use_latent and use_bow: + bow_label = [] + bow_pos = [] + for sent_index, sent in enumerate(batch_tokens): + sent_b_index = sent_b_starts[sent_index] if sent_b_starts is not None else 0 + + def __filter__(tok_id): + # TODO: exclude [EOS] from bow loss + return True + + bow_pos.extend([sent_index for i in range(sent_b_index + 1, len(sent)) if __filter__(sent[i])]) + bow_label.extend([sent[i] for i in range(sent_b_index + 1, len(sent)) if __filter__(sent[i])]) + bow_label = 
np.array(bow_label).astype("int64").reshape([-1, 1]) + bow_pos = np.array(bow_pos).astype("int64").reshape([-1, 1]) + return_list += [bow_label, bow_pos] + else: + # bidirectional mask language model + total_token_num = sum(map(len, batch_tokens)) + prob_mask = np.random.rand(total_token_num) + # TODO: fix replace_ids, include [UNK] + replace_ids = np.random.randint(3, high=vocab_size, size=total_token_num) + prob_index = 0 + for sent_index, sent in enumerate(batch_tokens): + # add pair label position + if labels is not None: + label_pos.append(sent_index * max_len) + + # add mask label and position + for token_index, token in enumerate(sent): + if token == eos_id or token == bos_id: + continue + prob = prob_mask[prob_index + token_index] + if prob > 0.15: + continue + elif 0.03 < prob <= 0.15: + # mask + mask_label.append(sent[token_index]) + sent[token_index] = mask_id + mask_pos.append(sent_index * max_len + token_index) + elif 0.015 < prob <= 0.03: + # random replace + mask_label.append(sent[token_index]) + sent[token_index] = replace_ids[prob_index + token_index] + mask_pos.append(sent_index * max_len + token_index) + else: + # keep the original token + mask_label.append(sent[token_index]) + mask_pos.append(sent_index * max_len + token_index) + + prob_index += len(sent) + + mask_label = np.array(mask_label).astype("int64").reshape([-1, 1]) + mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1]) + return_list = [batch_tokens, mask_label, mask_pos] + + if labels is not None: + label_pos = np.array(label_pos).astype("int64").reshape([-1, 1]) + assert len(labels) == len(label_pos) + return_list.append(label_pos) + return return_list diff --git a/hub_module/modules/text/text_generation/plato2_en_large/utils/tokenization.py b/modules/text/text_generation/plato2_en_large/utils/tokenization.py similarity index 100% rename from hub_module/modules/text/text_generation/plato2_en_large/utils/tokenization.py rename to modules/text/text_generation/plato2_en_large/utils/tokenization.py diff --git a/hub_module/modules/text/text_review/porn_detection_cnn/README.md b/modules/text/text_review/porn_detection_cnn/README.md similarity index 100% rename from hub_module/modules/text/text_review/porn_detection_cnn/README.md rename to modules/text/text_review/porn_detection_cnn/README.md diff --git a/hub_module/modules/text/text_generation/plato2_en_large/__init__.py b/modules/text/text_review/porn_detection_cnn/__init__.py similarity index 100% rename from hub_module/modules/text/text_generation/plato2_en_large/__init__.py rename to modules/text/text_review/porn_detection_cnn/__init__.py diff --git a/hub_module/modules/text/text_review/porn_detection_cnn/assets/params.txt b/modules/text/text_review/porn_detection_cnn/assets/params.txt similarity index 100% rename from hub_module/modules/text/text_review/porn_detection_cnn/assets/params.txt rename to modules/text/text_review/porn_detection_cnn/assets/params.txt diff --git a/hub_module/modules/text/text_review/porn_detection_cnn/assets/vocab.txt b/modules/text/text_review/porn_detection_cnn/assets/vocab.txt similarity index 100% rename from hub_module/modules/text/text_review/porn_detection_cnn/assets/vocab.txt rename to modules/text/text_review/porn_detection_cnn/assets/vocab.txt diff --git a/hub_module/modules/text/text_review/porn_detection_cnn/assets/word_dict.txt b/modules/text/text_review/porn_detection_cnn/assets/word_dict.txt similarity index 100% rename from hub_module/modules/text/text_review/porn_detection_cnn/assets/word_dict.txt rename to 
modules/text/text_review/porn_detection_cnn/assets/word_dict.txt diff --git a/modules/text/text_review/porn_detection_cnn/module.py b/modules/text/text_review/porn_detection_cnn/module.py new file mode 100644 index 0000000000000000000000000000000000000000..c8456500c618ca149e54c69857ae9977b3202ee1 --- /dev/null +++ b/modules/text/text_review/porn_detection_cnn/module.py @@ -0,0 +1,160 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import math +import os +import six + +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.common.paddle_helper import get_variable_info +from paddlehub.module.module import moduleinfo, serving +from paddlehub.reader import tokenization + +from porn_detection_cnn.processor import load_vocab, preprocess, postprocess + + +@moduleinfo( + name="porn_detection_cnn", + version="1.1.0", + summary="Baidu's open-source Porn Detection Model.", + author="baidu-nlp", + author_email="", + type="nlp/sentiment_analysis") +class PornDetectionCNN(hub.NLPPredictionModule): + def _initialize(self): + """ + Initialize with the necessary elements. + """ + self.pretrained_model_path = os.path.join(self.directory, "infer_model") + self.tokenizer_vocab_path = os.path.join(self.directory, "assets", "vocab.txt") + self.vocab_path = os.path.join(self.directory, "assets", "word_dict.txt") + self.vocab = load_vocab(self.vocab_path) + self.sequence_max_len = 256 + self.tokenizer = tokenization.FullTokenizer(self.tokenizer_vocab_path) + + self.param_file = os.path.join(self.directory, "assets", "params.txt") + + self.predict = self.detection + + self._set_config() + + def context(self, trainable=False): + """ + Get the input, output and program of the pretrained porn_detection_cnn + Args: + trainable(bool): whether to fine-tune the pretrained parameters of porn_detection_cnn or not + Returns: + inputs(dict): the input variables of porn_detection_cnn (words) + outputs(dict): the output variables of porn_detection_cnn (the porn prediction results) + main_program(Program): the main_program of porn_detection_cnn with pretrained parameters + """ + place = fluid.CPUPlace() + exe = fluid.Executor(place) + program, feed_target_names, fetch_targets = fluid.io.load_inference_model( + dirname=self.pretrained_model_path, executor=exe) + + with open(self.param_file, 'r') as file: + params_list = file.readlines() + for param in params_list: + param = param.strip() + var = program.global_block().var(param) + var_info = get_variable_info(var) + + program.global_block().create_parameter( + shape=var_info['shape'], dtype=var_info['dtype'], name=var_info['name']) + + for param in program.global_block().iter_parameters(): + param.trainable = trainable + + for name, var in program.global_block().vars.items(): + if name == feed_target_names[0]: + inputs = {"words": var} + # output of the second layer from the end, before the prediction layer (fc-softmax) + if name == "@HUB_porn_detection_cnn@layer_norm_1.tmp_2": + outputs = {"class_probs": fetch_targets[0], "sentence_feature": var} + return inputs, outputs, program + + @serving + def detection(self, texts=[], data={}, use_gpu=False, batch_size=1): + """ + Get the porn prediction results with the texts as input + + Args: + texts(list): the input texts to be predicted; used when data is not provided + data(dict): the key must be 'text' and the value a list of texts to be predicted; used when texts is not provided + use_gpu(bool): whether to use GPU for prediction or not + batch_size(int): the number of texts the program processes in one batch + + Returns: + results(list): the porn prediction results + """ + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except Exception: + # fall back to CPU when CUDA_VISIBLE_DEVICES is unset or malformed + use_gpu = False + + if texts != [] and isinstance(texts, list) and data == {}: + predicted_data = texts + elif texts == [] and isinstance(data, dict) and isinstance(data.get('text', None), list) and data['text']: + predicted_data = data["text"] + else: + raise ValueError("The input data is inconsistent with expectations.") + + predicted_data = self.to_unicode(predicted_data) + start_idx = 0 + iteration = int(math.ceil(len(predicted_data) / batch_size)) + results = [] + for i in range(iteration): + if i < (iteration - 1): + batch_data = predicted_data[start_idx:(start_idx + batch_size)] + else: + batch_data = predicted_data[start_idx:] + + start_idx = start_idx + batch_size + processed_results = preprocess(batch_data, self.tokenizer, self.vocab, self.sequence_max_len) + tensor_words = self.texts2tensor(processed_results) + + if use_gpu: + batch_out = self.gpu_predictor.run([tensor_words]) + else: + batch_out = self.cpu_predictor.run([tensor_words]) + batch_result = postprocess(batch_out[0], processed_results) + results += batch_result + return results + + def get_labels(self): + """ + Get the labels which were used when pretraining + Returns: + self.labels(dict) + """ + self.labels = {"porn": 1, "not_porn": 0} + return self.labels + + +if __name__ == "__main__": + porn_detection_cnn = PornDetectionCNN() + test_text = ["黄片下载", "打击黄牛党"] + + results = porn_detection_cnn.detection(texts=test_text, batch_size=9) + for index, text in enumerate(test_text): + results[index]["text"] = text + for index, result in enumerate(results): + if six.PY2: + print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) + else: + print(results[index]) + input_dict = {"text": test_text} + results = porn_detection_cnn.detection(data=input_dict) + for index, text in enumerate(test_text): + results[index]["text"] = text + for index, result in enumerate(results): + if six.PY2: + print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) + else: + print(results[index]) diff --git a/hub_module/modules/text/text_review/porn_detection_cnn/processor.py b/modules/text/text_review/porn_detection_cnn/processor.py similarity index 100% rename from hub_module/modules/text/text_review/porn_detection_cnn/processor.py rename to modules/text/text_review/porn_detection_cnn/processor.py diff --git a/hub_module/modules/text/text_review/porn_detection_gru/README.md b/modules/text/text_review/porn_detection_gru/README.md similarity index 100% rename from hub_module/modules/text/text_review/porn_detection_gru/README.md rename to modules/text/text_review/porn_detection_gru/README.md diff --git a/hub_module/modules/text/text_review/porn_detection_cnn/__init__.py b/modules/text/text_review/porn_detection_gru/__init__.py similarity index 100% rename from hub_module/modules/text/text_review/porn_detection_cnn/__init__.py rename to modules/text/text_review/porn_detection_gru/__init__.py diff --git a/hub_module/modules/text/text_review/porn_detection_gru/assets/params.txt b/modules/text/text_review/porn_detection_gru/assets/params.txt similarity index 100% rename from hub_module/modules/text/text_review/porn_detection_gru/assets/params.txt rename to modules/text/text_review/porn_detection_gru/assets/params.txt diff --git a/hub_module/modules/text/text_review/porn_detection_gru/assets/vocab.txt 
b/modules/text/text_review/porn_detection_gru/assets/vocab.txt similarity index 100% rename from hub_module/modules/text/text_review/porn_detection_gru/assets/vocab.txt rename to modules/text/text_review/porn_detection_gru/assets/vocab.txt diff --git a/hub_module/modules/text/text_review/porn_detection_gru/assets/word_dict.txt b/modules/text/text_review/porn_detection_gru/assets/word_dict.txt similarity index 100% rename from hub_module/modules/text/text_review/porn_detection_gru/assets/word_dict.txt rename to modules/text/text_review/porn_detection_gru/assets/word_dict.txt diff --git a/modules/text/text_review/porn_detection_gru/module.py b/modules/text/text_review/porn_detection_gru/module.py new file mode 100644 index 0000000000000000000000000000000000000000..4dd6b5aa31b92d4a9a9cb935a02d82b26fbf1735 --- /dev/null +++ b/modules/text/text_review/porn_detection_gru/module.py @@ -0,0 +1,161 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import math +import os +import six + +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.common.paddle_helper import get_variable_info +from paddlehub.module.module import moduleinfo, serving +from paddlehub.reader import tokenization + +from porn_detection_gru.processor import load_vocab, preprocess, postprocess + + +@moduleinfo( + name="porn_detection_gru", + version="1.1.0", + summary="Baidu's open-source Porn Detection Model.", + author="baidu-nlp", + author_email="", + type="nlp/sentiment_analysis") +class PornDetectionGRU(hub.NLPPredictionModule): + def _initialize(self): + """ + Initialize with the necessary elements. + """ + self.pretrained_model_path = os.path.join(self.directory, "infer_model") + self.tokenizer_vocab_path = os.path.join(self.directory, "assets", "vocab.txt") + self.vocab_path = os.path.join(self.directory, "assets", "word_dict.txt") + self.vocab = load_vocab(self.vocab_path) + self.sequence_max_len = 256 + self.tokenizer = tokenization.FullTokenizer(self.tokenizer_vocab_path) + + self.param_file = os.path.join(self.directory, "assets", "params.txt") + + self.predict = self.detection + + self._set_config() + + def context(self, trainable=False): + """ + Get the input, output and program of the pretrained porn_detection_gru + Args: + trainable(bool): whether to fine-tune the pretrained parameters of porn_detection_gru or not + Returns: + inputs(dict): the input variables of porn_detection_gru (words) + outputs(dict): the output variables of porn_detection_gru (the porn prediction results) + main_program(Program): the main_program of porn_detection_gru with pretrained parameters + """ + place = fluid.CPUPlace() + exe = fluid.Executor(place) + program, feed_target_names, fetch_targets = fluid.io.load_inference_model( + dirname=self.pretrained_model_path, executor=exe) + + with open(self.param_file, 'r') as file: + params_list = file.readlines() + for param in params_list: + param = param.strip() + var = program.global_block().var(param) + var_info = get_variable_info(var) + program.global_block().create_parameter( + shape=var_info['shape'], dtype=var_info['dtype'], name=var_info['name']) + + for param in program.global_block().iter_parameters(): + param.trainable = trainable + + for name, var in program.global_block().vars.items(): + if name == feed_target_names[0]: + inputs = {"words": var} + # output of the second layer from the end, before the prediction layer (fc-softmax) + if name == "@HUB_porn_detection_gru@layer_norm_0.tmp_2": + outputs = {"class_probs": fetch_targets[0], "sentence_feature": var} + return inputs, outputs, program + + @serving + def detection(self, texts=[], data={}, use_gpu=False, batch_size=1): + """ + Get the porn prediction results with the texts as input + + Args: + texts(list): the input texts to be predicted; used when data is not provided + data(dict): the key must be 'text' and the value a list of texts to be predicted; used when texts is not provided + use_gpu(bool): whether to use GPU for prediction or not + batch_size(int): the number of texts the program processes in one batch + + Returns: + results(list): the porn prediction results + """ + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except Exception: + # fall back to CPU when CUDA_VISIBLE_DEVICES is unset or malformed + use_gpu = False + + if texts != [] and isinstance(texts, list) and data == {}: + predicted_data = texts + elif texts == [] and isinstance(data, dict) and isinstance(data.get('text', None), list) and data['text']: + predicted_data = data["text"] + else: + raise ValueError("The input data is inconsistent with expectations.") + + predicted_data = self.to_unicode(predicted_data) + start_idx = 0 + iteration = int(math.ceil(len(predicted_data) / batch_size)) + results = [] + for i in range(iteration): + if i < (iteration - 1): + batch_data = predicted_data[start_idx:(start_idx + batch_size)] + else: + batch_data = predicted_data[start_idx:] + + start_idx = start_idx + batch_size + processed_results = preprocess(batch_data, self.tokenizer, self.vocab, self.sequence_max_len) + tensor_words = self.texts2tensor(processed_results) + + if use_gpu: + batch_out = self.gpu_predictor.run([tensor_words]) + else: + batch_out = self.cpu_predictor.run([tensor_words]) + batch_result = postprocess(batch_out[0], processed_results) + results += batch_result + return results + + def get_labels(self): + """ + Get the labels which were used when pretraining + Returns: + self.labels(dict) + """ + self.labels = {"porn": 1, "not_porn": 0} + return self.labels + + +if __name__ == "__main__": + porn_detection_gru = PornDetectionGRU() + porn_detection_gru.context() + # porn_detection_gru = hub.Module(name='porn_detection_gru') + test_text = ["黄片下载", "打击黄牛党"] + + results = porn_detection_gru.detection(texts=test_text) + for index, text in enumerate(test_text): + results[index]["text"] = text + for index, result in enumerate(results): + if six.PY2: + print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) + else: + print(results[index]) + input_dict = {"text": test_text} + results = porn_detection_gru.detection(data=input_dict) + for index, text in enumerate(test_text): + results[index]["text"] = text + for index, result in enumerate(results): + if six.PY2: + print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) + else: + print(results[index]) diff --git a/hub_module/modules/text/text_review/porn_detection_gru/processor.py b/modules/text/text_review/porn_detection_gru/processor.py similarity index 100% rename from hub_module/modules/text/text_review/porn_detection_gru/processor.py rename to modules/text/text_review/porn_detection_gru/processor.py diff --git a/hub_module/modules/text/text_review/porn_detection_lstm/README.md b/modules/text/text_review/porn_detection_lstm/README.md similarity index 100% rename from hub_module/modules/text/text_review/porn_detection_lstm/README.md rename to modules/text/text_review/porn_detection_lstm/README.md diff --git a/hub_module/modules/text/text_review/porn_detection_gru/__init__.py b/modules/text/text_review/porn_detection_lstm/__init__.py similarity index 100% rename from 
hub_module/modules/text/text_review/porn_detection_gru/__init__.py rename to modules/text/text_review/porn_detection_lstm/__init__.py diff --git a/hub_module/modules/text/text_review/porn_detection_lstm/assets/params.txt b/modules/text/text_review/porn_detection_lstm/assets/params.txt similarity index 100% rename from hub_module/modules/text/text_review/porn_detection_lstm/assets/params.txt rename to modules/text/text_review/porn_detection_lstm/assets/params.txt diff --git a/hub_module/modules/text/text_review/porn_detection_lstm/assets/vocab.txt b/modules/text/text_review/porn_detection_lstm/assets/vocab.txt similarity index 100% rename from hub_module/modules/text/text_review/porn_detection_lstm/assets/vocab.txt rename to modules/text/text_review/porn_detection_lstm/assets/vocab.txt diff --git a/hub_module/modules/text/text_review/porn_detection_lstm/assets/word_dict.txt b/modules/text/text_review/porn_detection_lstm/assets/word_dict.txt similarity index 100% rename from hub_module/modules/text/text_review/porn_detection_lstm/assets/word_dict.txt rename to modules/text/text_review/porn_detection_lstm/assets/word_dict.txt diff --git a/modules/text/text_review/porn_detection_lstm/module.py b/modules/text/text_review/porn_detection_lstm/module.py new file mode 100644 index 0000000000000000000000000000000000000000..e1b7778a5529a91b0531589b954241e92fc2f041 --- /dev/null +++ b/modules/text/text_review/porn_detection_lstm/module.py @@ -0,0 +1,160 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import math +import os +import six + +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.common.paddle_helper import get_variable_info +from paddlehub.module.module import moduleinfo, serving +from paddlehub.reader import tokenization + +from porn_detection_lstm.processor import load_vocab, preprocess, postprocess + + +@moduleinfo( + name="porn_detection_lstm", + version="1.1.0", + summary="Baidu's open-source Porn Detection Model.", + author="baidu-nlp", + author_email="", + type="nlp/sentiment_analysis") +class PornDetectionLSTM(hub.NLPPredictionModule): + def _initialize(self): + """ + Initialize with the necessary elements. + """ + self.pretrained_model_path = os.path.join(self.directory, "infer_model") + self.tokenizer_vocab_path = os.path.join(self.directory, "assets", "vocab.txt") + self.vocab_path = os.path.join(self.directory, "assets", "word_dict.txt") + self.vocab = load_vocab(self.vocab_path) + self.sequence_max_len = 256 + self.tokenizer = tokenization.FullTokenizer(self.tokenizer_vocab_path) + + self.param_file = os.path.join(self.directory, "assets", "params.txt") + + self.predict = self.detection + + self._set_config() + + def context(self, trainable=False): + """ + Get the input, output and program of the pretrained porn_detection_lstm + Args: + trainable(bool): whether to fine-tune the pretrained parameters of porn_detection_lstm or not + Returns: + inputs(dict): the input variables of porn_detection_lstm (words) + outputs(dict): the output variables of porn_detection_lstm (the porn prediction results) + main_program(Program): the main_program of porn_detection_lstm with pretrained parameters + """ + place = fluid.CPUPlace() + exe = fluid.Executor(place) + program, feed_target_names, fetch_targets = fluid.io.load_inference_model( + dirname=self.pretrained_model_path, executor=exe) + + with open(self.param_file, 'r') as file: + params_list = file.readlines() + for param in params_list: + param = param.strip() + var = program.global_block().var(param) + var_info = get_variable_info(var) + program.global_block().create_parameter( + shape=var_info['shape'], dtype=var_info['dtype'], name=var_info['name']) + + for param in program.global_block().iter_parameters(): + param.trainable = trainable + + for name, var in program.global_block().vars.items(): + if name == feed_target_names[0]: + inputs = {"words": var} + # output of the second layer from the end, before the prediction layer (fc-softmax) + if name == "@HUB_porn_detection_lstm@layer_norm_0.tmp_2": + outputs = {"class_probs": fetch_targets[0], "sentence_feature": var} + return inputs, outputs, program + + @serving + def detection(self, texts=[], data={}, use_gpu=False, batch_size=1): + """ + Get the porn prediction results with the texts as input + + Args: + texts(list): the input texts to be predicted; used when data is not provided + data(dict): the key must be 'text' and the value a list of texts to be predicted; used when texts is not provided + use_gpu(bool): whether to use GPU for prediction or not + batch_size(int): the number of texts the program processes in one batch + + Returns: + results(list): the porn prediction results + """ + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except Exception: + # fall back to CPU when CUDA_VISIBLE_DEVICES is unset or malformed + use_gpu = False + + if texts != [] and isinstance(texts, list) and data == {}: + predicted_data = texts + elif texts == [] and isinstance(data, dict) and isinstance(data.get('text', None), list) and data['text']: + predicted_data = data["text"] + else: + raise ValueError("The input data is inconsistent with expectations.") + + predicted_data = self.to_unicode(predicted_data) + start_idx = 0 + iteration = int(math.ceil(len(predicted_data) / batch_size)) + results = [] + for i in range(iteration): + if i < (iteration - 1): + batch_data = predicted_data[start_idx:(start_idx + batch_size)] + else: + batch_data = predicted_data[start_idx:] + + start_idx = start_idx + batch_size + processed_results = preprocess(batch_data, self.tokenizer, self.vocab, self.sequence_max_len) + tensor_words = self.texts2tensor(processed_results) + + if use_gpu: + batch_out = self.gpu_predictor.run([tensor_words]) + else: + batch_out = self.cpu_predictor.run([tensor_words]) + batch_result = postprocess(batch_out[0], processed_results) + results += batch_result + return results + + def get_labels(self): + """ + Get the labels which were used when pretraining + Returns: + self.labels(dict) + """ + self.labels = {"porn": 1, "not_porn": 0} + return self.labels + + +if __name__ == "__main__": + porn_detection_lstm = PornDetectionLSTM() + porn_detection_lstm.context() + test_text = ["黄片下载", "打击黄牛党"] + + results = porn_detection_lstm.detection(texts=test_text) + for index, text in enumerate(test_text): + results[index]["text"] = text + for index, result in enumerate(results): + if six.PY2: + print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) + else: + print(results[index]) + input_dict = {"text": test_text} + results = porn_detection_lstm.detection(data=input_dict) + for index, text in enumerate(test_text): + results[index]["text"] = text + for index, result in enumerate(results): + if six.PY2: + print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) + else: + print(results[index]) diff --git a/hub_module/modules/text/text_review/porn_detection_lstm/processor.py b/modules/text/text_review/porn_detection_lstm/processor.py similarity index 100% rename from hub_module/modules/text/text_review/porn_detection_lstm/processor.py rename to 
modules/text/text_review/porn_detection_lstm/processor.py diff --git a/hub_module/modules/video/README.md b/modules/video/README.md similarity index 100% rename from hub_module/modules/video/README.md rename to modules/video/README.md diff --git a/hub_module/modules/video/classification/README.md b/modules/video/classification/README.md similarity index 100% rename from hub_module/modules/video/classification/README.md rename to modules/video/classification/README.md diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/README.md b/modules/video/classification/videotag_tsn_lstm/README.md similarity index 100% rename from hub_module/modules/video/classification/videotag_tsn_lstm/README.md rename to modules/video/classification/videotag_tsn_lstm/README.md diff --git a/hub_module/modules/text/text_review/porn_detection_lstm/__init__.py b/modules/video/classification/videotag_tsn_lstm/__init__.py similarity index 100% rename from hub_module/modules/text/text_review/porn_detection_lstm/__init__.py rename to modules/video/classification/videotag_tsn_lstm/__init__.py diff --git a/modules/video/classification/videotag_tsn_lstm/module.py b/modules/video/classification/videotag_tsn_lstm/module.py new file mode 100644 index 0000000000000000000000000000000000000000..fee2124d6ea94c8a8cd4992567bfc100df0698d9 --- /dev/null +++ b/modules/video/classification/videotag_tsn_lstm/module.py @@ -0,0 +1,199 @@ +# coding:utf-8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
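+# Usage sketch, added for illustration, assuming the module has been installed +# through PaddleHub (names follow the classify() API defined below; "1.mp4" is +# a placeholder path): +# +# import paddlehub as hub +# videotag = hub.Module(name="videotag_tsn_lstm") +# results = videotag.classify(paths=["1.mp4"], use_gpu=False) +# +# or, from the command line: +# hub run videotag_tsn_lstm --input_path 1.mp4 --use_gpu False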
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import ast +import os + +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.module.module import moduleinfo, runnable +from paddlehub.common.logger import logger + +from videotag_tsn_lstm.resource.utils.config_utils import * +import videotag_tsn_lstm.resource.models as models +from videotag_tsn_lstm.resource.reader import get_reader +from videotag_tsn_lstm.resource.metrics import get_metrics +from videotag_tsn_lstm.resource.utils.utility import check_cuda +from videotag_tsn_lstm.resource.utils.utility import check_version + + +@moduleinfo( + name="videotag_tsn_lstm", + version="1.0.0", + summary= + "videotag_tsn_lstm is a video classification model, using TSN for feature extraction and AttentionLSTM for classification", + author="paddlepaddle", + author_email="paddle-dev@baidu.com", + type="video/classification", +) +class VideoTag(hub.Module): + def _initialize(self): + # add arg parser + self.parser = argparse.ArgumentParser( + description="Run the videotag_tsn_lstm module.", + prog='hub run videotag_tsn_lstm', + usage='%(prog)s', + add_help=True) + self.parser.add_argument('--use_gpu', type=ast.literal_eval, default=False, help='default use gpu.') + self.parser.add_argument('--input_path', type=str, default=None, help='path of video data, single video') + self._has_load = False + + def _extractor(self, args, exe, place): + extractor_scope = fluid.Scope() + with fluid.scope_guard(extractor_scope): + extractor_startup_prog = fluid.Program() + extractor_main_prog = fluid.Program() + with fluid.program_guard(extractor_main_prog, extractor_startup_prog): + extractor_config = parse_config(args.extractor_config) + extractor_infer_config = merge_configs(extractor_config, 'infer', vars(args)) + + # build model + extractor_model = models.get_model("TSN", extractor_infer_config, mode='infer') + extractor_model.build_input(use_dataloader=False) + extractor_model.build_model() + extractor_feeds = extractor_model.feeds() + extractor_fetch_list = extractor_model.fetches() + + exe.run(extractor_startup_prog) + + logger.info('load extractor weights from {}'.format(args.extractor_weights)) + extractor_model.load_test_weights(exe, args.extractor_weights, extractor_main_prog) + + extractor_feeder = fluid.DataFeeder(place=place, feed_list=extractor_feeds) + return extractor_main_prog, extractor_fetch_list, extractor_feeder, extractor_scope + + def _predictor(self, args, exe, place): + predictor_scope = fluid.Scope() + with fluid.scope_guard(predictor_scope): + predictor_startup_prog = fluid.default_startup_program() + predictor_main_prog = fluid.default_main_program() + with fluid.program_guard(predictor_main_prog, predictor_startup_prog): + # parse config + predictor_config = parse_config(args.predictor_config) + predictor_infer_config = merge_configs(predictor_config, 'infer', vars(args)) + + predictor_model = models.get_model("AttentionLSTM", predictor_infer_config, mode='infer') + predictor_model.build_input(use_dataloader=False) + predictor_model.build_model() + predictor_feeds = predictor_model.feeds() + predictor_outputs = predictor_model.outputs() + + exe.run(predictor_startup_prog) + + logger.info('load lstm weights from {}'.format(args.predictor_weights)) + predictor_model.load_test_weights(exe, args.predictor_weights, predictor_main_prog) + + predictor_feeder = fluid.DataFeeder(place=place, feed_list=predictor_feeds) + predictor_fetch_list = 
predictor_model.fetches() + return predictor_main_prog, predictor_fetch_list, predictor_feeder, predictor_scope + + @runnable + def run_cmd(self, argsv): + args = self.parser.parse_args(argsv) + results = self.classify(paths=[args.input_path], use_gpu=args.use_gpu) + return results + + def classify(self, paths, use_gpu=False, threshold=0.5, top_k=10): + """ + API of Classification. + + Args: + paths (list[str]): the path of mp4s. + use_gpu (bool): whether to use gpu or not. + threshold (float): the result value >= threshold will be returned. + top_k (int): the top k result will be returned. + + Returns: + results (list[dict]): every dict includes the mp4 file path and prediction. + """ + args = self.parser.parse_args([]) + # config the args in videotag_tsn_lstm + args.use_gpu = use_gpu + args.filelist = paths + args.topk = top_k + args.threshold = threshold + args.extractor_config = os.path.join(self.directory, 'resource', 'configs', 'tsn.yaml') + args.predictor_config = os.path.join(self.directory, 'resource', 'configs', 'attention_lstm.yaml') + args.extractor_weights = os.path.join(self.directory, 'weights', 'tsn') + args.predictor_weights = os.path.join(self.directory, 'weights', 'attention_lstm') + args.label_file = os.path.join(self.directory, 'resource', 'label_3396.txt') + + check_cuda(args.use_gpu) + check_version() + + if not self._has_load: + self.place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() + self.exe = fluid.Executor(self.place) + self.extractor_main_prog, self.extractor_fetch_list, self.extractor_feeder, self.extractor_scope = self._extractor( + args, self.exe, self.place) + self.predictor_main_prog, self.predictor_fetch_list, self.predictor_feeder, self.predictor_scope = self._predictor( + args, self.exe, self.place) + self._has_load = True + + extractor_config = parse_config(args.extractor_config) + extractor_infer_config = merge_configs(extractor_config, 'infer', vars(args)) + extractor_reader = get_reader("TSN", 'infer', extractor_infer_config) + feature_list = [] + file_list = [] + + for idx, data in enumerate(extractor_reader()): + file_id = [item[-1] for item in data] + feed_data = [item[:-1] for item in data] + feature_out = self.exe.run( + program=self.extractor_main_prog, + fetch_list=self.extractor_fetch_list, + feed=self.extractor_feeder.feed(feed_data), + scope=self.extractor_scope) + feature_list.append(feature_out) + file_list.append(file_id) + logger.info('========[Stage 1 Sample {} ] Tsn feature extractor finished======'.format(idx)) + + # get AttentionLSTM input from Tsn output + num_frames = 300 + predictor_feed_list = [] + for i in range(len(feature_list)): + feature_out = feature_list[i] + extractor_feature = feature_out[0] + predictor_feed_data = [[extractor_feature[0].astype(float)[0:num_frames, :]]] + predictor_feed_list.append((predictor_feed_data, file_list[i])) + + metrics_config = parse_config(args.predictor_config) + metrics_config['MODEL']['topk'] = args.topk + metrics_config['MODEL']['threshold'] = args.threshold + predictor_metrics = get_metrics("AttentionLSTM".upper(), 'infer', metrics_config) + predictor_metrics.reset() + for idx, data in enumerate(predictor_feed_list): + file_id = data[1] + predictor_feed_data = data[0] + final_outs = self.exe.run( + program=self.predictor_main_prog, + fetch_list=self.predictor_fetch_list, + feed=self.predictor_feeder.feed(predictor_feed_data, ), + scope=self.predictor_scope) + logger.info('=======[Stage 2 Sample {} ] AttentionLSTM predict finished========'.format(idx)) + final_result_list 
= [item for item in final_outs] + [file_id] + + predictor_metrics.accumulate(final_result_list) + results = predictor_metrics.finalize_and_log_out(label_file=args.label_file) + return results + + +if __name__ == '__main__': + test_module = VideoTag() + print(test_module.run_cmd(argsv=['--input_path', "1.mp4", '--use_gpu', str(False)])) diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/__init__.py b/modules/video/classification/videotag_tsn_lstm/resource/__init__.py similarity index 100% rename from hub_module/modules/video/classification/videotag_tsn_lstm/__init__.py rename to modules/video/classification/videotag_tsn_lstm/resource/__init__.py diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/configs/attention_lstm.yaml b/modules/video/classification/videotag_tsn_lstm/resource/configs/attention_lstm.yaml similarity index 100% rename from hub_module/modules/video/classification/videotag_tsn_lstm/resource/configs/attention_lstm.yaml rename to modules/video/classification/videotag_tsn_lstm/resource/configs/attention_lstm.yaml diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/configs/tsn.yaml b/modules/video/classification/videotag_tsn_lstm/resource/configs/tsn.yaml similarity index 100% rename from hub_module/modules/video/classification/videotag_tsn_lstm/resource/configs/tsn.yaml rename to modules/video/classification/videotag_tsn_lstm/resource/configs/tsn.yaml diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/label_3396.txt b/modules/video/classification/videotag_tsn_lstm/resource/label_3396.txt similarity index 100% rename from hub_module/modules/video/classification/videotag_tsn_lstm/resource/label_3396.txt rename to modules/video/classification/videotag_tsn_lstm/resource/label_3396.txt diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/__init__.py b/modules/video/classification/videotag_tsn_lstm/resource/metrics/__init__.py similarity index 100% rename from hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/__init__.py rename to modules/video/classification/videotag_tsn_lstm/resource/metrics/__init__.py diff --git a/modules/video/classification/videotag_tsn_lstm/resource/metrics/metrics_util.py b/modules/video/classification/videotag_tsn_lstm/resource/metrics/metrics_util.py new file mode 100644 index 0000000000000000000000000000000000000000..dd1d55552c9733113ede2b2d9a3e577a0d642ce4 --- /dev/null +++ b/modules/video/classification/videotag_tsn_lstm/resource/metrics/metrics_util.py @@ -0,0 +1,150 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License.
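+# Usage sketch, added for illustration (it mirrors how module.py drives these +# metrics in 'infer' mode; `cfg` and the `batches` iterable are assumptions, +# not part of this file): +# +# metrics = get_metrics("ATTENTIONLSTM", "infer", cfg) +# metrics.reset() +# for outs, video_id in batches: # each outs comes from exe.run(...) +# metrics.accumulate(outs + [video_id]) +# results = metrics.finalize_and_log_out(label_file="label_3396.txt")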
+ +from __future__ import absolute_import +from __future__ import unicode_literals +from __future__ import print_function +from __future__ import division + +import io +import logging + +import numpy as np + +from videotag_tsn_lstm.resource.metrics.youtube8m import eval_util as youtube8m_metrics + +logger = logging.getLogger(__name__) + + +class Metrics(object): + def __init__(self, name, mode, metrics_args): + """Not implemented""" + pass + + def calculate_and_log_out(self, fetch_list, info=''): + """Not implemented""" + pass + + def accumulate(self, fetch_list, info=''): + """Not implemented""" + pass + + def finalize_and_log_out(self, info='', savedir='./'): + """Not implemented""" + pass + + def reset(self): + """Not implemented""" + pass + + +class Youtube8mMetrics(Metrics): + def __init__(self, name, mode, metrics_args): + self.name = name + self.mode = mode + self.num_classes = metrics_args['MODEL']['num_classes'] + self.topk = metrics_args['MODEL']['topk'] + self.threshold = metrics_args['MODEL']['threshold'] + + self.calculator = youtube8m_metrics.EvaluationMetrics(self.num_classes, self.topk) + if self.mode == 'infer': + self.infer_results = [] + + def calculate_and_log_out(self, fetch_list, info=''): + loss = np.mean(np.array(fetch_list[0])) + pred = np.array(fetch_list[1]) + label = np.array(fetch_list[2]) + hit_at_one = youtube8m_metrics.calculate_hit_at_one(pred, label) + perr = youtube8m_metrics.calculate_precision_at_equal_recall_rate(pred, label) + gap = youtube8m_metrics.calculate_gap(pred, label) + logger.info(info + ' , loss = {0}, Hit@1 = {1}, PERR = {2}, GAP = {3}'.format(\ + '%.6f' % loss, '%.2f' % hit_at_one, '%.2f' % perr, '%.2f' % gap)) + + def accumulate(self, fetch_list, info=''): + if self.mode == 'infer': + predictions = np.array(fetch_list[0]) + video_id = fetch_list[1] + for i in range(len(predictions)): + topk_inds = predictions[i].argsort()[0 - self.topk:] + topk_inds = topk_inds[::-1] + preds = predictions[i][topk_inds] + self.infer_results.append((video_id[i], topk_inds.tolist(), preds.tolist())) + else: + loss = np.array(fetch_list[0]) + pred = np.array(fetch_list[1]) + label = np.array(fetch_list[2]) + self.calculator.accumulate(loss, pred, label) + + def finalize_and_log_out(self, info='', label_file='./label_3396.txt'): + if self.mode == 'infer': + all_res_list = [] + # read the label file once instead of reopening it for every result + with io.open(label_file, "r", encoding="utf-8") as f: + fl = f.readlines() + for index, item in enumerate(self.infer_results): + video_id = item[0] + res = {} + res["path"] = video_id + res["prediction"] = {} + for i in range(len(item[1])): + class_id = item[1][i] + class_prob = item[2][i] + if class_prob < self.threshold: + continue + class_name = fl[class_id].split('\n')[0] + res["prediction"][class_name] = class_prob + if not res["prediction"]: + logger.warning("%s: No prediction exceeds the threshold = %s." % (video_id, self.threshold)) + all_res_list.append(res) + return all_res_list + else: + epoch_info_dict = self.calculator.get() + logger.info(info + '\tavg_hit_at_one: {0},\tavg_perr: {1},\tavg_loss: {2},\taps: {3},\tgap: {4}'\ + .format(epoch_info_dict['avg_hit_at_one'], epoch_info_dict['avg_perr'], \ + epoch_info_dict['avg_loss'], epoch_info_dict['aps'], epoch_info_dict['gap'])) + + def reset(self): + self.calculator.clear() + if self.mode == 'infer': + self.infer_results = [] + + +class MetricsZoo(object): + def __init__(self): + self.metrics_zoo = {} + + def regist(self, name, metrics): + assert metrics.__base__ == Metrics, "Unknown metrics type {}".format(type(metrics)) + self.metrics_zoo[name] = metrics + + def get(self, name, mode, cfg): + for k, v in self.metrics_zoo.items(): + if k == name: + return v(name, mode, cfg) + raise KeyError(name, self.metrics_zoo.keys()) + + +# singleton metrics_zoo +metrics_zoo = MetricsZoo() + + +def regist_metrics(name, metrics): + metrics_zoo.regist(name, metrics) + + +def get_metrics(name, mode, cfg): + return metrics_zoo.get(name, mode, cfg) + + +# sort by alphabet +regist_metrics("ATTENTIONLSTM", Youtube8mMetrics) diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/__init__.py b/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/__init__.py similarity index 100% rename from hub_module/modules/video/classification/videotag_tsn_lstm/resource/__init__.py rename to modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/__init__.py diff --git a/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/average_precision_calculator.py b/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/average_precision_calculator.py new file mode 100644 index 0000000000000000000000000000000000000000..fc5f64e1cdd1ccf0add746e35b46f79dd10cf898 --- /dev/null +++ b/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/average_precision_calculator.py @@ -0,0 +1,262 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Calculate or keep track of the interpolated average precision. + +It provides an interface for calculating interpolated average precision for an +entire list or the top-n ranked items. For the definition of the +(non-)interpolated average precision: +http://trec.nist.gov/pubs/trec15/appendices/CE.MEASURES06.pdf + +Example usages: +1) Use it as a static function call to directly calculate average precision for +a short ranked list in the memory. + +``` +import random + +p = np.array([random.random() for _ in range(10)]) +a = np.array([random.choice([0, 1]) for _ in range(10)]) + +ap = average_precision_calculator.AveragePrecisionCalculator.ap(p, a) +``` + +2) Use it as an object for long ranked list that cannot be stored in memory or +the case where partial predictions can be observed at a time (Tensorflow +predictions). 
In this case, we first call the function accumulate many times +to process parts of the ranked list. After processing all the parts, we call +peek_ap_at_n. +``` +p1 = np.array([random.random() for _ in range(5)]) +a1 = np.array([random.choice([0, 1]) for _ in range(5)]) +p2 = np.array([random.random() for _ in range(5)]) +a2 = np.array([random.choice([0, 1]) for _ in range(5)]) + +# average precision at n = 10 +calculator = average_precision_calculator.AveragePrecisionCalculator(10) +calculator.accumulate(p1, a1) +calculator.accumulate(p2, a2) +ap3 = calculator.peek_ap_at_n() +``` +""" + +import heapq +import random +import numbers + +import numpy + + +class AveragePrecisionCalculator(object): + """Calculate the average precision and average precision at n.""" + + def __init__(self, top_n=None): + """Construct an AveragePrecisionCalculator to calculate average precision. + + This class is used to calculate the average precision for a single label. + + Args: + top_n: A nonnegative integer specifying the average precision at n, or + None to use all provided data points. + + Raises: + ValueError: An error occurred when top_n is not a nonnegative integer or None. + """ + if not ((isinstance(top_n, int) and top_n >= 0) or top_n is None): + raise ValueError("top_n must be a nonnegative integer or None.") + + self._top_n = top_n # average precision at n + self._total_positives = 0 # total number of positives seen so far + self._heap = [] # min-heap holding the top predictions seen so far, as (prediction, actual) + + @property + def heap_size(self): + """Gets the heap size maintained in the class.""" + return len(self._heap) + + @property + def num_accumulated_positives(self): + """Gets the number of positive samples that have been accumulated.""" + return self._total_positives + + def accumulate(self, predictions, actuals, num_positives=None): + """Accumulate the predictions and their ground truth labels. + + After the function call, we may call peek_ap_at_n to actually calculate + the average precision. + Note predictions and actuals must have the same shape. + + Args: + predictions: a list storing the prediction scores. + actuals: a list storing the ground truth labels. Any value + larger than 0 will be treated as positives, otherwise as negatives. + num_positives: If the 'predictions' and 'actuals' inputs aren't complete, + then it's possible some true positives were missed in them. In that case, + you can provide 'num_positives' in order to accurately track recall. + + Raises: + ValueError: An error occurred when the format of the input is not the + numpy 1-D array or the shape of predictions and actuals does not match. 
+ """ + if len(predictions) != len(actuals): + raise ValueError("the shape of predictions and actuals does not match.") + + if not num_positives is None: + if not isinstance(num_positives, numbers.Number) or num_positives < 0: + raise ValueError("'num_positives' was provided but it wan't a nonzero number.") + + if not num_positives is None: + self._total_positives += num_positives + else: + self._total_positives += numpy.size(numpy.where(actuals > 0)) + topk = self._top_n + heap = self._heap + + for i in range(numpy.size(predictions)): + if topk is None or len(heap) < topk: + heapq.heappush(heap, (predictions[i], actuals[i])) + else: + if predictions[i] > heap[0][0]: # heap[0] is the smallest + heapq.heappop(heap) + heapq.heappush(heap, (predictions[i], actuals[i])) + + def clear(self): + """Clear the accumulated predictions.""" + self._heap = [] + self._total_positives = 0 + + def peek_ap_at_n(self): + """Peek the non-interpolated average precision at n. + + Returns: + The non-interpolated average precision at n (default 0). + If n is larger than the length of the ranked list, + the average precision will be returned. + """ + if self.heap_size <= 0: + return 0 + predlists = numpy.array(list(zip(*self._heap))) + + ap = self.ap_at_n(predlists[0], predlists[1], n=self._top_n, total_num_positives=self._total_positives) + return ap + + @staticmethod + def ap(predictions, actuals): + """Calculate the non-interpolated average precision. + + Args: + predictions: a numpy 1-D array storing the sparse prediction scores. + actuals: a numpy 1-D array storing the ground truth labels. Any value + larger than 0 will be treated as positives, otherwise as negatives. + + Returns: + The non-interpolated average precision at n. + If n is larger than the length of the ranked list, + the average precision will be returned. + + Raises: + ValueError: An error occurred when the format of the input is not the + numpy 1-D array or the shape of predictions and actuals does not match. + """ + return AveragePrecisionCalculator.ap_at_n(predictions, actuals, n=None) + + @staticmethod + def ap_at_n(predictions, actuals, n=20, total_num_positives=None): + """Calculate the non-interpolated average precision. + + Args: + predictions: a numpy 1-D array storing the sparse prediction scores. + actuals: a numpy 1-D array storing the ground truth labels. Any value + larger than 0 will be treated as positives, otherwise as negatives. + n: the top n items to be considered in ap@n. + total_num_positives : (optionally) you can specify the number of total + positive + in the list. If specified, it will be used in calculation. + + Returns: + The non-interpolated average precision at n. + If n is larger than the length of the ranked list, + the average precision will be returned. + + Raises: + ValueError: An error occurred when + 1) the format of the input is not the numpy 1-D array; + 2) the shape of predictions and actuals does not match; + 3) the input n is not a positive integer. + """ + if len(predictions) != len(actuals): + raise ValueError("the shape of predictions and actuals does not match.") + + if n is not None: + if not isinstance(n, int) or n <= 0: + raise ValueError("n must be 'None' or a positive integer." " It was '%s'." 
% n) + + ap = 0.0 + + predictions = numpy.array(predictions) + actuals = numpy.array(actuals) + + # add a shuffler to avoid overestimating the ap + predictions, actuals = AveragePrecisionCalculator._shuffle(predictions, actuals) + sortidx = sorted(range(len(predictions)), key=lambda k: predictions[k], reverse=True) + + if total_num_positives is None: + numpos = numpy.size(numpy.where(actuals > 0)) + else: + numpos = total_num_positives + + if numpos == 0: + return 0 + + if n is not None: + numpos = min(numpos, n) + delta_recall = 1.0 / numpos + poscount = 0.0 + + # calculate the ap + r = len(sortidx) + if n is not None: + r = min(r, n) + for i in range(r): + if actuals[sortidx[i]] > 0: + poscount += 1 + ap += poscount / (i + 1) * delta_recall + return ap + + @staticmethod + def _shuffle(predictions, actuals): + random.seed(0) + suffidx = random.sample(range(len(predictions)), len(predictions)) + predictions = predictions[suffidx] + actuals = actuals[suffidx] + return predictions, actuals + + @staticmethod + def _zero_one_normalize(predictions, epsilon=1e-7): + """Normalize the predictions to the range between 0.0 and 1.0. + + For some predictions like SVM predictions, we need to normalize them before + calculating the interpolated average precision. The normalization will not + change the rank in the original list and thus won't change the average + precision. + + Args: + predictions: a numpy 1-D array storing the sparse prediction scores. + epsilon: a small constant to avoid denominator being zero. + + Returns: + The normalized prediction. + """ + denominator = numpy.max(predictions) - numpy.min(predictions) + ret = (predictions - numpy.min(predictions)) / max(denominator, epsilon) # guard against a zero denominator + return ret diff --git a/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/eval_util.py b/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/eval_util.py new file mode 100644 index 0000000000000000000000000000000000000000..8b61dba4fb4718c26b6d03626bac59dd00842def --- /dev/null +++ b/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/eval_util.py @@ -0,0 +1,225 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Provides functions to help with evaluating models.""" +import datetime +import numpy + +from . import mean_average_precision_calculator as map_calculator +from . import average_precision_calculator as ap_calculator + + +def flatten(l): + """ Merges a list of lists into a single list. """ + return [item for sublist in l for item in sublist] + + +def calculate_hit_at_one(predictions, actuals): + """Performs a local (numpy) calculation of the hit at one. + + Args: + predictions: Matrix containing the outputs of the model. + Dimensions are 'batch' x 'num_classes'. + actuals: Matrix containing the ground truth labels. + Dimensions are 'batch' x 'num_classes'. + + Returns: + float: The average hit at one across the entire batch. 
+ """ + top_prediction = numpy.argmax(predictions, 1) + hits = actuals[numpy.arange(actuals.shape[0]), top_prediction] + return numpy.average(hits) + + +def calculate_precision_at_equal_recall_rate(predictions, actuals): + """Performs a local (numpy) calculation of the PERR. + + Args: + predictions: Matrix containing the outputs of the model. + Dimensions are 'batch' x 'num_classes'. + actuals: Matrix containing the ground truth labels. + Dimensions are 'batch' x 'num_classes'. + + Returns: + float: The average precision at equal recall rate across the entire batch. + """ + aggregated_precision = 0.0 + num_videos = actuals.shape[0] + for row in numpy.arange(num_videos): + num_labels = int(numpy.sum(actuals[row])) + top_indices = numpy.argpartition(predictions[row], -num_labels)[-num_labels:] + item_precision = 0.0 + for label_index in top_indices: + if predictions[row][label_index] > 0: + item_precision += actuals[row][label_index] + item_precision /= top_indices.size + aggregated_precision += item_precision + aggregated_precision /= num_videos + return aggregated_precision + + +def calculate_gap(predictions, actuals, top_k=20): + """Performs a local (numpy) calculation of the global average precision. + + Only the top_k predictions are taken for each of the videos. + + Args: + predictions: Matrix containing the outputs of the model. + Dimensions are 'batch' x 'num_classes'. + actuals: Matrix containing the ground truth labels. + Dimensions are 'batch' x 'num_classes'. + top_k: How many predictions to use per video. + + Returns: + float: The global average precision. + """ + gap_calculator = ap_calculator.AveragePrecisionCalculator() + sparse_predictions, sparse_labels, num_positives = top_k_by_class(predictions, actuals, top_k) + gap_calculator.accumulate(flatten(sparse_predictions), flatten(sparse_labels), sum(num_positives)) + return gap_calculator.peek_ap_at_n() + + +def top_k_by_class(predictions, labels, k=20): + """Extracts the top k predictions for each video, sorted by class. + + Args: + predictions: A numpy matrix containing the outputs of the model. + Dimensions are 'batch' x 'num_classes'. + k: the top k non-zero entries to preserve in each prediction. + + Returns: + A tuple (predictions,labels, true_positives). 'predictions' and 'labels' + are lists of lists of floats. 'true_positives' is a list of scalars. The + length of the lists are equal to the number of classes. The entries in the + predictions variable are probability predictions, and + the corresponding entries in the labels variable are the ground truth for + those predictions. The entries in 'true_positives' are the number of true + positives for each class in the ground truth. + + Raises: + ValueError: An error occurred when the k is not a positive integer. 
+ """ + if k <= 0: + raise ValueError("k must be a positive integer.") + k = min(k, predictions.shape[1]) + num_classes = predictions.shape[1] + prediction_triplets = [] + for video_index in range(predictions.shape[0]): + prediction_triplets.extend(top_k_triplets(predictions[video_index], labels[video_index], k)) + out_predictions = [[] for v in range(num_classes)] + out_labels = [[] for v in range(num_classes)] + for triplet in prediction_triplets: + out_predictions[triplet[0]].append(triplet[1]) + out_labels[triplet[0]].append(triplet[2]) + out_true_positives = [numpy.sum(labels[:, i]) for i in range(num_classes)] + + return out_predictions, out_labels, out_true_positives + + +def top_k_triplets(predictions, labels, k=20): + """Get the top_k for a 1-d numpy array. Returns a sparse list of tuples in + (prediction, class) format""" + m = len(predictions) + k = min(k, m) + indices = numpy.argpartition(predictions, -k)[-k:] + return [(index, predictions[index], labels[index]) for index in indices] + + +class EvaluationMetrics(object): + """A class to store the evaluation metrics.""" + + def __init__(self, num_class, top_k): + """Construct an EvaluationMetrics object to store the evaluation metrics. + + Args: + num_class: A positive integer specifying the number of classes. + top_k: A positive integer specifying how many predictions are considered per video. + + Raises: + ValueError: An error occurred when MeanAveragePrecisionCalculator cannot + not be constructed. + """ + self.sum_hit_at_one = 0.0 + self.sum_perr = 0.0 + self.sum_loss = 0.0 + self.map_calculator = map_calculator.MeanAveragePrecisionCalculator(num_class) + self.global_ap_calculator = ap_calculator.AveragePrecisionCalculator() + self.top_k = top_k + self.num_examples = 0 + + #def accumulate(self, predictions, labels, loss): + def accumulate(self, loss, predictions, labels): + """Accumulate the metrics calculated locally for this mini-batch. + + Args: + predictions: A numpy matrix containing the outputs of the model. + Dimensions are 'batch' x 'num_classes'. + labels: A numpy matrix containing the ground truth labels. + Dimensions are 'batch' x 'num_classes'. + loss: A numpy array containing the loss for each sample. + + Returns: + dictionary: A dictionary storing the metrics for the mini-batch. + + Raises: + ValueError: An error occurred when the shape of predictions and actuals + does not match. + """ + batch_size = labels.shape[0] + mean_hit_at_one = calculate_hit_at_one(predictions, labels) + mean_perr = calculate_precision_at_equal_recall_rate(predictions, labels) + mean_loss = numpy.mean(loss) + + # Take the top 20 predictions. + sparse_predictions, sparse_labels, num_positives = top_k_by_class(predictions, labels, self.top_k) + self.map_calculator.accumulate(sparse_predictions, sparse_labels, num_positives) + self.global_ap_calculator.accumulate(flatten(sparse_predictions), flatten(sparse_labels), sum(num_positives)) + + self.num_examples += batch_size + self.sum_hit_at_one += mean_hit_at_one * batch_size + self.sum_perr += mean_perr * batch_size + self.sum_loss += mean_loss * batch_size + + return {"hit_at_one": mean_hit_at_one, "perr": mean_perr, "loss": mean_loss} + + def get(self): + """Calculate the evaluation metrics for the whole epoch. + + Raises: + ValueError: If no examples were accumulated. + + Returns: + dictionary: a dictionary storing the evaluation metrics for the epoch. The + dictionary has the fields: avg_hit_at_one, avg_perr, avg_loss, and + aps (default nan). 
+ """ + if self.num_examples <= 0: + raise ValueError("total_sample must be positive.") + avg_hit_at_one = self.sum_hit_at_one / self.num_examples + avg_perr = self.sum_perr / self.num_examples + avg_loss = self.sum_loss / self.num_examples + + aps = self.map_calculator.peek_map_at_n() + gap = self.global_ap_calculator.peek_ap_at_n() + + epoch_info_dict = {} + return {"avg_hit_at_one": avg_hit_at_one, "avg_perr": avg_perr, "avg_loss": avg_loss, "aps": aps, "gap": gap} + + def clear(self): + """Clear the evaluation metrics and reset the EvaluationMetrics object.""" + self.sum_hit_at_one = 0.0 + self.sum_perr = 0.0 + self.sum_loss = 0.0 + self.map_calculator.clear() + self.global_ap_calculator.clear() + self.num_examples = 0 diff --git a/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/mean_average_precision_calculator.py b/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/mean_average_precision_calculator.py new file mode 100644 index 0000000000000000000000000000000000000000..73ae96d9d73624358179e21269c3699e5148808d --- /dev/null +++ b/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/mean_average_precision_calculator.py @@ -0,0 +1,108 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Calculate the mean average precision. + +It provides an interface for calculating mean average precision +for an entire list or the top-n ranked items. + +Example usages: +We first call the function accumulate many times to process parts of the ranked +list. After processing all the parts, we call peek_map_at_n +to calculate the mean average precision. + +``` +import random + +p = np.array([[random.random() for _ in xrange(50)] for _ in xrange(1000)]) +a = np.array([[random.choice([0, 1]) for _ in xrange(50)] + for _ in xrange(1000)]) + +# mean average precision for 50 classes. +calculator = mean_average_precision_calculator.MeanAveragePrecisionCalculator( + num_class=50) +calculator.accumulate(p, a) +aps = calculator.peek_map_at_n() +``` +""" + +import numpy +from . import average_precision_calculator + + +class MeanAveragePrecisionCalculator(object): + """This class is to calculate mean average precision. + """ + + def __init__(self, num_class): + """Construct a calculator to calculate the (macro) average precision. + + Args: + num_class: A positive Integer specifying the number of classes. + top_n_array: A list of positive integers specifying the top n for each + class. The top n in each class will be used to calculate its average + precision at n. + The size of the array must be num_class. + + Raises: + ValueError: An error occurred when num_class is not a positive integer; + or the top_n_array is not a list of positive integers. 
+ """ + if not isinstance(num_class, int) or num_class <= 1: + raise ValueError("num_class must be a positive integer.") + + self._ap_calculators = [] # member of AveragePrecisionCalculator + self._num_class = num_class # total number of classes + for i in range(num_class): + self._ap_calculators.append(average_precision_calculator.AveragePrecisionCalculator()) + + def accumulate(self, predictions, actuals, num_positives=None): + """Accumulate the predictions and their ground truth labels. + + Args: + predictions: A list of lists storing the prediction scores. The outer + dimension corresponds to classes. + actuals: A list of lists storing the ground truth labels. The dimensions + should correspond to the predictions input. Any value + larger than 0 will be treated as positives, otherwise as negatives. + num_positives: If provided, it is a list of numbers representing the + number of true positives for each class. If not provided, the number of + true positives will be inferred from the 'actuals' array. + + Raises: + ValueError: An error occurred when the shape of predictions and actuals + does not match. + """ + if not num_positives: + num_positives = [None for i in predictions.shape[1]] + + calculators = self._ap_calculators + for i in range(len(predictions)): + calculators[i].accumulate(predictions[i], actuals[i], num_positives[i]) + + def clear(self): + for calculator in self._ap_calculators: + calculator.clear() + + def is_empty(self): + return ([calculator.heap_size for calculator in self._ap_calculators] == [0 for _ in range(self._num_class)]) + + def peek_map_at_n(self): + """Peek the non-interpolated mean average precision at n. + + Returns: + An array of non-interpolated average precision at n (default 0) for each + class. + """ + aps = [self._ap_calculators[i].peek_ap_at_n() for i in range(self._num_class)] + return aps diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/__init__.py b/modules/video/classification/videotag_tsn_lstm/resource/models/__init__.py similarity index 100% rename from hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/__init__.py rename to modules/video/classification/videotag_tsn_lstm/resource/models/__init__.py diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/__init__.py b/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/__init__.py similarity index 100% rename from hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/__init__.py rename to modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/__init__.py diff --git a/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/attention_lstm.py b/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/attention_lstm.py new file mode 100644 index 0000000000000000000000000000000000000000..d336ae29a4c1e9709449d87297efb54080cf2435 --- /dev/null +++ b/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/attention_lstm.py @@ -0,0 +1,132 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. 
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+import logging
+
+import paddle.fluid as fluid
+from paddle.fluid import ParamAttr
+
+from ..model import ModelBase
+from .lstm_attention import LSTMAttentionModel
+
+__all__ = ["AttentionLSTM"]
+logger = logging.getLogger(__name__)
+
+
+class AttentionLSTM(ModelBase):
+    def __init__(self, name, cfg, mode='train'):
+        super(AttentionLSTM, self).__init__(name, cfg, mode)
+        self.get_config()
+
+    def get_config(self):
+        # get model configs
+        self.feature_num = self.cfg.MODEL.feature_num
+        self.feature_names = self.cfg.MODEL.feature_names
+        self.feature_dims = self.cfg.MODEL.feature_dims
+        self.num_classes = self.cfg.MODEL.num_classes
+        self.embedding_size = self.cfg.MODEL.embedding_size
+        self.lstm_size = self.cfg.MODEL.lstm_size
+        self.drop_rate = self.cfg.MODEL.drop_rate
+
+        # get mode configs
+        self.batch_size = self.get_config_from_sec(self.mode, 'batch_size', 1)
+        self.num_gpus = self.get_config_from_sec(self.mode, 'num_gpus', 1)
+
+    def build_input(self, use_dataloader):
+        self.feature_input = []
+        for name, dim in zip(self.feature_names, self.feature_dims):
+            self.feature_input.append(fluid.data(shape=[None, dim], lod_level=1, dtype='float32', name=name))
+        if use_dataloader:
+            assert self.mode != 'infer', \
+                'dataloader is not recommended in infer mode, please set use_dataloader to be false.'
+            self.dataloader = fluid.io.DataLoader.from_generator(
+                feed_list=self.feature_input,  #+ [self.label_input],
+                capacity=8,
+                iterable=True)
+
+    def build_model(self):
+        att_outs = []
+        for i, (input_dim, feature) in enumerate(zip(self.feature_dims, self.feature_input)):
+            att = LSTMAttentionModel(input_dim, self.embedding_size, self.lstm_size, self.drop_rate)
+            att_out = att.forward(feature, is_training=(self.mode == 'train'))
+            att_outs.append(att_out)
+        if len(att_outs) > 1:
+            out = fluid.layers.concat(att_outs, axis=1)
+        else:
+            out = att_outs[0]
+
+        fc1 = fluid.layers.fc(
+            input=out,
+            size=8192,
+            act='relu',
+            bias_attr=ParamAttr(
+                regularizer=fluid.regularizer.L2Decay(0.0), initializer=fluid.initializer.NormalInitializer(scale=0.0)),
+            name='fc1')
+        fc2 = fluid.layers.fc(
+            input=fc1,
+            size=4096,
+            act='tanh',
+            bias_attr=ParamAttr(
+                regularizer=fluid.regularizer.L2Decay(0.0), initializer=fluid.initializer.NormalInitializer(scale=0.0)),
+            name='fc2')
+
+        self.logit = fluid.layers.fc(input=fc2, size=self.num_classes, act=None, \
+                                     bias_attr=ParamAttr(regularizer=fluid.regularizer.L2Decay(0.0),
+                                                         initializer=fluid.initializer.NormalInitializer(scale=0.0)),
+                                     name='output')
+
+        self.output = fluid.layers.sigmoid(self.logit)
+
+    def optimizer(self):
+        assert self.mode == 'train', "optimizer can only be obtained in train mode"
+        values = [self.learning_rate * (self.decay_gamma**i) for i in range(len(self.decay_epochs) + 1)]
+        iter_per_epoch = self.num_samples / self.batch_size
+        boundaries = [e * iter_per_epoch for e in self.decay_epochs]
+        return fluid.optimizer.RMSProp(
+            learning_rate=fluid.layers.piecewise_decay(values=values, boundaries=boundaries),
+            centered=True,
+            regularization=fluid.regularizer.L2Decay(self.weight_decay))
+
+    def loss(self):
+        assert self.mode != 'infer', "invalid loss calculation in infer mode"
+        cost = fluid.layers.sigmoid_cross_entropy_with_logits(x=self.logit, label=self.label_input)
+        cost = fluid.layers.reduce_sum(cost, dim=-1)
+        sum_cost = fluid.layers.reduce_sum(cost)
+        self.loss_ = fluid.layers.scale(sum_cost, scale=self.num_gpus, bias_after_scale=False)
+        return self.loss_
+
+    def outputs(self):
+        return [self.output, self.logit]
+
+    def feeds(self):
+        return self.feature_input
+
+    def fetches(self):
+        fetch_list = [self.output]
+        return fetch_list
+
+    def weights_info(self):
+        return ('AttentionLSTM.pdparams',
+                'https://paddlemodels.bj.bcebos.com/video_classification/AttentionLSTM.pdparams')
+
+    def load_pretrain_params(self, exe, pretrain, prog, place):
+        logger.info("Load pretrain weights from {}, exclude fc layer.".format(pretrain))
+
+        state_dict = fluid.load_program_state(pretrain)
+        dict_keys = list(state_dict.keys())
+        for name in dict_keys:
+            if "fc_0" in name:
+                del state_dict[name]
+                logger.info('Delete {} from pretrained parameters. Do not load it'.format(name))
+        fluid.set_program_state(prog, state_dict)
diff --git a/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/lstm_attention.py b/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/lstm_attention.py
new file mode 100644
index 0000000000000000000000000000000000000000..91dcb97a80270d41ef8912cfb509d4b5b45ffe35
--- /dev/null
+++ b/modules/video/classification/videotag_tsn_lstm/resource/models/attention_lstm/lstm_attention.py
@@ -0,0 +1,57 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+import paddle.fluid as fluid
+from paddle.fluid import ParamAttr
+
+
+class LSTMAttentionModel(object):
+    """LSTM Attention Model"""
+
+    def __init__(self, input_dim, embedding_size=512, lstm_size=1024, drop_rate=0.5):
+        # NOTE: input_dim is accepted to match the caller's signature but is not
+        # used; the embedding fc below infers its input size from the feature.
+        self.lstm_size = lstm_size
+        self.embedding_size = embedding_size
+        self.drop_rate = drop_rate
+
+    def forward(self, input, is_training):
+        input_fc = fluid.layers.fc(
+            input=input,
+            size=self.embedding_size,
+            act='tanh',
+            bias_attr=ParamAttr(
+                regularizer=fluid.regularizer.L2Decay(0.0), initializer=fluid.initializer.NormalInitializer(scale=0.0)),
+            name='rgb_fc')
+
+        lstm_forward_fc = fluid.layers.fc(
+            input=input_fc, size=self.lstm_size * 4, act=None, bias_attr=False, name='rgb_fc_forward')
+
+        lstm_forward, _ = fluid.layers.dynamic_lstm(
+            input=lstm_forward_fc, size=self.lstm_size * 4, is_reverse=False, name='rgb_lstm_forward')
+
+        lstm_backward_fc = fluid.layers.fc(
+            input=input_fc, size=self.lstm_size * 4, act=None, bias_attr=False, name='rgb_fc_backward')
+
+        lstm_backward, _ = fluid.layers.dynamic_lstm(
+            input=lstm_backward_fc, size=self.lstm_size * 4, is_reverse=True, name='rgb_lstm_backward')
+
+        lstm_concat = fluid.layers.concat(input=[lstm_forward, lstm_backward], axis=1)
+
+        lstm_dropout = fluid.layers.dropout(x=lstm_concat, dropout_prob=self.drop_rate, is_test=(not is_training))
+
+        lstm_weight = fluid.layers.fc(
+            input=lstm_dropout, size=1, act='sequence_softmax', bias_attr=False, name='rgb_weight')
+
+        scaled = fluid.layers.elementwise_mul(x=lstm_dropout, y=lstm_weight, axis=0)
+        lstm_pool = fluid.layers.sequence_pool(input=scaled, pool_type='sum')
+
+        return lstm_pool
diff --git a/modules/video/classification/videotag_tsn_lstm/resource/models/model.py b/modules/video/classification/videotag_tsn_lstm/resource/models/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..cdb8429cad602ade2ca2463049578bf89ad10255
--- /dev/null
+++ b/modules/video/classification/videotag_tsn_lstm/resource/models/model.py
@@ -0,0 +1,152 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
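The `model.py` file below defines the ModelBase interface and a small name-based registry (ModelZoo). A hypothetical usage sketch, assuming the resource directory is importable as a `models` package and that `cfg` comes from `utils/config_utils.py`; file and config names are illustrative:

```python
from utils.config_utils import parse_config
from models.model import regist_model, get_model
from models.attention_lstm.attention_lstm import AttentionLSTM

regist_model('AttentionLSTM', AttentionLSTM)           # register once at startup
cfg = parse_config('attention_lstm.yaml')              # hypothetical config file
model = get_model('attentionlstm', cfg, mode='infer')  # lookup is case-insensitive
model.build_input(use_dataloader=False)                # infer mode feeds data directly
model.build_model()
```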
+
+import os
+import logging
+try:
+    from configparser import ConfigParser
+except ImportError:
+    from ConfigParser import ConfigParser
+
+import paddle.fluid as fluid
+
+WEIGHT_DIR = os.path.join(os.path.expanduser('~'), '.paddle', 'weights')
+
+logger = logging.getLogger(__name__)
+
+
+def is_parameter(var):
+    return isinstance(var, fluid.framework.Parameter)
+
+
+class NotImplementError(Exception):
+    "Error: model function not implemented"
+
+    def __init__(self, model, function):
+        super(NotImplementError, self).__init__()
+        self.model = model.__class__.__name__
+        self.function = function.__name__
+
+    def __str__(self):
+        return "Function {}() is not implemented in model {}".format(self.function, self.model)
+
+
+class ModelNotFoundError(Exception):
+    "Error: model not found"
+
+    def __init__(self, model_name, avail_models):
+        super(ModelNotFoundError, self).__init__()
+        self.model_name = model_name
+        self.avail_models = avail_models
+
+    def __str__(self):
+        msg = "Model {} Not Found.\nAvailable models:\n".format(self.model_name)
+        for model in self.avail_models:
+            msg += "  {}\n".format(model)
+        return msg
+
+
+class ModelBase(object):
+    def __init__(self, name, cfg, mode='train'):
+        assert mode in ['train', 'valid', 'test', 'infer'], \
+            "Unknown mode type {}".format(mode)
+        self.name = name
+        self.is_training = (mode == 'train')
+        self.mode = mode
+        self.cfg = cfg
+        self.dataloader = None
+
+    def build_model(self):
+        "build model struct"
+        raise NotImplementError(self, self.build_model)
+
+    def build_input(self, use_dataloader):
+        "build input Variable"
+        raise NotImplementError(self, self.build_input)
+
+    def optimizer(self):
+        "get model optimizer"
+        raise NotImplementError(self, self.optimizer)
+
+    def outputs(self):
+        "get output variable"
+        raise NotImplementError(self, self.outputs)
+
+    def loss(self):
+        "get loss variable"
+        raise NotImplementError(self, self.loss)
+
+    def feeds(self):
+        "get feed inputs list"
+        raise NotImplementError(self, self.feeds)
+
+    def fetches(self):
+        "get fetch list of model"
+        raise NotImplementError(self, self.fetches)
+
+    def weights_info(self):
+        "get model weight default path and download url"
+        raise NotImplementError(self, self.weights_info)
+
+    def dataloader(self):
+        # NOTE: shadowed by the instance attribute of the same name set in
+        # __init__; kept as-is for backward compatibility.
+        return self.dataloader
+
+    def epoch_num(self):
+        "get train epoch num"
+        return self.cfg.TRAIN.epoch
+
+    def pretrain_info(self):
+        "get pretrain base model directory"
+        return (None, None)
+
+    def load_pretrain_params(self, exe, pretrain, prog, place):
+        logger.info("Load pretrain weights from {}".format(pretrain))
+        state_dict = fluid.load_program_state(pretrain)
+        fluid.set_program_state(prog, state_dict)
+
+    def load_test_weights(self, exe, weights, prog):
+        params_list = list(filter(is_parameter, prog.list_vars()))
+        fluid.load(prog, weights, executor=exe, var_list=params_list)
+
+    def get_config_from_sec(self, sec, item, default=None):
+        if sec.upper() not in self.cfg:
+            return default
+        return self.cfg[sec.upper()].get(item, default)
+
+
+class ModelZoo(object):
+    def __init__(self):
+        self.model_zoo = {}
+
+    def regist(self, name, model):
+        assert model.__base__ == ModelBase, "Unknown model type {}".format(type(model))
+        self.model_zoo[name] = model
+
+    def get(self, name, cfg, mode='train'):
+        for k, v in self.model_zoo.items():
+            if k.upper() == name.upper():
+                return v(name, cfg, mode)
+        raise ModelNotFoundError(name, self.model_zoo.keys())
+
+
+# singleton model_zoo
+model_zoo = ModelZoo()
+
+
+def regist_model(name, model):
+    model_zoo.regist(name, model)
+
+
+def get_model(name, cfg, mode='train'):
+    return model_zoo.get(name, cfg, mode)
diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/__init__.py b/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/__init__.py
similarity index 100%
rename from hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/__init__.py
rename to modules/video/classification/videotag_tsn_lstm/resource/models/tsn/__init__.py
diff --git a/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name.py b/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name.py
new file mode 100644
index 0000000000000000000000000000000000000000..974e01ee44e61ef4cfcfecdfc1ddc63aa0eb0e00
--- /dev/null
+++ b/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name.py
@@ -0,0 +1,106 @@
+import json
+
+depth = [3, 4, 23, 3]
+num_filters = [64, 128, 256, 512]
+
+layer_index = 1
+caffe_param_list = []
+
+name_list = ['conv1']
+params_list = []
+name = name_list[0]
+conv_w = name + '_weights'
+caffe_conv_w = 'ConvNdBackward' + str(layer_index) + '_weights'
+params_list.append(conv_w)
+caffe_param_list.append(caffe_conv_w)
+
+layer_index += 1
+
+bn_name = "bn_" + name
+caffe_bn_name = 'BatchNormBackward' + str(layer_index) + '_bn'
+params_list.append(bn_name + '_scale')
+params_list.append(bn_name + '_offset')
+params_list.append(bn_name + '_mean')
+params_list.append(bn_name + '_variance')
+
+caffe_param_list.append(caffe_bn_name + '_scale')
+caffe_param_list.append(caffe_bn_name + '_offset')
+caffe_param_list.append(caffe_bn_name + '_mean')
+caffe_param_list.append(caffe_bn_name + '_variance')
+
+filter_input = 64
+
+layer_index += 3
+
+for block in range(len(depth)):
+    for i in range(depth[block]):
+        if block == 2:
+            if i == 0:
+                name = "res" + str(block + 2) + "a"
+            else:
+                name = "res" + str(block + 2) + "b" + str(i)
+        else:
+            name = "res" + str(block + 2) + chr(97 + i)
+
+        name_list.append(name)
+
+        for item in ['a', 'b', 'c']:
+            name_branch = name + '_branch2' + item
+            bn_name = 'bn' + name_branch[3:]
+            params_list.append(name_branch + '_weights')
+            params_list.append(bn_name + '_scale')
+            params_list.append(bn_name + '_offset')
+            params_list.append(bn_name + '_mean')
+            params_list.append(bn_name + '_variance')
+
+            caffe_name_branch = 'ConvNdBackward' + str(layer_index)
+            caffe_param_list.append(caffe_name_branch + '_weights')
+
+            layer_index += 1
+            caffe_bn_name = 'BatchNormBackward' + str(layer_index) + '_bn'
+            caffe_param_list.append(caffe_bn_name + '_scale')
+            caffe_param_list.append(caffe_bn_name + '_offset')
+            caffe_param_list.append(caffe_bn_name + '_mean')
+            caffe_param_list.append(caffe_bn_name + '_variance')
+
+            layer_index += 2
+
+        stride = 2 if i == 0 and block != 0 else 1
+        filter_num = num_filters[block]
+        filter_output = filter_num * 4
+
+        if (filter_output != filter_input) or (stride != 1):
+            name_branch = name + '_branch1'
+
+            print('filter_input {}, filter_output {}, stride {}, branch name {}'.format(
+                filter_input, filter_output, stride, name_branch))
+            bn_name = 'bn' + name_branch[3:]
+            params_list.append(name_branch + '_weights')
+            params_list.append(bn_name + '_scale')
+            params_list.append(bn_name + '_offset')
+            params_list.append(bn_name + '_mean')
+            params_list.append(bn_name + '_variance')
+
+            caffe_name_branch = 'ConvNdBackward' + str(layer_index)
+            caffe_param_list.append(caffe_name_branch + '_weights')
+
+            layer_index += 1
+            caffe_bn_name = 'BatchNormBackward' + str(layer_index) + '_bn'
+            caffe_param_list.append(caffe_bn_name + '_scale')
+            caffe_param_list.append(caffe_bn_name + '_offset')
+            caffe_param_list.append(caffe_bn_name + '_mean')
+            caffe_param_list.append(caffe_bn_name + '_variance')
+
+            layer_index += 3
+        else:
+            layer_index += 2
+
+        filter_input = filter_output
+
+map_dict = {}
+
+for i in range(len(params_list)):
+    print(params_list[i], caffe_param_list[i])
+    map_dict[params_list[i]] = caffe_param_list[i]
+
+# write the paddle-to-caffe name mapping; use a context manager so the file
+# handle is closed promptly
+with open('name_map.json', 'w') as fout:
+    json.dump(map_dict, fout)
diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name1 b/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name1
similarity index 100%
rename from hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name1
rename to modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name1
diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name2 b/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name2
similarity index 100%
rename from hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name2
rename to modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name2
diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name_map.json b/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name_map.json
similarity index 100%
rename from hub_module/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name_map.json
rename to modules/video/classification/videotag_tsn_lstm/resource/models/tsn/name_map.json
diff --git a/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/tsn.py b/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/tsn.py
new file mode 100644
index 0000000000000000000000000000000000000000..12ba68fa849df7fa4faff079f74d8d02648e1c3e
--- /dev/null
+++ b/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/tsn.py
@@ -0,0 +1,172 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
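The `tsn.py` file below wires the TSN_ResNet backbone into the ModelBase interface. The shape bookkeeping TSN relies on is easy to lose in the Paddle code, so here is an illustrative numpy-only sketch of it: the network receives [N, seg_num, 3*seglen, H, W], folds the segment axis into the batch axis for the 2-D CNN, then regroups the pooled features per segment (as `net()` does with its final reshape).

```python
import numpy as np

N, seg_num, seglen, H, W = 2, 3, 1, 224, 224
clip = np.zeros((N, seg_num, 3 * seglen, H, W), dtype='float32')

frames = clip.reshape(-1, 3 * seglen, H, W)  # [N*seg_num, C, H, W] fed to the ResNet
feat = np.zeros((frames.shape[0], 2048))     # stand-in for globally pooled ResNet features
feat = feat.reshape(N, seg_num, -1)          # [N, seg_num, 2048], the downstream LSTM's layout
print(feat.shape)                            # (2, 3, 2048)
```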
+
+import numpy as np
+
+import paddle.fluid as fluid
+from paddle.fluid import ParamAttr
+
+from ..model import ModelBase
+from .tsn_res_model import TSN_ResNet
+
+import logging
+logger = logging.getLogger(__name__)
+
+__all__ = ["TSN"]
+
+
+class TSN(ModelBase):
+    def __init__(self, name, cfg, mode='train'):
+        super(TSN, self).__init__(name, cfg, mode=mode)
+        self.get_config()
+
+    def get_config(self):
+        self.num_classes = self.get_config_from_sec('model', 'num_classes')
+        self.seg_num = self.get_config_from_sec('model', 'seg_num')
+        self.seglen = self.get_config_from_sec('model', 'seglen')
+        self.image_mean = self.get_config_from_sec('model', 'image_mean')
+        self.image_std = self.get_config_from_sec('model', 'image_std')
+        self.num_layers = self.get_config_from_sec('model', 'num_layers')
+
+        self.num_epochs = self.get_config_from_sec('train', 'epoch')
+        self.total_videos = self.get_config_from_sec('train', 'total_videos')
+        self.base_learning_rate = self.get_config_from_sec('train', 'learning_rate')
+        self.learning_rate_decay = self.get_config_from_sec('train', 'learning_rate_decay')
+        self.l2_weight_decay = self.get_config_from_sec('train', 'l2_weight_decay')
+        self.momentum = self.get_config_from_sec('train', 'momentum')
+
+        self.seg_num = self.get_config_from_sec(self.mode, 'seg_num', self.seg_num)
+        self.target_size = self.get_config_from_sec(self.mode, 'target_size')
+        self.batch_size = self.get_config_from_sec(self.mode, 'batch_size')
+
+    def build_input(self, use_dataloader=True):
+        image_shape = [3, self.target_size, self.target_size]
+        image_shape[0] = image_shape[0] * self.seglen
+        image_shape = [None, self.seg_num] + image_shape
+        self.use_dataloader = use_dataloader
+
+        image = fluid.data(name='image', shape=image_shape, dtype='float32')
+        if self.mode != 'infer':
+            label = fluid.data(name='label', shape=[None, 1], dtype='int64')
+        else:
+            label = None
+
+        if use_dataloader:
+            assert self.mode != 'infer', \
+                'dataloader is not recommended in infer mode, please set use_dataloader to be false.'
+            self.dataloader = fluid.io.DataLoader.from_generator(feed_list=[image, label], capacity=4, iterable=True)
+
+        self.feature_input = [image]
+        self.label_input = label
+
+    def create_model_args(self):
+        cfg = {}
+        cfg['layers'] = self.num_layers
+        cfg['class_dim'] = self.num_classes
+        cfg['seg_num'] = self.seg_num
+        return cfg
+
+    def build_model(self):
+        cfg = self.create_model_args()
+        videomodel = TSN_ResNet(layers=cfg['layers'], seg_num=cfg['seg_num'], is_training=(self.mode == 'train'))
+        out = videomodel.net(input=self.feature_input[0], class_dim=cfg['class_dim'])
+        self.feature_output = out
+        # network_outputs is consumed by loss()/outputs()/fetches() in train
+        # and valid mode, so keep it in sync with the feature output.
+        self.network_outputs = [out]
+
+    def optimizer(self):
+        assert self.mode == 'train', "optimizer can only be obtained in train mode"
+        epoch_points = [self.num_epochs / 3, self.num_epochs * 2 / 3]
+        total_videos = self.total_videos
+        step = int(total_videos / self.batch_size + 1)
+        bd = [e * step for e in epoch_points]
+        base_lr = self.base_learning_rate
+        lr_decay = self.learning_rate_decay
+        lr = [base_lr, base_lr * lr_decay, base_lr * lr_decay * lr_decay]
+        l2_weight_decay = self.l2_weight_decay
+        momentum = self.momentum
+        optimizer = fluid.optimizer.Momentum(
+            learning_rate=fluid.layers.piecewise_decay(boundaries=bd, values=lr),
+            momentum=momentum,
+            regularization=fluid.regularizer.L2Decay(l2_weight_decay))
+
+        return optimizer
+
+    def loss(self):
+        assert self.mode != 'infer', "invalid loss calculation in infer mode"
+        cost = fluid.layers.cross_entropy(input=self.network_outputs[0], \
+                                          label=self.label_input, ignore_index=-1)
+        self.loss_ = fluid.layers.mean(x=cost)
+        return self.loss_
+
+    def outputs(self):
+        return self.network_outputs
+
+    def feeds(self):
+        return self.feature_input
+
+    def fetches(self):
+        if self.mode == 'train' or self.mode == 'valid':
+            losses = self.loss()
+            fetch_list = [losses, self.network_outputs[0], self.label_input]
+        elif self.mode == 'test':
+            fetch_list = [self.feature_output, self.label_input]
+        elif self.mode == 'infer':
+            fetch_list = self.feature_output
+        else:
+            raise NotImplementedError('mode {} not implemented'.format(self.mode))
+
+        return fetch_list
+
+    def pretrain_info(self):
+        return ('ResNet50_pretrained',
+                'https://paddlemodels.bj.bcebos.com/video_classification/ResNet50_pretrained.tar.gz')
+
+    def weights_info(self):
+        return ('TSN.pdparams', 'https://paddlemodels.bj.bcebos.com/video_classification/TSN.pdparams')
+
+    def load_pretrain_params(self, exe, pretrain, prog, place):
+        def is_parameter(var):
+            return isinstance(var, fluid.framework.Parameter)
+
+        params_list = list(filter(is_parameter, prog.list_vars()))
+        for param in params_list:
+            logger.debug(param.name)
+
+        logger.info("Load pretrain weights from {}, exclude fc layer.".format(pretrain))
+
+        state_dict = fluid.load_program_state(pretrain)
+        dict_keys = list(state_dict.keys())
+        for name in dict_keys:
+            if "fc_0" in name:
+                del state_dict[name]
+                logger.info('Delete {} from pretrained parameters. Do not load it'.format(name))
+        fluid.set_program_state(prog, state_dict)
+
+
+# def load_test_weights(self, exe, weights, prog):
+#     def is_parameter(var):
+#         return isinstance(var, fluid.framework.Parameter)
+#     params_list = list(filter(is_parameter, prog.list_vars()))
+
+#     state_dict = np.load(weights)
+#     for p in params_list:
+#         if p.name in state_dict.keys():
+#             print('########### load param {} from file'.format(p.name))
+#         else:
+#             print('----------- param {} not in file'.format(p.name))
+#     fluid.set_program_state(prog, state_dict)
+#     fluid.save(prog, './model_weight/tsn')
diff --git a/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/tsn_res_model.py b/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/tsn_res_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..802e174aa48c39fa8279b44d4d49408ca9856589
--- /dev/null
+++ b/modules/video/classification/videotag_tsn_lstm/resource/models/tsn/tsn_res_model.py
@@ -0,0 +1,114 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+import os
+import time
+import sys
+import paddle.fluid as fluid
+import math
+
+
+class TSN_ResNet():
+    def __init__(self, layers=50, seg_num=7, is_training=True):
+        # NOTE: the backbone is fixed to ResNet-101 here; the 'layers'
+        # argument is intentionally ignored.
+        self.layers = 101
+        self.seg_num = seg_num
+        self.is_training = is_training
+
+    def conv_bn_layer(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None):
+        conv = fluid.layers.conv2d(
+            input=input,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            act=None,
+            param_attr=fluid.param_attr.ParamAttr(name=name + "_weights"),
+            bias_attr=False)
+        if name == "conv1":
+            bn_name = "bn_" + name
+        else:
+            bn_name = "bn" + name[3:]
+
+        return fluid.layers.batch_norm(
+            input=conv,
+            act=act,
+            is_test=(not self.is_training),
+            param_attr=fluid.param_attr.ParamAttr(name=bn_name + "_scale"),
+            bias_attr=fluid.param_attr.ParamAttr(bn_name + '_offset'),
+            moving_mean_name=bn_name + "_mean",
+            moving_variance_name=bn_name + '_variance')
+
+    def shortcut(self, input, ch_out, stride, name):
+        ch_in = input.shape[1]
+        if ch_in != ch_out or stride != 1:
+            return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
+        else:
+            return input
+
+    def bottleneck_block(self, input, num_filters, stride, name):
+        conv0 = self.conv_bn_layer(
+            input=input, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a")
+        conv1 = self.conv_bn_layer(
+            input=conv0, num_filters=num_filters, filter_size=3, stride=stride, act='relu', name=name + "_branch2b")
+        conv2 = self.conv_bn_layer(
+            input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c")
+
+        short = self.shortcut(input, num_filters * 4, stride, name=name + "_branch1")
+
+        return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
+
+    def net(self, input, class_dim=101):
+        layers = self.layers
+        seg_num = self.seg_num
+        supported_layers = [50, 101, 152]
+        assert layers in supported_layers, \
+            "supported layers are {} but input layer is {}".format(supported_layers, layers)
+
+        # reshape input
+        channels = input.shape[2]
+        short_size = input.shape[3]
+        input = fluid.layers.reshape(x=input, shape=[-1, channels, short_size, short_size])
+
+        if layers == 50:
+            depth = [3, 4, 6, 3]
+        elif layers == 101:
+            depth = [3, 4, 23, 3]
+        elif layers == 152:
+            depth = [3, 8, 36, 3]
+        num_filters = [64, 128, 256, 512]
+
+        conv = self.conv_bn_layer(input=input, num_filters=64, filter_size=7, stride=2, act='relu', name='conv1')
+        conv = fluid.layers.pool2d(input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
+
+        for block in range(len(depth)):
+            for i in range(depth[block]):
+                if layers in [101, 152] and block == 2:
+                    if i == 0:
+                        conv_name = "res" + str(block + 2) + "a"
+                    else:
+                        conv_name = "res" + str(block + 2) + "b" + str(i)
+                else:
+                    conv_name = "res" + str(block + 2) + chr(97 + i)
+
+                conv = self.bottleneck_block(
+                    input=conv,
+                    num_filters=num_filters[block],
+                    stride=2 if i == 0 and block != 0 else 1,
+                    name=conv_name)
+
+        pool = fluid.layers.pool2d(input=conv, pool_size=7, pool_type='avg', global_pooling=True)
+
+        feature = fluid.layers.reshape(x=pool, shape=[-1, seg_num, pool.shape[1]])
+        return feature
diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/reader/__init__.py b/modules/video/classification/videotag_tsn_lstm/resource/reader/__init__.py
similarity index 100%
rename from hub_module/modules/video/classification/videotag_tsn_lstm/resource/reader/__init__.py
rename to modules/video/classification/videotag_tsn_lstm/resource/reader/__init__.py
diff --git a/modules/video/classification/videotag_tsn_lstm/resource/reader/kinetics_reader.py b/modules/video/classification/videotag_tsn_lstm/resource/reader/kinetics_reader.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ba51db8ea5d2333cb9db6f090e1bfe7c858ade3
--- /dev/null
+++ b/modules/video/classification/videotag_tsn_lstm/resource/reader/kinetics_reader.py
@@ -0,0 +1,217 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+import sys
+
+import random
+import functools
+import logging
+try:
+    import cPickle as pickle
+    from cStringIO import StringIO
+except ImportError:
+    import pickle
+    from io import BytesIO
+
+import paddle
+import cv2
+import numpy as np
+from PIL import Image
+
+from .reader_utils import DataReader
+
+logger = logging.getLogger(__name__)
+python_ver = sys.version_info
+
+
+class KineticsReader(DataReader):
+    """
+    Data reader for the Kinetics dataset, in two formats: mp4 and pkl.
+    1. mp4, the original format of kinetics400
+    2. pkl, the mp4 was decoded previously and stored as pkl
+    In both cases, load the data, and then get the frame data in the form of numpy and label as an integer.
+    dataset cfg: format
+                 num_classes
+                 seg_num
+                 short_size
+                 target_size
+                 num_reader_threads
+                 buf_size
+                 image_mean
+                 image_std
+                 batch_size
+                 list
+    """
+
+    def __init__(self, name, mode, cfg):
+        super(KineticsReader, self).__init__(name, mode, cfg)
+        self.format = cfg.MODEL.format
+        self.num_classes = self.get_config_from_sec('model', 'num_classes')
+        self.seg_num = self.get_config_from_sec('model', 'seg_num')
+        self.seglen = self.get_config_from_sec('model', 'seglen')
+
+        self.seg_num = self.get_config_from_sec(mode, 'seg_num', self.seg_num)
+        self.short_size = self.get_config_from_sec(mode, 'short_size')
+        self.target_size = self.get_config_from_sec(mode, 'target_size')
+        self.num_reader_threads = self.get_config_from_sec(mode, 'num_reader_threads')
+        self.buf_size = self.get_config_from_sec(mode, 'buf_size')
+
+        self.img_mean = np.array(cfg.MODEL.image_mean).reshape([3, 1, 1]).astype(np.float32)
+        self.img_std = np.array(cfg.MODEL.image_std).reshape([3, 1, 1]).astype(np.float32)
+        # set batch size and file list
+        self.batch_size = cfg[mode.upper()]['batch_size']
+        self.filelist = cfg[mode.upper()]['filelist']
+
+    def create_reader(self):
+        _reader = self._reader_creator(self.filelist, self.mode, seg_num=self.seg_num, seglen=self.seglen, \
+                                       short_size=self.short_size, target_size=self.target_size, \
+                                       img_mean=self.img_mean, img_std=self.img_std, \
+                                       shuffle=(self.mode == 'train'), \
+                                       num_threads=self.num_reader_threads, \
+                                       buf_size=self.buf_size, format=self.format)
+
+        def _batch_reader():
+            batch_out = []
+            for imgs, label in _reader():
+                if imgs is None:
+                    continue
+                batch_out.append((imgs, label))
+                if len(batch_out) == self.batch_size:
+                    yield batch_out
+                    batch_out = []
+
+        return _batch_reader
+
+    def _reader_creator(self,
+                        pickle_list,
+                        mode,
+                        seg_num,
+                        seglen,
+                        short_size,
+                        target_size,
+                        img_mean,
+                        img_std,
+                        shuffle=False,
+                        num_threads=1,
+                        buf_size=1024,
+                        format='pkl'):
+        def decode_mp4(sample, mode, seg_num, seglen, short_size, target_size, img_mean, img_std):
+            sample = sample[0].split(' ')
+            mp4_path = sample[0]
+            try:
+                imgs = mp4_loader(mp4_path, seg_num, seglen, mode)
+                if len(imgs) < 1:
+                    logger.error('{} frame length {} less than 1.'.format(mp4_path, len(imgs)))
+                    return None, None
+            except Exception:
+                logger.error('Error when loading {}'.format(mp4_path))
+                return None, None
+
+            return imgs_transform(imgs, mode, seg_num, seglen, \
+                                  short_size, target_size, img_mean, img_std, name=self.name), mp4_path
+
+        def reader():
+            lines = [line.strip() for line in pickle_list]
+            if shuffle:
+                random.shuffle(lines)
+            for line in lines:
+                pickle_path = line.strip()
+                yield [pickle_path]
+
+        mapper = functools.partial(
+            decode_mp4,
+            mode=mode,
+            seg_num=seg_num,
+            seglen=seglen,
+            short_size=short_size,
+            target_size=target_size,
+            img_mean=img_mean,
+            img_std=img_std)
+
+        return paddle.reader.xmap_readers(mapper, reader, num_threads, buf_size)
+
+
+def imgs_transform(imgs, mode, seg_num, seglen, short_size, target_size, img_mean, img_std, name=''):
+    imgs = group_scale(imgs, short_size)
+
+    np_imgs = np.array([np.array(img).astype('float32') for img in imgs])  # dhwc
+    np_imgs = group_center_crop(np_imgs, target_size)
+    np_imgs = np_imgs.transpose(0, 3, 1, 2) / 255  # dchw
+    np_imgs -= img_mean
+    np_imgs /= img_std
+
+    return np_imgs
+
+
+def group_center_crop(np_imgs, target_size):
+    d, h, w, c = np_imgs.shape
+    th, tw = target_size, target_size
+    assert (w >= target_size) and (h >= target_size), \
+        "image width({}) and height({}) should be larger than crop size({})".format(w, h, target_size)
+
+    h_off = int(round((h - th) / 2.))
+    w_off = int(round((w - tw) / 2.))
+
+    img_crop = np_imgs[:, h_off:h_off + target_size, w_off:w_off + target_size, :]
+    return img_crop
+
+
+def group_scale(imgs, target_size):
+    resized_imgs = []
+    for i in range(len(imgs)):
+        img = imgs[i]
+        w, h = img.size
+        if (w <= h and w == target_size) or (h <= w and h == target_size):
+            resized_imgs.append(img)
+            continue
+
+        if w < h:
+            ow = target_size
+            oh = int(target_size * 4.0 / 3.0)
+            resized_imgs.append(img.resize((ow, oh), Image.BILINEAR))
+        else:
+            oh = target_size
+            ow = int(target_size * 4.0 / 3.0)
+            resized_imgs.append(img.resize((ow, oh), Image.BILINEAR))
+
+    return resized_imgs
+
+
+def mp4_loader(filepath, nsample, seglen, mode):
+    cap = cv2.VideoCapture(filepath)
+    videolen = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    sampledFrames = []
+    for i in range(videolen):
+        ret, frame = cap.read()
+        # the first frames may be empty
+        if not ret:
+            continue
+        img = frame[:, :, ::-1]
+        sampledFrames.append(img)
+    average_dur = int(len(sampledFrames) / nsample)
+    imgs = []
+    for i in range(nsample):
+        idx = 0
+        if average_dur >= seglen:
+            idx = (average_dur - 1) // 2
+            idx += i * average_dur
+        elif average_dur >= 1:
+            idx += i * average_dur
+        else:
+            idx = i
+
+        for jj in range(idx, idx + seglen):
+            imgbuf = sampledFrames[int(jj % len(sampledFrames))]
+            img = Image.fromarray(imgbuf, mode='RGB')
+            imgs.append(img)
+    return imgs
diff --git a/modules/video/classification/videotag_tsn_lstm/resource/reader/reader_utils.py b/modules/video/classification/videotag_tsn_lstm/resource/reader/reader_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..867c114789d30a9cddb4663c1a7330feee00b5c6
--- /dev/null
+++ b/modules/video/classification/videotag_tsn_lstm/resource/reader/reader_utils.py
@@ -0,0 +1,74 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+
+class ReaderNotFoundError(Exception):
+    "Error: reader not found"
+
+    def __init__(self, reader_name, avail_readers):
+        super(ReaderNotFoundError, self).__init__()
+        self.reader_name = reader_name
+        self.avail_readers = avail_readers
+
+    def __str__(self):
+        msg = "Reader {} Not Found.\nAvailable readers:\n".format(self.reader_name)
+        for reader in self.avail_readers:
+            msg += "  {}\n".format(reader)
+        return msg
+
+
+class DataReader(object):
+    """data reader for video input"""
+
+    def __init__(self, model_name, mode, cfg):
+        self.name = model_name
+        self.mode = mode
+        self.cfg = cfg
+
+    def create_reader(self):
+        """Not implemented"""
+        pass
+
+    def get_config_from_sec(self, sec, item, default=None):
+        if sec.upper() not in self.cfg:
+            return default
+        return self.cfg[sec.upper()].get(item, default)
+
+
+class ReaderZoo(object):
+    def __init__(self):
+        self.reader_zoo = {}
+
+    def regist(self, name, reader):
+        assert reader.__base__ == DataReader, "Unknown reader type {}".format(type(reader))
+        self.reader_zoo[name] = reader
+
+    def get(self, name, mode, cfg):
+        for k, v in self.reader_zoo.items():
+            if k == name:
+                return v(name, mode, cfg)
+        raise ReaderNotFoundError(name, self.reader_zoo.keys())
+
+
+# singleton reader_zoo
+reader_zoo = ReaderZoo()
+
+
+def regist_reader(name, reader):
+    reader_zoo.regist(name, reader)
+
+
+def get_reader(name, mode, cfg):
+    reader_model = reader_zoo.get(name, mode, cfg)
+    return reader_model.create_reader()
diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/__init__.py b/modules/video/classification/videotag_tsn_lstm/resource/utils/__init__.py
similarity index 100%
rename from hub_module/modules/video/classification/videotag_tsn_lstm/resource/metrics/youtube8m/__init__.py
rename to modules/video/classification/videotag_tsn_lstm/resource/utils/__init__.py
diff --git a/modules/video/classification/videotag_tsn_lstm/resource/utils/config_utils.py b/modules/video/classification/videotag_tsn_lstm/resource/utils/config_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..48cb94a0e298a0bddcd134b5a3174c3904d978b0
--- /dev/null
+++ b/modules/video/classification/videotag_tsn_lstm/resource/utils/config_utils.py
@@ -0,0 +1,75 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
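The `config_utils.py` file below turns a YAML file into nested AttrDict sections and lets command-line overrides be merged in. A usage sketch; the helper functions and their signatures are as defined below, while the import path and YAML file name are assumptions:

```python
from utils.config_utils import parse_config, merge_configs, print_configs

cfg = parse_config('tsn.yaml')  # YAML -> nested AttrDict; the YAML uses
                                # upper-case section names (MODEL, INFER, ...)
cfg = merge_configs(cfg, 'infer', {'batch_size': 1, 'filelist': None})  # None values are skipped
print_configs(cfg, 'Infer')
seg_num = cfg.MODEL.seg_num     # sections are read with attribute access
```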
+
+import logging
+
+from .utility import AttrDict
+
+logger = logging.getLogger(__name__)
+
+CONFIG_SECS = [
+    'train',
+    'valid',
+    'test',
+    'infer',
+]
+
+
+def parse_config(cfg_file):
+    """Load a config file into AttrDict"""
+    import yaml
+    with open(cfg_file, 'r') as fopen:
+        yaml_config = AttrDict(yaml.load(fopen, Loader=yaml.Loader))
+    create_attr_dict(yaml_config)
+    return yaml_config
+
+
+def create_attr_dict(yaml_config):
+    from ast import literal_eval
+    for key, value in yaml_config.items():
+        if type(value) is dict:
+            yaml_config[key] = value = AttrDict(value)
+        if isinstance(value, str):
+            try:
+                value = literal_eval(value)
+            except BaseException:
+                pass
+        if isinstance(value, AttrDict):
+            create_attr_dict(yaml_config[key])
+        else:
+            yaml_config[key] = value
+    return
+
+
+def merge_configs(cfg, sec, args_dict):
+    assert sec in CONFIG_SECS, "invalid config section {}".format(sec)
+    sec_dict = getattr(cfg, sec.upper())
+    for k, v in args_dict.items():
+        if v is None:
+            continue
+        try:
+            if hasattr(sec_dict, k):
+                setattr(sec_dict, k, v)
+        except Exception:
+            pass
+    return cfg
+
+
+def print_configs(cfg, mode):
+    logger.info("---------------- {:>5} Arguments ----------------".format(mode))
+    for sec, sec_items in cfg.items():
+        logger.info("{}:".format(sec))
+        for k, v in sec_items.items():
+            logger.info("    {}:{}".format(k, v))
+    logger.info("-------------------------------------------------")
diff --git a/modules/video/classification/videotag_tsn_lstm/resource/utils/train_utils.py b/modules/video/classification/videotag_tsn_lstm/resource/utils/train_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..73b1a5f87049d44cef8b86ed368f8184a55bf0ea
--- /dev/null
+++ b/modules/video/classification/videotag_tsn_lstm/resource/utils/train_utils.py
@@ -0,0 +1,137 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import logging
+
+import time
+import numpy as np
+import paddle.fluid as fluid
+from paddle.fluid import profiler
+
+logger = logging.getLogger(__name__)
+
+
+def log_lr_and_step():
+    try:
+        # In optimizers, if learning_rate is set as constant, lr_var
+        # name is 'learning_rate_0', and iteration counter is not
+        # recorded.
+        # If learning_rate is set as decayed values from
+        # learning_rate_scheduler, lr_var name is 'learning_rate', and the
+        # iteration counter is recorded with name '@LR_DECAY_COUNTER@';
+        # a better implementation is required here.
+        lr_var = fluid.global_scope().find_var("learning_rate")
+        if not lr_var:
+            lr_var = fluid.global_scope().find_var("learning_rate_0")
+        lr = np.array(lr_var.get_tensor())
+
+        lr_count = '[-]'
+        lr_count_var = fluid.global_scope().find_var("@LR_DECAY_COUNTER@")
+        if lr_count_var:
+            lr_count = np.array(lr_count_var.get_tensor())
+        logger.info("------- learning rate {}, learning rate counter {} -----".format(np.array(lr), np.array(lr_count)))
+    except Exception:
+        logger.warning("Unable to get learning_rate and LR_DECAY_COUNTER.")
+
+
+def test_with_dataloader(exe,
+                         compiled_test_prog,
+                         test_dataloader,
+                         test_fetch_list,
+                         test_metrics,
+                         log_interval=0,
+                         save_model_name=''):
+    if not test_dataloader:
+        logger.error("[TEST] get dataloader failed.")
+    test_metrics.reset()
+    test_iter = 0
+
+    for data in test_dataloader():
+        test_outs = exe.run(compiled_test_prog, fetch_list=test_fetch_list, feed=data)
+        test_metrics.accumulate(test_outs)
+        if log_interval > 0 and test_iter % log_interval == 0:
+            test_metrics.calculate_and_log_out(test_outs, \
+                                               info='[TEST] test_iter {} '.format(test_iter))
+        test_iter += 1
+    test_metrics.finalize_and_log_out("[TEST] Finish")
+
+
+def train_with_dataloader(exe, train_prog, compiled_train_prog, train_dataloader, \
+                          train_fetch_list, train_metrics, epochs=10, \
+                          log_interval=0, valid_interval=0, save_dir='./', \
+                          save_model_name='model', fix_random_seed=False, \
+                          compiled_test_prog=None, test_dataloader=None, \
+                          test_fetch_list=None, test_metrics=None, \
+                          is_profiler=None, profiler_path=None):
+    if not train_dataloader:
+        logger.error("[TRAIN] get dataloader failed.")
+    epoch_periods = []
+    # NOTE: train_loss is currently never updated from train_outs, so the
+    # kpis print below always reports 0.
+    train_loss = 0
+    for epoch in range(epochs):
+        log_lr_and_step()
+
+        train_iter = 0
+        epoch_periods = []
+
+        for data in train_dataloader():
+            cur_time = time.time()
+            train_outs = exe.run(compiled_train_prog, fetch_list=train_fetch_list, feed=data)
+            period = time.time() - cur_time
+            epoch_periods.append(period)
+            if log_interval > 0 and (train_iter % log_interval == 0):
+                train_metrics.calculate_and_log_out(train_outs, \
+                                                    info='[TRAIN] Epoch {}, iter {} '.format(epoch, train_iter))
+            train_iter += 1
+
+            # NOTE: profiler tools, used for benchmark
+            if is_profiler and epoch == 0 and train_iter == log_interval:
+                profiler.start_profiler("All")
+            elif is_profiler and epoch == 0 and train_iter == log_interval + 5:
+                profiler.stop_profiler("total", profiler_path)
+                return
+
+        if len(epoch_periods) < 1:
+            logger.info('No iteration was executed, please check the data reader')
+            sys.exit(1)
+
+        logger.info('[TRAIN] Epoch {} training finished, average time: {}'.format(epoch, np.mean(epoch_periods[1:])))
+        save_model(exe, train_prog, save_dir, save_model_name, "_epoch{}".format(epoch), save_type='.pdckpt')
+        save_model(exe, train_prog, save_dir, save_model_name, "_epoch{}".format(epoch), save_type='.pdparams')
+        if compiled_test_prog and valid_interval > 0 and (epoch + 1) % valid_interval == 0:
+            test_with_dataloader(exe, compiled_test_prog, test_dataloader, test_fetch_list, test_metrics, log_interval,
+                                 save_model_name)
+
+    save_model(exe, train_prog, save_dir, save_model_name, '_final', save_type='.pdckpt')
+    save_model(exe, train_prog, save_dir, save_model_name, '_final', save_type='.pdparams')
+    # print benchmark KPIs when fix_random_seed is set for debugging
+    if fix_random_seed:
+        cards = os.environ.get('CUDA_VISIBLE_DEVICES')
+        gpu_num = len(cards.split(","))
+        print("kpis\ttrain_cost_card{}\t{}".format(gpu_num, train_loss))
+        print("kpis\ttrain_speed_card{}\t{}".format(gpu_num, np.mean(epoch_periods)))
+
+
+def save_model(exe, program, save_dir, model_name, postfix='', save_type='.pdckpt'):
+    """
+    save_type: '.pdckpt' or '.pdparams'; '.pdckpt' saves all persistable variables,
+    '.pdparams' saves parameters only.
+    """
+    if not os.path.isdir(save_dir):
+        os.makedirs(save_dir)
+    saved_model_name = model_name + postfix
+
+    fluid.save(program, os.path.join(save_dir, saved_model_name))
+
+    return
diff --git a/hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/utility.py b/modules/video/classification/videotag_tsn_lstm/resource/utils/utility.py
similarity index 100%
rename from hub_module/modules/video/classification/videotag_tsn_lstm/resource/utils/utility.py
rename to modules/video/classification/videotag_tsn_lstm/resource/utils/utility.py
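Finally, as a sanity check on the YouTube-8M metric helpers earlier in this diff, the Hit@1 computation from `calculate_hit_at_one` can be reproduced by hand on a toy batch (numpy only; the values are illustrative):

```python
import numpy as np

predictions = np.array([[0.8, 0.1, 0.1],
                        [0.2, 0.7, 0.1]])
actuals = np.array([[1, 0, 0],
                    [0, 0, 1]])

top_prediction = np.argmax(predictions, 1)                   # [0, 1]
hits = actuals[np.arange(actuals.shape[0]), top_prediction]  # [1, 0]
print(hits.mean())  # 0.5: only the first video's top class is a true label
```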