Unverified · Commit 7c6e76e5 authored by cuicheng01, committed by GitHub

Merge pull request #1265 from cuicheng01/add_multilabel

Add multilabel feature
Global:
infer_imgs: "./images/0517_2715693311.jpg"
inference_model_dir: "../inference/"
batch_size: 1
use_gpu: True
enable_mkldnn: False
cpu_num_threads: 10
enable_benchmark: True
use_fp16: False
ir_optim: True
use_tensorrt: False
gpu_mem: 8000
enable_profile: False
PreProcess:
transform_ops:
- ResizeImage:
resize_short: 256
- CropImage:
size: 224
- NormalizeImage:
scale: 0.00392157
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
channel_num: 3
- ToCHWImage:
PostProcess:
main_indicator: MultiLabelTopk
MultiLabelTopk:
topk: 5
class_id_map_file: None
SavePreLabel:
save_dir: ./pre_label/
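These `PreProcess.transform_ops` describe a standard ImageNet-style pipeline: resize the short side to 256, center-crop to 224, scale pixel values by 1/255 (0.00392157), normalize with the ImageNet mean/std, and convert HWC to CHW. A minimal numpy/OpenCV sketch of what the ops do (an illustration of the steps, not the deploy implementation itself):

```python
import cv2
import numpy as np

def preprocess(image_path):
    # ResizeImage: resize the short side to 256, keeping the aspect ratio
    img = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
    h, w = img.shape[:2]
    scale = 256.0 / min(h, w)
    img = cv2.resize(img, (int(round(w * scale)), int(round(h * scale))))

    # CropImage: center crop to 224 x 224
    h, w = img.shape[:2]
    top, left = (h - 224) // 2, (w - 224) // 2
    img = img[top:top + 224, left:left + 224]

    # NormalizeImage: scale to [0, 1], then subtract mean / divide by std
    img = img.astype("float32") * 0.00392157  # 1 / 255
    img = (img - np.array([0.485, 0.456, 0.406])) / np.array([0.229, 0.224, 0.225])

    # ToCHWImage: HWC -> CHW, then add the batch dimension
    return img.transpose((2, 0, 1))[np.newaxis, ...].astype("float32")
```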
......@@ -81,12 +81,14 @@ class Topk(object):
class_id_map = None
return class_id_map
def __call__(self, x, file_names=None):
def __call__(self, x, file_names=None, multilabel=False):
if file_names is not None:
assert x.shape[0] == len(file_names)
y = []
for idx, probs in enumerate(x):
index = probs.argsort(axis=0)[-self.topk:][::-1].astype("int32")
index = probs.argsort(axis=0)[-self.topk:][::-1].astype(
"int32") if not multilabel else np.where(
probs >= 0.5)[0].astype("int32")
clas_id_list = []
score_list = []
label_name_list = []
......@@ -108,6 +110,14 @@ class Topk(object):
return y
class MultiLabelTopk(Topk):
def __init__(self, topk=1, class_id_map_file=None):
super().__init__(topk=topk, class_id_map_file=class_id_map_file)
def __call__(self, x, file_names=None):
return super().__call__(x, file_names, multilabel=True)
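The only difference between `Topk` and `MultiLabelTopk` is how class indices are selected: top-k keeps the k highest-scoring classes, while the multilabel branch keeps every class whose probability reaches the 0.5 threshold. A small numpy sketch of the two selection rules on made-up probabilities:

```python
import numpy as np

probs = np.array([0.10, 0.95, 0.40, 0.70, 0.05], dtype="float32")

# Single-label top-k: indices of the k largest probabilities, best first
topk = 3
topk_index = probs.argsort(axis=0)[-topk:][::-1].astype("int32")
print(topk_index)            # [1 3 2]

# Multilabel: every class whose probability is at least the 0.5 threshold
multilabel_index = np.where(probs >= 0.5)[0].astype("int32")
print(multilabel_index)      # [1 3]
```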
class SavePreLabel(object):
def __init__(self, save_dir):
if save_dir is None:
......@@ -128,23 +138,24 @@ class SavePreLabel(object):
os.makedirs(output_dir, exist_ok=True)
shutil.copy(image_file, output_dir)
class Binarize(object):
def __init__(self, method = "round"):
def __init__(self, method="round"):
self.method = method
self.unit = np.array([[128, 64, 32, 16, 8, 4, 2, 1]]).T
def __call__(self, x, file_names=None):
if self.method == "round":
x = np.round(x + 1).astype("uint8") - 1
if self.method == "sign":
x = ((np.sign(x) + 1) / 2).astype("uint8")
embedding_size = x.shape[1]
assert embedding_size % 8 == 0, "The Binary index only support vectors with sizes multiple of 8"
byte = np.zeros([x.shape[0], embedding_size // 8], dtype=np.uint8)
for i in range(embedding_size // 8):
byte[:, i:i+1] = np.dot(x[:, i * 8: (i + 1)* 8], self.unit)
byte[:, i:i + 1] = np.dot(x[:, i * 8:(i + 1) * 8], self.unit)
return byte
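`Binarize` packs a binary embedding into bytes: each group of 8 values is dotted with the weights [128, 64, ..., 1], so 8 bits become one `uint8`. A tiny sketch of the `sign` method on a made-up 8-dimensional embedding:

```python
import numpy as np

# One embedding of size 8 with values in {-1, +1}; "sign" maps it to bits 1/0
x = np.array([[1., -1., 1., 1., -1., -1., 1., -1.]])
bits = ((np.sign(x) + 1) / 2).astype("uint8")       # [[1 0 1 1 0 0 1 0]]
unit = np.array([[128, 64, 32, 16, 8, 4, 2, 1]]).T
packed = np.dot(bits, unit).astype("uint8")
print(packed)                                       # [[178]] == 0b10110010
```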
......@@ -71,7 +71,6 @@ class ClsPredictor(Predictor):
output_names = self.paddle_predictor.get_output_names()
output_tensor = self.paddle_predictor.get_output_handle(output_names[
0])
if self.benchmark:
self.auto_logger.times.start()
if not isinstance(images, (list, )):
......@@ -119,7 +118,6 @@ def main(config):
) == len(image_list):
if len(batch_imgs) == 0:
continue
batch_results = cls_predictor.predict(batch_imgs)
for number, result_dict in enumerate(batch_results):
filename = batch_names[number]
......
# classification
python3.7 python/predict_cls.py -c configs/inference_cls.yaml
# multilabel_classification
#python3.7 python/predict_cls.py -c configs/inference_multilabel_cls.yaml
# feature extractor
# python3.7 python/predict_rec.py -c configs/inference_rec.yaml
......
......@@ -25,58 +25,68 @@ tar -xf NUS-SCENE-dataset.tar
cd ../../
```
## Environment
## Training
### Download pretrained model
```shell
export CUDA_VISIBLE_DEVICES=0,1,2,3
python3 -m paddle.distributed.launch \
--gpus="0,1,2,3" \
tools/train.py \
-c ./ppcls/configs/quick_start/professional/MobileNetV1_multilabel.yaml
```
You can use the following commands to download the pretrained model of ResNet50_vd.
After training for 10 epochs, the best accuracy over the validation set should be around 0.95.
## Evaluation
```bash
mkdir pretrained
cd pretrained
wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_pretrained.pdparams
cd ../
python tools/eval.py \
-c ./ppcls/configs/quick_start/professional/MobileNetV1_multilabel.yaml \
-o Arch.pretrained="./output/MobileNetV1/best_model"
```
## Training
## Prediction
```shell
export CUDA_VISIBLE_DEVICES=0
python -m paddle.distributed.launch \
--gpus="0" \
tools/train.py \
-c ./configs/quick_start/ResNet50_vd_multilabel.yaml
```bash
python3 tools/infer.py \
-c ./ppcls/configs/quick_start/professional/MobileNetV1_multilabel.yaml \
-o Arch.pretrained="./output/MobileNetV1/best_model"
```
After training for 10 epochs, the best accuracy over the validation set should be around 0.72.
You will get output similar to the following:
```
[{'class_ids': [6, 13, 17, 23, 26, 30], 'scores': [0.95683, 0.5567, 0.55211, 0.99088, 0.5943, 0.78767], 'file_name': './deploy/images/0517_2715693311.jpg', 'label_names': []}]
```
## Evaluation
## Prediction based on prediction engine
### Export model
```bash
python tools/eval.py \
-c ./configs/quick_start/ResNet50_vd_multilabel.yaml \
-o pretrained_model="./output/ResNet50_vd/best_model/ppcls" \
-o load_static_weights=False
python3 tools/export_model.py \
-c ./ppcls/configs/quick_start/professional/MobileNetV1_multilabel.yaml \
-o Arch.pretrained="./output/MobileNetV1/best_model"
```
The evaluation metric is mAP, which is commonly used in multilabel tasks to measure model performance. The mAP over the validation set should be around 0.57.
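For reference, mAP for multilabel classification is usually computed as the mean of the per-class average precision. A hedged scikit-learn sketch on toy data (illustrative only, not necessarily the exact routine used for the number above):

```python
import numpy as np
from sklearn.metrics import average_precision_score

# Toy multilabel ground truth (multi-hot) and predicted scores for 3 classes
y_true = np.array([[1, 0, 1],
                   [0, 1, 0],
                   [1, 1, 0]])
y_score = np.array([[0.9, 0.2, 0.6],
                    [0.1, 0.8, 0.3],
                    [0.7, 0.4, 0.2]])

# mAP = macro-average of the per-class average precision
print(average_precision_score(y_true, y_score, average="macro"))
```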
By default, the exported inference model is saved under the current path `./inference`.
## Prediction
### Prediction based on prediction engine
Enter the deploy directory:
```bash
python tools/infer/infer.py \
-i "./dataset/NUS-WIDE-SCENE/NUS-SCENE-dataset/images/0199_434752251.jpg" \
--model ResNet50_vd \
--pretrained_model "./output/ResNet50_vd/best_model/ppcls" \
--use_gpu True \
--load_static_weights False \
--multilabel True \
--class_num 33
cd ./deploy
```
Run prediction with the prediction engine:
```
python3 python/predict_cls.py \
-c configs/inference_multilabel_cls.yaml
```
You will get output similar to the following:
```
class id: 3, probability: 0.6025
class id: 23, probability: 0.5491
class id: 32, probability: 0.7006
```
\ No newline at end of file
```
0517_2715693311.jpg: class id(s): [6, 13, 17, 23, 26, 30], score(s): [0.96, 0.56, 0.55, 0.99, 0.59, 0.79], label_name(s): []
```
......@@ -25,58 +25,66 @@ tar -xf NUS-SCENE-dataset.tar
cd ../../
```
## 2. Environment Preparation
## 2. Model Training
### 2.1 Download the pretrained model
```shell
export CUDA_VISIBLE_DEVICES=0,1,2,3
python3 -m paddle.distributed.launch \
--gpus="0,1,2,3" \
tools/train.py \
-c ./ppcls/configs/quick_start/professional/MobileNetV1_multilabel.yaml
```
After training for 10 epochs, the best accuracy over the validation set should be around 0.95.
This example demonstrates the multi-label classification workflow based on the ResNet50_vd model, so first download the pretrained ResNet50_vd model.
## 3. Model Evaluation
```bash
mkdir pretrained
cd pretrained
wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_pretrained.pdparams
cd ../
python3 tools/eval.py \
-c ./ppcls/configs/quick_start/professional/MobileNetV1_multilabel.yaml \
-o Arch.pretrained="./output/MobileNetV1/best_model"
```
## 3. Model Training
## 4. Model Prediction
```shell
export CUDA_VISIBLE_DEVICES=0
python -m paddle.distributed.launch \
--gpus="0" \
tools/train.py \
-c ./configs/quick_start/ResNet50_vd_multilabel.yaml
```bash
python3 tools/infer.py \
-c ./ppcls/configs/quick_start/professional/MobileNetV1_multilabel.yaml \
-o Arch.pretrained="./output/MobileNetV1/best_model"
```
You will get output similar to the following:
```
[{'class_ids': [6, 13, 17, 23, 26, 30], 'scores': [0.95683, 0.5567, 0.55211, 0.99088, 0.5943, 0.78767], 'file_name': './deploy/images/0517_2715693311.jpg', 'label_names': []}]
```
After training for 10 epochs, the best accuracy over the validation set should be around 0.72.
## 5. Prediction with the prediction engine
## 4. Model Evaluation
### 5.1 Export the inference model
```bash
python tools/eval.py \
-c ./configs/quick_start/ResNet50_vd_multilabel.yaml \
-o pretrained_model="./output/ResNet50_vd/best_model/ppcls" \
-o load_static_weights=False
python3 tools/export_model.py \
-c ./ppcls/configs/quick_start/professional/MobileNetV1_multilabel.yaml \
-o Arch.pretrained="./output/MobileNetV1/best_model"
```
By default, the exported inference model is saved under the current path `./inference`.
The evaluation metric is mAP; the mAP over the validation set should be around 0.57.
### 5.2 Prediction with the prediction engine
## 5. Model Prediction
First, enter the deploy directory:
```bash
python tools/infer/infer.py \
-i "./dataset/NUS-WIDE-SCENE/NUS-SCENE-dataset/images/0199_434752251.jpg" \
--model ResNet50_vd \
--pretrained_model "./output/ResNet50_vd/best_model/ppcls" \
--use_gpu True \
--load_static_weights False \
--multilabel True \
--class_num 33
cd ./deploy
```
Run prediction with the prediction engine:
```
python3 python/predict_cls.py \
-c configs/inference_multilabel_cls.yaml
```
You will get output similar to the following:
```
class id: 3, probability: 0.6025
class id: 23, probability: 0.5491
class id: 32, probability: 0.7006
```
\ No newline at end of file
```
0517_2715693311.jpg: class id(s): [6, 13, 17, 23, 26, 30], score(s): [0.96, 0.56, 0.55, 0.99, 0.59, 0.79], label_name(s): []
```
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: ./output/
device: gpu
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 10
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
image_shape: [3, 224, 224]
save_inference_dir: ./inference
use_multilabel: True
# model architecture
Arch:
name: MobileNetV1
class_num: 33
pretrained: True
# loss function config for traing/eval process
Loss:
Train:
- MultiLabelLoss:
weight: 1.0
Eval:
- MultiLabelLoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
lr:
name: Cosine
learning_rate: 0.1
regularizer:
name: 'L2'
coeff: 0.00004
# data loader for train and eval
DataLoader:
Train:
dataset:
name: MultiLabelDataset
image_root: ./dataset/NUS-WIDE-SCENE/NUS-SCENE-dataset/images/
cls_label_path: ./dataset/NUS-WIDE-SCENE/NUS-SCENE-dataset/multilabel_train_list.txt
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- RandCropImage:
size: 224
- RandFlipImage:
flip_code: 1
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
drop_last: False
shuffle: True
loader:
num_workers: 4
use_shared_memory: True
Eval:
dataset:
name: MultiLabelDataset
image_root: ./dataset/NUS-WIDE-SCENE/NUS-SCENE-dataset/images/
cls_label_path: ./dataset/NUS-WIDE-SCENE/NUS-SCENE-dataset/multilabel_test_list.txt
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
resize_short: 256
- CropImage:
size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 256
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Infer:
infer_imgs: ./deploy/images/0517_2715693311.jpg
batch_size: 10
transforms:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
resize_short: 256
- CropImage:
size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- ToCHWImage:
PostProcess:
name: MultiLabelTopk
topk: 5
class_id_map_file: None
Metric:
Train:
- HammingDistance:
- AccuracyScore:
Eval:
- HammingDistance:
- AccuracyScore:
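The config above ties a `MultiLabelDataset` to `MultiLabelLoss`, i.e. binary cross-entropy over all 33 classes. Each line of the files referenced by `cls_label_path` pairs an image path with a comma-separated multi-hot label vector, separated by a tab. A small sketch of how such a line (hypothetical file content, following the parsing shown in `MultiLabelDataset` below) becomes the float32 target the loss expects:

```python
import numpy as np

# Hypothetical line from multilabel_train_list.txt: "<image path>\t<comma-separated multi-hot labels>"
line = "0001_109976.jpg\t1,0,0,1,0,1"

path, label_str = line.strip().split("\t")
labels = label_str.split(",")

# MultiLabelLoss consumes a float32 multi-hot target, one entry per class
target = np.array(labels).astype("float32")
print(path, target)   # 0001_109976.jpg [1. 0. 0. 1. 0. 1.]
```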
......@@ -33,7 +33,7 @@ class MultiLabelDataset(CommonDataset):
with open(self._cls_path) as fd:
lines = fd.readlines()
for l in lines:
l = l.strip().split(" ")
l = l.strip().split("\t")
self.images.append(os.path.join(self._img_root, l[0]))
labels = l[1].split(',')
......@@ -44,13 +44,14 @@ class MultiLabelDataset(CommonDataset):
def __getitem__(self, idx):
try:
img = cv2.imread(self.images[idx])
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
with open(self.images[idx], 'rb') as f:
img = f.read()
if self._transform_ops:
img = transform(img, self._transform_ops)
img = img.transpose((2, 0, 1))
label = np.array(self.labels[idx]).astype("float32")
return (img, label)
except Exception as ex:
logger.error("Exception occured when parse line: {} with msg: {}".
format(self.images[idx], ex))
......
......@@ -16,7 +16,7 @@ import importlib
from . import topk
from .topk import Topk
from .topk import Topk, MultiLabelTopk
def build_postprocess(config):
......
......@@ -45,15 +45,17 @@ class Topk(object):
class_id_map = None
return class_id_map
def __call__(self, x, file_names=None):
def __call__(self, x, file_names=None, multilabel=False):
assert isinstance(x, paddle.Tensor)
if file_names is not None:
assert x.shape[0] == len(file_names)
x = F.softmax(x, axis=-1)
x = F.softmax(x, axis=-1) if not multilabel else F.sigmoid(x)
x = x.numpy()
y = []
for idx, probs in enumerate(x):
index = probs.argsort(axis=0)[-self.topk:][::-1].astype("int32")
index = probs.argsort(axis=0)[-self.topk:][::-1].astype(
"int32") if not multilabel else np.where(
probs >= 0.5)[0].astype("int32")
clas_id_list = []
score_list = []
label_name_list = []
......@@ -73,3 +75,11 @@ class Topk(object):
result["label_names"] = label_name_list
y.append(result)
return y
class MultiLabelTopk(Topk):
def __init__(self, topk=1, class_id_map_file=None):
super().__init__(topk=topk, class_id_map_file=class_id_map_file)
def __call__(self, x, file_names=None):
return super().__call__(x, file_names, multilabel=True)
......@@ -355,7 +355,8 @@ class Engine(object):
def export(self):
assert self.mode == "export"
model = ExportModel(self.config["Arch"], self.model)
use_multilabel = self.config["Global"].get("use_multilabel", False)
model = ExportModel(self.config["Arch"], self.model, use_multilabel)
if self.config["Global"]["pretrained_model"] is not None:
load_dygraph_pretrain(model.base_model,
self.config["Global"]["pretrained_model"])
......@@ -388,10 +389,9 @@ class ExportModel(nn.Layer):
ExportModel: add softmax onto the model
"""
def __init__(self, config, model):
def __init__(self, config, model, use_multilabel):
super().__init__()
self.base_model = model
# we should choose a final model to export
if isinstance(self.base_model, DistillationModel):
self.infer_model_name = config["infer_model_name"]
......@@ -402,10 +402,13 @@ class ExportModel(nn.Layer):
if self.infer_output_key == "features" and isinstance(self.base_model,
RecModel):
self.base_model.head = IdentityHead()
if config.get("infer_add_softmax", True):
self.softmax = nn.Softmax(axis=-1)
if use_multilabel:
self.out_act = nn.Sigmoid()
else:
self.softmax = None
if config.get("infer_add_softmax", True):
self.out_act = nn.Softmax(axis=-1)
else:
self.out_act = None
def eval(self):
self.training = False
......@@ -421,6 +424,6 @@ class ExportModel(nn.Layer):
x = x[self.infer_model_name]
if self.infer_output_key is not None:
x = x[self.infer_output_key]
if self.softmax is not None:
x = self.softmax(x)
if self.out_act is not None:
x = self.out_act(x)
return x
......@@ -50,7 +50,8 @@ def classification_eval(engine, epoch_id=0):
time_info["reader_cost"].update(time.time() - tic)
batch_size = batch[0].shape[0]
batch[0] = paddle.to_tensor(batch[0]).astype("float32")
batch[1] = batch[1].reshape([-1, 1]).astype("int64")
if not engine.config["Global"].get("use_multilabel", False):
batch[1] = batch[1].reshape([-1, 1]).astype("int64")
# image input
out = engine.model(batch[0])
# calc loss
......
......@@ -33,9 +33,10 @@ def train_epoch(engine, epoch_id, print_batch_step):
paddle.to_tensor(batch[0]['label'])
]
batch_size = batch[0].shape[0]
batch[1] = batch[1].reshape([-1, 1]).astype("int64")
if not engine.config["Global"].get("use_multilabel", False):
batch[1] = batch[1].reshape([-1, 1]).astype("int64")
engine.global_step += 1
# image input
if engine.amp:
with paddle.amp.auto_cast(custom_black_list={
......@@ -75,8 +76,8 @@ def train_epoch(engine, epoch_id, print_batch_step):
tic = time.time()
def forward(trainer, batch):
if not trainer.is_rec:
return trainer.model(batch[0])
def forward(engine, batch):
if not engine.is_rec:
return engine.model(batch[0])
else:
return trainer.model(batch[0], batch[1])
return engine.model(batch[0], batch[1])
......@@ -20,6 +20,7 @@ from .distanceloss import DistanceLoss
from .distillationloss import DistillationCELoss
from .distillationloss import DistillationGTCELoss
from .distillationloss import DistillationDMLLoss
from .multilabelloss import MultiLabelLoss
class CombinedLoss(nn.Layer):
......
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
class MultiLabelLoss(nn.Layer):
"""
Multi-label loss
"""
def __init__(self, epsilon=None):
super().__init__()
if epsilon is not None and (epsilon <= 0 or epsilon >= 1):
epsilon = None
self.epsilon = epsilon
def _labelsmoothing(self, target, class_num):
if target.ndim == 1 or target.shape[-1] != class_num:
one_hot_target = F.one_hot(target, class_num)
else:
one_hot_target = target
soft_target = F.label_smooth(one_hot_target, epsilon=self.epsilon)
soft_target = paddle.reshape(soft_target, shape=[-1, class_num])
return soft_target
def _binary_crossentropy(self, input, target, class_num):
if self.epsilon is not None:
target = self._labelsmoothing(target, class_num)
cost = F.binary_cross_entropy_with_logits(
logit=input, label=target)
else:
cost = F.binary_cross_entropy_with_logits(
logit=input, label=target)
return cost
def forward(self, x, target):
if isinstance(x, dict):
x = x["logits"]
class_num = x.shape[-1]
loss = self._binary_crossentropy(x, target, class_num)
loss = loss.mean()
return {"MultiLabelLoss": loss}
......@@ -19,6 +19,8 @@ from collections import OrderedDict
from .metrics import TopkAcc, mAP, mINP, Recallk, Precisionk
from .metrics import DistillationTopkAcc
from .metrics import GoogLeNetTopkAcc
from .metrics import HammingDistance, AccuracyScore
class CombinedMetrics(nn.Layer):
def __init__(self, config_list):
......@@ -32,7 +34,8 @@ class CombinedMetrics(nn.Layer):
metric_name = list(config)[0]
metric_params = config[metric_name]
if metric_params is not None:
self.metric_func_list.append(eval(metric_name)(**metric_params))
self.metric_func_list.append(
eval(metric_name)(**metric_params))
else:
self.metric_func_list.append(eval(metric_name)())
......@@ -42,6 +45,7 @@ class CombinedMetrics(nn.Layer):
metric_dict.update(metric_func(*args, **kwargs))
return metric_dict
def build_metrics(config):
metrics_list = CombinedMetrics(copy.deepcopy(config))
return metrics_list
......@@ -15,6 +15,12 @@
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from sklearn.metrics import hamming_loss
from sklearn.metrics import accuracy_score as accuracy_metric
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.preprocessing import binarize
class TopkAcc(nn.Layer):
......@@ -198,7 +204,7 @@ class Precisionk(nn.Layer):
equal_flag = paddle.logical_and(equal_flag,
keep_mask.astype('bool'))
equal_flag = paddle.cast(equal_flag, 'float32')
Ns = paddle.arange(gallery_img_id.shape[0]) + 1
equal_flag_cumsum = paddle.cumsum(equal_flag, axis=1)
Precision_at_k = (paddle.mean(equal_flag_cumsum, axis=0) / Ns).numpy()
......@@ -232,3 +238,71 @@ class GoogLeNetTopkAcc(TopkAcc):
def forward(self, x, label):
return super().forward(x[0], label)
class MutiLabelMetric(object):
def __init__(self):
pass
def _multi_hot_encode(self, logits, threshold=0.5):
return binarize(logits, threshold=threshold)
def __call__(self, output):
output = F.sigmoid(output)
preds = self._multi_hot_encode(logits=output.numpy(), threshold=0.5)
return preds
class HammingDistance(MutiLabelMetric):
"""
Soft metric based label for multilabel classification
Returns:
The smaller the return value is, the better model is.
"""
def __init__(self):
super().__init__()
def __call__(self, output, target):
preds = super().__call__(output)
metric_dict = dict()
metric_dict["HammingDistance"] = paddle.to_tensor(
hamming_loss(target, preds))
return metric_dict
class AccuracyScore(MutiLabelMetric):
"""
Hard metric for multilabel classification
Args:
base: ["sample", "label"], default="sample"
if "sample", return metric score based sample,
if "label", return metric score based label.
Returns:
accuracy:
"""
def __init__(self, base="label"):
super().__init__()
assert base in ["sample", "label"
], 'must be one of ["sample", "label"]'
self.base = base
def __call__(self, output, target):
preds = super().__call__(output)
metric_dict = dict()
if self.base == "sample":
accuracy = accuracy_metric(target, preds)
elif self.base == "label":
mcm = multilabel_confusion_matrix(target, preds)
tns = mcm[:, 0, 0]
fns = mcm[:, 1, 0]
tps = mcm[:, 1, 1]
fps = mcm[:, 0, 1]
accuracy = (sum(tps) + sum(tns)) / (
sum(tps) + sum(tns) + sum(fns) + sum(fps))
precision = sum(tps) / (sum(tps) + sum(fps))
recall = sum(tps) / (sum(tps) + sum(fns))
F1 = 2 * (precision * recall) / (precision + recall)
metric_dict["AccuracyScore"] = paddle.to_tensor(accuracy)
return metric_dict
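To make the two metrics concrete, here is what they compute on toy multi-hot targets and thresholded predictions, using the same scikit-learn primitives as the classes above (an illustration, not the engine's evaluation loop):

```python
import numpy as np
from sklearn.metrics import hamming_loss, multilabel_confusion_matrix

target = np.array([[1, 0, 1],
                   [0, 1, 0]])
preds = np.array([[1, 0, 0],     # thresholded sigmoid outputs (>= 0.5 -> 1)
                  [0, 1, 0]])

# HammingDistance: fraction of label positions predicted incorrectly (lower is better)
print(hamming_loss(target, preds))    # 1 wrong out of 6 positions -> 0.1666...

# AccuracyScore (base="label"): (TP + TN) / (TP + TN + FP + FN) over all labels
mcm = multilabel_confusion_matrix(target, preds)
tns, fps, fns, tps = mcm[:, 0, 0], mcm[:, 0, 1], mcm[:, 1, 0], mcm[:, 1, 1]
print((tps.sum() + tns.sum()) / (tps.sum() + tns.sum() + fps.sum() + fns.sum()))  # 0.8333...
```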
......@@ -4,4 +4,4 @@
# python3.7 tools/train.py -c ./ppcls/configs/ImageNet/ResNet/ResNet50.yaml
# for multi-cards train
python3.7 -m paddle.distributed.launch --gpus="0,1,2,3" tools/train.py -c ./ppcls/configs/ImageNet/ResNet/ResNet50.yaml
\ No newline at end of file
python3.7 -m paddle.distributed.launch --gpus="0,1,2,3" tools/train.py -c ./ppcls/configs/ImageNet/ResNet/ResNet50.yaml