diff --git a/demo/semantic_segmentation/train.py b/demo/semantic_segmentation/train.py
index 55f3ba596e84e52f12cbd9ce9b475f7a0eb50ec8..b2fcb3b242f4014782015f3c2ce2f714d0fdb5fb 100644
--- a/demo/semantic_segmentation/train.py
+++ b/demo/semantic_segmentation/train.py
@@ -1,16 +1,31 @@
 import paddle
+import numpy as np
 import paddlehub as hub
 from paddlehub.finetune.trainer import Trainer
-
 from paddlehub.datasets import OpticDiscSeg
 from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize
+from paddlehub.vision.utils import ConfusionMatrix
 
 if __name__ == "__main__":
-    transform = Compose([Resize(target_size=(512, 512)), Normalize()])
-    train_reader = OpticDiscSeg(transform)
+    train_transforms = Compose([Resize(target_size=(512, 512)), Normalize()])
+    eval_transforms = Compose([Normalize()])
+    train_reader = OpticDiscSeg(train_transforms)
+    eval_reader = OpticDiscSeg(eval_transforms, mode='val')
 
     model = hub.Module(name='ocrnet_hrnetw18_voc', num_classes=2)
-    scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001)
-    optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters())
-    trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_ocr', use_gpu=True)
-    trainer.train(train_reader, epochs=20, batch_size=4, eval_dataset=train_reader, log_interval=10, save_interval=4)
\ No newline at end of file
+    scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001)
+    optimizer = paddle.optimizer.Momentum(learning_rate=scheduler, parameters=model.parameters())
+    trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True)
+    trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4)
+
+    cfm = ConfusionMatrix(eval_reader.num_classes, streaming=True)
+    model.eval()
+    for imgs, labels in eval_reader:
+        imgs = imgs[np.newaxis, :, :, :]
+        preds = model(paddle.to_tensor(imgs))[0]
+        preds = paddle.argmax(preds, axis=1, keepdim=True).numpy()
+        labels = labels[np.newaxis, :, :, :]
+        ignores = labels != eval_reader.ignore_index
+        cfm.calculate(preds, labels, ignores)
+    _, miou = cfm.mean_iou()
+    print('miou: {:.4f}'.format(miou))
diff --git a/paddlehub/vision/utils.py b/paddlehub/vision/utils.py
index 39a8b549a018a42dda5e4ac18365bee0a052e69b..826b6a281246c1ed6c7cc4b7f42f35bd224ac204 100644
--- a/paddlehub/vision/utils.py
+++ b/paddlehub/vision/utils.py
@@ -21,6 +21,7 @@ import PIL
 import numpy as np
 import matplotlib as plt
 import paddle.nn.functional as F
+from scipy.sparse import csr_matrix
 
 
 def is_image_file(filename: str) -> bool:
@@ -187,7 +188,7 @@ def npmax(array: np.ndarray) -> Tuple[int]:
     return i, j
 
 
-def visualize(image: Union[np.ndarray, str], result: np.ndarray, weight: float = 0.6) -> np.ndarray:
+def visualize(image: Union[np.ndarray, str], result: np.ndarray, weight: float = 0.6) -> np.ndarray:
     """
     Convert segmentation result to color image, and save added image.
 
@@ -289,4 +290,130 @@ def reverse_transform(pred: paddle.Tensor, ori_shape: List[int], transforms: Lis
             pred = pred[:, :, 0:h, 0:w]
         else:
             raise Exception("Unexpected info '{}' in im_info".format(item[0]))
-    return pred
\ No newline at end of file
+    return pred
+
+
+class ConfusionMatrix(object):
+    """
+    Confusion matrix for segmentation evaluation.
+
+    Args:
+        num_classes (int): Number of categories of the confusion matrix.
+        streaming (bool): Whether to use streaming mode. If the value is set to True, the data will be
+            accumulated every time the `calculate` interface is called. Defaults to False.
+    """
+
+    def __init__(self, num_classes: int, streaming: bool = False):
+        self.confusion_matrix = np.zeros([num_classes, num_classes], dtype='int64')
+        self.num_classes = num_classes
+        self.streaming = streaming
+
+    def calculate(self, pred, label, ignore=None):
+        # If not in streaming mode, clear the matrix every time `calculate` is called
+        if not self.streaming:
+            self.zero_matrix()
+
+        mask = np.array(ignore) == 1
+
+        label = np.asarray(label)[mask]
+        pred = np.asarray(pred)[mask]
+        one = np.ones_like(pred)
+        # Accumulate ([row=label, col=pred], 1) into a sparse matrix
+        spm = csr_matrix((one, (label, pred)), shape=(self.num_classes, self.num_classes))
+        spm = spm.todense()
+        self.confusion_matrix += spm
+
+    def zero_matrix(self):
+        """ Clear the confusion matrix """
+        self.confusion_matrix = np.zeros([self.num_classes, self.num_classes], dtype='int64')
+
+    def mean_iou(self) -> Tuple[np.ndarray, float]:
+        iou_list = []
+        avg_iou = 0
+        # TODO: use the numpy sum axis API to simplify
+        vji = np.zeros(self.num_classes, dtype=int)
+        vij = np.zeros(self.num_classes, dtype=int)
+        for j in range(self.num_classes):
+            v_j = 0
+            for i in range(self.num_classes):
+                v_j += self.confusion_matrix[j][i]
+            vji[j] = v_j
+
+        for i in range(self.num_classes):
+            v_i = 0
+            for j in range(self.num_classes):
+                v_i += self.confusion_matrix[j][i]
+            vij[i] = v_i
+
+        for c in range(self.num_classes):
+            total = vji[c] + vij[c] - self.confusion_matrix[c][c]
+            if total == 0:
+                iou = 0
+            else:
+                iou = float(self.confusion_matrix[c][c]) / total
+            avg_iou += iou
+            iou_list.append(iou)
+        avg_iou = float(avg_iou) / float(self.num_classes)
+        return np.array(iou_list), avg_iou
+
+    def accuracy(self) -> Tuple[np.ndarray, float]:
+        total = self.confusion_matrix.sum()
+        total_right = 0
+        for c in range(self.num_classes):
+            total_right += self.confusion_matrix[c][c]
+        if total == 0:
+            avg_acc = 0
+        else:
+            avg_acc = float(total_right) / total
+
+        vij = np.zeros(self.num_classes, dtype=int)
+        for i in range(self.num_classes):
+            v_i = 0
+            for j in range(self.num_classes):
+                v_i += self.confusion_matrix[j][i]
+            vij[i] = v_i
+
+        acc_list = []
+        for c in range(self.num_classes):
+            if vij[c] == 0:
+                acc = 0
+            else:
+                acc = self.confusion_matrix[c][c] / float(vij[c])
+            acc_list.append(acc)
+        return np.array(acc_list), avg_acc
+
+    def kappa(self) -> float:
+        vji = np.zeros(self.num_classes)
+        vij = np.zeros(self.num_classes)
+        for j in range(self.num_classes):
+            v_j = 0
+            for i in range(self.num_classes):
+                v_j += self.confusion_matrix[j][i]
+            vji[j] = v_j
+
+        for i in range(self.num_classes):
+            v_i = 0
+            for j in range(self.num_classes):
+                v_i += self.confusion_matrix[j][i]
+            vij[i] = v_i
+
+        total = self.confusion_matrix.sum()
+
+        # Avoid overflow in the intermediate products
+        # TODO: is it reasonable to hard-code 10000.0?
+        total = float(total) / 10000.0
+        vji = vji / 10000.0
+        vij = vij / 10000.0
+
+        tp = 0
+        tc = 0
+        for c in range(self.num_classes):
+            tp += vji[c] * vij[c]
+            tc += self.confusion_matrix[c][c]
+
+        tc = tc / 10000.0
+        pe = tp / (total * total)
+        po = tc / total
+
+        kappa = (po - pe) / (1 - pe)
+        return kappa
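For quick local testing, the snippet below is a minimal, self-contained sketch of driving the new ConfusionMatrix with synthetic data; the class count, array shapes, and random labels are illustrative assumptions, not part of this change. With streaming=True the counts accumulate across repeated calculate() calls before the metrics are read, which is how the demo script feeds it one validation image at a time.

import numpy as np
from paddlehub.vision.utils import ConfusionMatrix  # available after this change

num_classes = 2
cfm = ConfusionMatrix(num_classes, streaming=True)

for _ in range(4):  # pretend we have four evaluation batches
    label = np.random.randint(0, num_classes, size=(1, 1, 64, 64))
    pred = np.random.randint(0, num_classes, size=(1, 1, 64, 64))
    ignore = np.ones_like(label)        # 1 = count this pixel, 0 = skip it
    cfm.calculate(pred, label, ignore)  # accumulates because streaming=True

iou_per_class, miou = cfm.mean_iou()
acc_per_class, acc = cfm.accuracy()
print('miou: {:.4f}, acc: {:.4f}, kappa: {:.4f}'.format(miou, acc, cfm.kappa()))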