diff --git a/README.md b/README.md index 7acf4147f5037c26d0160c25077f315ca4ee9655..aa2416dd9986a0ae85433a2c60e633e8088329d5 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ Guidelines: | ------------------------------------------------------- | ----------------------------- | ---------- | -------------- | ------------ | --------------- | ------------ | ----------- | | [YuNet](./models/face_detection_yunet) | Face Detection | 160x120 | 1.45 | 6.22 | 12.18 | 4.04 | 86.69 | | [SFace](./models/face_recognition_sface) | Face Recognition | 112x112 | 8.65 | 99.20 | 24.88 | 46.25 | --- | +| [FER](./models/facial_expression_recognition/) | Facial Expression Recognition | 112x112 | 4.43 | 49.86 | 31.07 | 108.53\* | --- | | [LPD-YuNet](./models/license_plate_detection_yunet/) | License Plate Detection | 320x240 | --- | 168.03 | 56.12 | 29.53 | --- | | [YOLOX](./models/object_detection_yolox/) | Object Detection | 640x640 | 176.68 | 1496.70 | 388.95 | 420.98 | --- | | [NanoDet](./models/object_detection_nanodet/) | Object Detection | 416x416 | 157.91 | 220.36 | 64.94 | 116.64 | --- | @@ -62,6 +63,10 @@ Some examples are listed below. You can find more in the directory of each model ![largest selfie](./models/face_detection_yunet/examples/largest_selfie.jpg) +### Facial Expression Recognition with [Progressive Teacher](./models/facial_expression_recognition/) + +![fer demo](./models/facial_expression_recognition/examples/selfie.jpg) + ### Human Segmentation with [PP-HumanSeg](./models/human_segmentation_pphumanseg/) ![messi](./models/human_segmentation_pphumanseg/examples/messi.jpg) diff --git a/benchmark/config/facial_expression_recognition.yaml b/benchmark/config/facial_expression_recognition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9e0a8f988f6c4e9fedc62f07a557785756600314 --- /dev/null +++ b/benchmark/config/facial_expression_recognition.yaml @@ -0,0 +1,16 @@ +Benchmark: + name: "Facial Expression Recognition Benchmark" + type: "Recognition" + data: + path: "benchmark/data/facial_expression_recognition/fer_evaluation" + files: ["RAF_test_0_61.jpg", "RAF_test_0_30.jpg", "RAF_test_6_25.jpg"] + metric: # 'sizes' is omitted since this model requires input of fixed size + warmup: 30 + repeat: 10 + reduction: "median" + backend: "default" + target: "cpu" + +Model: + name: "FacialExpressionRecog" + modelPath: "models/facial_expression_recognition/facial_expression_recognition_mobilefacenet_2022july.onnx" diff --git a/benchmark/download_data.py b/benchmark/download_data.py index 1fb003e9f37942e143ccfef37574e259c9dc2188..b0c515b05a5553be4a5ae20f0b2479429d94e2fc 100644 --- a/benchmark/download_data.py +++ b/benchmark/download_data.py @@ -173,6 +173,10 @@ data_downloaders = dict( url='https://drive.google.com/u/0/uc?id=1BRIozREIzqkm_aMQ581j93oWoS-6TLST&export=download', sha='03892b9036c58d9400255ff73858caeec1f46609', filename='face_recognition.zip'), + facial_expression_recognition=Downloader(name='facial_expression_recognition', + url='https://drive.google.com/u/0/uc?id=13ZE0Pz302z1AQmBmYGuowkTiEXVLyFFZ&export=download', + sha='8f757559820c8eaa1b1e0065f9c3bbbd4f49efe2', + filename='facial_expression_recognition.zip'), text=Downloader(name='text', url='https://drive.google.com/u/0/uc?id=1lTQdZUau7ujHBqp0P6M1kccnnJgO-dRj&export=download', sha='a40cf095ceb77159ddd2a5902f3b4329696dd866', diff --git a/benchmark/utils/dataloaders/recognition.py b/benchmark/utils/dataloaders/recognition.py index 460e610c4c1ff7e22ee9b5c7006bfaaefb960642..62c77f23976dd3527e83cec322e51d5c4190f2d4
100644 --- a/benchmark/utils/dataloaders/recognition.py +++ b/benchmark/utils/dataloaders/recognition.py @@ -16,7 +16,10 @@ class RecognitionImageLoader(_BaseImageLoader): def _load_label(self): labels = dict.fromkeys(self._files, None) for filename in self._files: - labels[filename] = np.loadtxt(os.path.join(self._path, '{}.txt'.format(filename[:-4])), ndmin=2) + if os.path.exists(os.path.join(self._path, '{}.txt'.format(filename[:-4]))): + labels[filename] = np.loadtxt(os.path.join(self._path, '{}.txt'.format(filename[:-4])), ndmin=2) + else: + labels[filename] = None return labels def __iter__(self): diff --git a/benchmark/utils/metrics/recognition.py b/benchmark/utils/metrics/recognition.py index d4f882abce91fde96a3e785c08bcf018c45cd5eb..716b969f56ef3f0f9a1fbdbd8f93119b18551799 100644 --- a/benchmark/utils/metrics/recognition.py +++ b/benchmark/utils/metrics/recognition.py @@ -12,12 +12,20 @@ class Recognition(BaseMetric): img, bboxes = args self._timer.reset() - for idx, bbox in enumerate(bboxes): + if bboxes is not None: + for idx, bbox in enumerate(bboxes): + for _ in range(self._warmup): + model.infer(img, bbox) + for _ in range(self._repeat): + self._timer.start() + model.infer(img, bbox) + self._timer.stop() + else: for _ in range(self._warmup): - model.infer(img, bbox) + model.infer(img, None) for _ in range(self._repeat): self._timer.start() - model.infer(img, bbox) + model.infer(img, None) self._timer.stop() return self._getResult() \ No newline at end of file diff --git a/models/__init__.py b/models/__init__.py index 5beffe0e3914363f6e461d05f17bba8b195652b8..79d65fa40ccb991e4968c2307c562f5a74782c2d 100644 --- a/models/__init__.py +++ b/models/__init__.py @@ -14,6 +14,7 @@ from .handpose_estimation_mediapipe.mp_handpose import MPHandPose from .license_plate_detection_yunet.lpd_yunet import LPD_YuNet from .object_detection_nanodet.nanodet import NanoDet from .object_detection_yolox.yolox import YoloX +from .facial_expression_recognition.facial_fer_model import FacialExpressionRecog class Registery: def __init__(self, name): @@ -43,4 +44,4 @@ MODELS.register(MPHandPose) MODELS.register(LPD_YuNet) MODELS.register(NanoDet) MODELS.register(YoloX) - +MODELS.register(FacialExpressionRecog) diff --git a/models/facial_expression_recognition/README.md b/models/facial_expression_recognition/README.md new file mode 100644 index 0000000000000000000000000000000000000000..df317806c91a3d1de040d4d76f6abd3ad8bb702d --- /dev/null +++ b/models/facial_expression_recognition/README.md @@ -0,0 +1,40 @@ + +# Progressive Teacher + +Progressive Teacher: [Boosting Facial Expression Recognition by A Semi-Supervised Progressive Teacher](https://scholar.google.com/citations?view_op=view_citation&hl=zh-CN&user=OCwcfAwAAAAJ&citation_for_view=OCwcfAwAAAAJ:u5HHmVD_uO8C) + +Note: +- Progressive Teacher is contributed by [Jing Jiang](https://scholar.google.com/citations?user=OCwcfAwAAAAJ&hl=zh-CN). +- [MobileFaceNet](https://link.springer.com/chapter/10.1007/978-3-319-97909-0_46) is used as the backbone and the model is able to classify seven basic facial expressions (angry, disgust, fearful, happy, neutral, sad, surprised). +- [facial_expression_recognition_mobilefacenet_2022july.onnx](https://github.com/opencv/opencv_zoo/raw/master/models/facial_expression_recognition/facial_expression_recognition_mobilefacenet_2022july.onnx) is implemented thanks to [Chengrui Wang](https://github.com/opencv). + +Results of accuracy evaluation on [RAF-DB](http://whdeng.cn/RAF/model1.html). 
+ +| Models | Accuracy | +|-------------|----------| +| Progressive Teacher | 88.27% | + + +## Demo + +***NOTE***: This demo uses [../face_detection_yunet](../face_detection_yunet) as the face detector, which supports 5-landmark detection for now (2021sep). + +Run the following command to try the demo: +```shell +# recognize the facial expression on images +python demo.py --input /path/to/image +``` + +### Example outputs + +Note: Zoom in to see the recognized facial expression in the top-left corner of each face box. + +![fer demo](./examples/selfie.jpg) + +## License + +All files in this directory are licensed under [Apache 2.0 License](./LICENSE). + +## Reference + +- https://ieeexplore.ieee.org/abstract/document/9629313 diff --git a/models/facial_expression_recognition/demo.py b/models/facial_expression_recognition/demo.py new file mode 100644 index 0000000000000000000000000000000000000000..dcda5d2e50478e65a58a8d062ac8fbf5e3da139d --- /dev/null +++ b/models/facial_expression_recognition/demo.py @@ -0,0 +1,131 @@ +import sys +import argparse +import copy +import datetime + +import numpy as np +import cv2 as cv + +from facial_fer_model import FacialExpressionRecog + +sys.path.append('../face_detection_yunet') +from yunet import YuNet + + +def str2bool(v): + if v.lower() in ['on', 'yes', 'true', 'y', 't']: + return True + elif v.lower() in ['off', 'no', 'false', 'n', 'f']: + return False + else: + raise NotImplementedError + + +backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA] +targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16] +help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA" +help_msg_targets = "Choose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16" +try: + backends += [cv.dnn.DNN_BACKEND_TIMVX] + targets += [cv.dnn.DNN_TARGET_NPU] + help_msg_backends += "; {:d}: TIMVX" + help_msg_targets += "; {:d}: NPU" +except: + print('This version of OpenCV does not support TIM-VX and NPU. Visit https://github.com/opencv/opencv/wiki/TIM-VX-Backend-For-Running-OpenCV-On-NPU for more information.') + +parser = argparse.ArgumentParser(description='Facial Expression Recognition') +parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.') +parser.add_argument('--model', '-fm', type=str, default='./facial_expression_recognition_mobilefacenet_2022july.onnx', help='Path to the facial expression recognition model.') +parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends)) +parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets)) +parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.') +parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.') +args = parser.parse_args() + + +def visualize(image, det_res, fer_res, box_color=(0, 255, 0), text_color=(0, 0, 255)): + + print('%s %3d faces detected.'
% (datetime.datetime.now(), len(det_res))) + + output = image.copy() + landmark_color = [ + (255, 0, 0), # right eye + (0, 0, 255), # left eye + (0, 255, 0), # nose tip + (255, 0, 255), # right mouth corner + (0, 255, 255) # left mouth corner + ] + + for ind, (det, fer_type) in enumerate(zip(det_res, fer_res)): + bbox = det[0:4].astype(np.int32) + fer_type = FacialExpressionRecog.getDesc(fer_type) + print("Face %2d: %d %d %d %d %s." % (ind, bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3], fer_type)) + cv.rectangle(output, (bbox[0], bbox[1]), (bbox[0]+bbox[2], bbox[1]+bbox[3]), box_color, 2) + cv.putText(output, fer_type, (bbox[0], bbox[1]+12), cv.FONT_HERSHEY_DUPLEX, 0.5, text_color) + landmarks = det[4:14].astype(np.int32).reshape((5, 2)) + for idx, landmark in enumerate(landmarks): + cv.circle(output, landmark, 2, landmark_color[idx], 2) + return output + + +def process(detect_model, fer_model, frame): + h, w, _ = frame.shape + detect_model.setInputSize([w, h]) + dets = detect_model.infer(frame) + + if dets is None: + return False, None, None + + fer_res = np.zeros(0, dtype=np.int8) + for face_points in dets: + fer_res = np.concatenate((fer_res, fer_model.infer(frame, face_points[:-1])), axis=0) + return True, dets, fer_res + + +if __name__ == '__main__': + detect_model = YuNet(modelPath='../face_detection_yunet/face_detection_yunet_2022mar.onnx') + + fer_model = FacialExpressionRecog(modelPath=args.model, + backendId=args.backend, + targetId=args.target) + + # If input is an image + if args.input is not None: + image = cv.imread(args.input) + + # Get detection and fer results + status, dets, fer_res = process(detect_model, fer_model, image) + + if status: + # Draw results on the input image + image = visualize(image, dets, fer_res) + + # Save results + if args.save: + cv.imwrite('result.jpg', image) + print('Results saved to result.jpg\n') + + # Visualize results in a new window + if args.vis: + cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE) + cv.imshow(args.input, image) + cv.waitKey(0) + else: # Omit input to call default camera + deviceId = 0 + cap = cv.VideoCapture(deviceId) + + while cv.waitKey(1) < 0: + hasFrame, frame = cap.read() + if not hasFrame: + print('No frames grabbed!') + break + + # Get detection and fer results + status, dets, fer_res = process(detect_model, fer_model, frame) + + if status: + # Draw results on the input image + frame = visualize(frame, dets, fer_res) + + # Visualize results in a new window + cv.imshow('FER Demo', frame) diff --git a/models/facial_expression_recognition/facial_expression_recognition_mobilefacenet_2022july-int8-quantized.onnx b/models/facial_expression_recognition/facial_expression_recognition_mobilefacenet_2022july-int8-quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..72d61af2b0f4abf4933e3e4cef8c8b474bf45df3 --- /dev/null +++ b/models/facial_expression_recognition/facial_expression_recognition_mobilefacenet_2022july-int8-quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:541597ca330e0e3babe883d0fa6ab121b0e3da65c9cc099c05ff274b3106a658 +size 1340132 diff --git a/models/facial_expression_recognition/facial_expression_recognition_mobilefacenet_2022july.onnx b/models/facial_expression_recognition/facial_expression_recognition_mobilefacenet_2022july.onnx new file mode 100644 index 0000000000000000000000000000000000000000..67dd024c304015a24121197216b63462ba0336d0 --- /dev/null +++ 
b/models/facial_expression_recognition/facial_expression_recognition_mobilefacenet_2022july.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f61307602fc089ce20488a31d4e4614e3c9753a7d6c41578c854858b183e1a9 +size 4791892 diff --git a/models/facial_expression_recognition/facial_fer_model.py b/models/facial_expression_recognition/facial_fer_model.py new file mode 100644 index 0000000000000000000000000000000000000000..e0a739dd7e146fcd7620f7b57dca887cedbf8847 --- /dev/null +++ b/models/facial_expression_recognition/facial_fer_model.py @@ -0,0 +1,178 @@ +# This file is part of OpenCV Zoo project. +# It is subject to the license terms in the LICENSE file found in the same directory. +# +# Copyright (C) 2022, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved. +# Third party copyrights are property of their respective owners. + +import numpy as np +import cv2 as cv + +class FacialExpressionRecog: + def __init__(self, modelPath, backendId=0, targetId=0): + self._modelPath = modelPath + self._backendId = backendId + self._targetId = targetId + + self._model = cv.dnn.readNet(self._modelPath) + self._model.setPreferableBackend(self._backendId) + self._model.setPreferableTarget(self._targetId) + + self._align_model = FaceAlignment() + + self._inputNames = 'data' + self._outputNames = ['label'] + self._inputSize = [112, 112] + self._mean = np.array([0.5, 0.5, 0.5])[np.newaxis, np.newaxis, :] + self._std = np.array([0.5, 0.5, 0.5])[np.newaxis, np.newaxis, :] + + @property + def name(self): + return self.__class__.__name__ + + def setBackend(self, backend_id): + self._backendId = backend_id + self._model.setPreferableBackend(self._backendId) + + def setTarget(self, target_id): + self._targetId = target_id + self._model.setPreferableTarget(self._targetId) + + def _preprocess(self, image, bbox): + if bbox is not None: + image = self._align_model.get_align_image(image, bbox[4:].reshape(-1, 2)) + image = cv.cvtColor(image, cv.COLOR_BGR2RGB) + image = image.astype(np.float32, copy=False) / 255.0 + image -= self._mean + image /= self._std + return cv.dnn.blobFromImage(image) + + def infer(self, image, bbox=None): + # Preprocess + inputBlob = self._preprocess(image, bbox) + + # Forward + self._model.setInput(inputBlob, self._inputNames) + outputBlob = self._model.forward(self._outputNames) + + # Postprocess + results = self._postprocess(outputBlob) + + return results + + def _postprocess(self, outputBlob): + result = np.argmax(outputBlob[0], axis=1).astype(np.uint8) + return result + + @staticmethod + def getDesc(ind): + _expression_enum = ["angry", "disgust", "fearful", "happy", "neutral", "sad", "surprised"] + return _expression_enum[ind] + + +class FaceAlignment(): + def __init__(self, reflective=False): + self._std_points = np.array([[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366], [41.5493, 92.3655], [70.7299, 92.2041]]) + self.reflective = reflective + + def __tformfwd(self, trans, uv): + uv = np.hstack((uv, np.ones((uv.shape[0], 1)))) + xy = np.dot(uv, trans) + xy = xy[:, 0:-1] + return xy + + def __tforminv(self, trans, uv): + Tinv = np.linalg.inv(trans) + xy = self.__tformfwd(Tinv, uv) + return xy + + def __findNonreflectiveSimilarity(self, uv, xy, options=None): + options = {"K": 2} + + K = options["K"] + M = xy.shape[0] + x = xy[:, 0].reshape((-1, 1)) # use reshape to keep a column vector + y = xy[:, 1].reshape((-1, 1)) # use reshape to keep a column vector + # print '--->x, y:\n', x, y + + tmp1 = np.hstack((x, y, 
np.ones((M, 1)), np.zeros((M, 1)))) + tmp2 = np.hstack((y, -x, np.zeros((M, 1)), np.ones((M, 1)))) + X = np.vstack((tmp1, tmp2)) + # print '--->X.shape: ', X.shape + # print 'X:\n', X + + u = uv[:, 0].reshape((-1, 1)) # use reshape to keep a column vector + v = uv[:, 1].reshape((-1, 1)) # use reshape to keep a column vector + U = np.vstack((u, v)) + # print '--->U.shape: ', U.shape + # print 'U:\n', U + + # We know that X * r = U + if np.linalg.matrix_rank(X) >= 2 * K: + r, _, _, _ = np.linalg.lstsq(X, U, rcond=-1) + # print(r, X, U, sep="\n") + r = np.squeeze(r) + else: + raise Exception("cp2tform:twoUniquePointsReq") + + sc = r[0] + ss = r[1] + tx = r[2] + ty = r[3] + + Tinv = np.array([[sc, -ss, 0], [ss, sc, 0], [tx, ty, 1]]) + T = np.linalg.inv(Tinv) + T[:, 2] = np.array([0, 0, 1]) + + return T, Tinv + + def __findSimilarity(self, uv, xy, options=None): + options = {"K": 2} + + # uv = np.array(uv) + # xy = np.array(xy) + + # Solve for trans1 + trans1, trans1_inv = self.__findNonreflectiveSimilarity(uv, xy, options) + + # manually reflect the xy data across the Y-axis + xyR = xy + xyR[:, 0] = -1 * xyR[:, 0] + # Solve for trans2 + trans2r, trans2r_inv = self.__findNonreflectiveSimilarity(uv, xyR, options) + + # manually reflect the tform to undo the reflection done on xyR + TreflectY = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, 1]]) + trans2 = np.dot(trans2r, TreflectY) + + # Figure out if trans1 or trans2 is better + xy1 = self.__tformfwd(trans1, uv) + norm1 = np.linalg.norm(xy1 - xy) + xy2 = self.__tformfwd(trans2, uv) + norm2 = np.linalg.norm(xy2 - xy) + + if norm1 <= norm2: + return trans1, trans1_inv + else: + trans2_inv = np.linalg.inv(trans2) + return trans2, trans2_inv + + def __get_similarity_transform(self, src_pts, dst_pts): + if self.reflective: + trans, trans_inv = self.__findSimilarity(src_pts, dst_pts) + else: + trans, trans_inv = self.__findNonreflectiveSimilarity(src_pts, dst_pts) + return trans, trans_inv + + def __cvt_tform_mat_for_cv2(self, trans): + cv2_trans = trans[:, 0:2].T + return cv2_trans + + def get_similarity_transform_for_cv2(self, src_pts, dst_pts): + trans, trans_inv = self.__get_similarity_transform(src_pts, dst_pts) + cv2_trans = self.__cvt_tform_mat_for_cv2(trans) + return cv2_trans, trans + + def get_align_image(self, image, lm5_points): + assert lm5_points is not None + tfm, trans = self.get_similarity_transform_for_cv2(lm5_points, self._std_points) + return cv.warpAffine(image, tfm, (112, 112)) diff --git a/tools/quantize/inc_configs/fer.yaml b/tools/quantize/inc_configs/fer.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1370c018729b3814ddab0246be8ada13d54bf80a --- /dev/null +++ b/tools/quantize/inc_configs/fer.yaml @@ -0,0 +1,26 @@ +version: 1.0 + +model: # mandatory. used to specify model specific information. + name: fer + framework: onnxrt_qlinearops # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. + +quantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space. + approach: post_training_static_quant # optional. default value is post_training_static_quant. + calibration: + dataloader: + batch_size: 1 + dataset: + dummy: + shape: [1, 3, 112, 112] + low: -1.0 + high: 1.0 + dtype: float32 + label: True + +tuning: + accuracy_criterion: + relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. 
+ exit_policy: + timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. + max_trials: 50 # optional. max tune times. default value is 100. combine with timeout field to decide when to exit. + random_seed: 9527 # optional. random seed for deterministic tuning. diff --git a/tools/quantize/quantize-inc.py b/tools/quantize/quantize-inc.py index 8005c4fcd1503f1a7a70a91c1b35c27826dce8b4..f7bbdb57d4aaaad6e77970525b5cef267a8eb96e 100644 --- a/tools/quantize/quantize-inc.py +++ b/tools/quantize/quantize-inc.py @@ -5,12 +5,39 @@ import cv2 as cv import onnx from neural_compressor.experimental import Quantization, common +from neural_compressor.experimental.metric import BaseMetric + + +class Accuracy(BaseMetric): + def __init__(self, *args): + self.pred_list = [] + self.label_list = [] + self.samples = 0 + + def update(self, predict, label): + predict = np.array(predict) + label = np.array(label) + self.pred_list.append(np.argmax(predict[0])) + self.label_list.append(label[0][0]) + self.samples += 1 + + def reset(self): + self.pred_list = [] + self.label_list = [] + self.samples = 0 + + def result(self): + correct_num = np.sum(np.array(self.pred_list) == np.array(self.label_list)) + return correct_num / self.samples + class Quantize: - def __init__(self, model_path, config_path, custom_dataset=None): + def __init__(self, model_path, config_path, custom_dataset=None, eval_dataset=None, metric=None): self.model_path = model_path self.config_path = config_path self.custom_dataset = custom_dataset + self.eval_dataset = eval_dataset + self.metric = metric def run(self): print('Quantizing (int8) with Intel\'s Neural Compressor:') @@ -21,31 +48,39 @@ class Quantize: model = onnx.load(self.model_path) quantizer = Quantization(self.config_path) + quantizer.model = common.Model(model) if self.custom_dataset is not None: quantizer.calib_dataloader = common.DataLoader(self.custom_dataset) - quantizer.model = common.Model(model) + if self.eval_dataset is not None: + quantizer.eval_dataloader = common.DataLoader(self.eval_dataset) + if self.metric is not None: + quantizer.metric = common.Metric(metric_cls=self.metric, name='metric') q_model = quantizer() q_model.save(output_name) + class Dataset: - def __init__(self, root, size=None, dim='chw', mean=0.0, std=1.0, swapRB=False, toFP32=False): + def __init__(self, root, size=None, dim='chw', scale=1.0, mean=0.0, std=1.0, swapRB=False, toFP32=False): self.root = root self.size = size self.dim = dim + self.scale = scale self.mean = mean self.std = std self.swapRB = swapRB self.toFP32 = toFP32 - self.image_list = self.load_image_list(self.root) + self.image_list, self.label_list = self.load_image_list(self.root) def load_image_list(self, path): image_list = [] + label_list = [] for f in os.listdir(path): if not f.endswith('.jpg'): continue image_list.append(os.path.join(path, f)) - return image_list + label_list.append(1) + return image_list, label_list def __getitem__(self, idx): img = cv.imread(self.image_list[idx]) @@ -59,18 +94,35 @@ class Dataset: if self.toFP32: img = img.astype(np.float32) + img = img * self.scale img = img - self.mean img = img / self.std if self.dim == 'chw': - img = img.transpose(2, 0, 1) # hwc -> chw + img = img.transpose(2, 0, 1) # hwc -> chw - return img, 1 + return img, self.label_list[idx] def __len__(self): return len(self.image_list) -models=dict( + +class FerDataset(Dataset): + def __init__(self, root, size=None, dim='chw', scale=1.0, mean=0.0, 
std=1.0, swapRB=False, toFP32=False): + super(FerDataset, self).__init__(root, size, dim, scale, mean, std, swapRB, toFP32) + + def load_image_list(self, path): + image_list = [] + label_list = [] + for f in os.listdir(path): + if not f.endswith('.jpg'): + continue + image_list.append(os.path.join(path, f)) + label_list.append(int(f.split("_")[2])) + return image_list, label_list + + +models = dict( mobilenetv1=Quantize(model_path='../../models/image_classification_mobilenet/image_classification_mobilenetv1_2022apr.onnx', config_path='./inc_configs/mobilenet.yaml'), mobilenetv2=Quantize(model_path='../../models/image_classification_mobilenet/image_classification_mobilenetv2_2022apr.onnx', @@ -84,6 +136,11 @@ models=dict( lpd_yunet=Quantize(model_path='../../models/license_plate_detection_yunet/license_plate_detection_lpd_yunet_2022may.onnx', config_path='./inc_configs/lpd_yunet.yaml', custom_dataset=Dataset(root='../../benchmark/data/license_plate_detection', size=(320, 240), dim='chw', toFP32=True)), + fer=Quantize(model_path='../../models/facial_expression_recognition/facial_expression_recognition_mobilefacenet_2022july.onnx', + config_path='./inc_configs/fer.yaml', + custom_dataset=FerDataset(root='../../benchmark/data/facial_expression_recognition/fer_calibration', size=(112, 112), toFP32=True, swapRB=True, scale=1./255, mean=0.5, std=0.5), + eval_dataset=FerDataset(root='../../benchmark/data/facial_expression_recognition/fer_evaluation', size=(112, 112), toFP32=True, swapRB=True, scale=1./255, mean=0.5, std=0.5), + metric=Accuracy), ) if __name__ == '__main__': @@ -97,4 +154,3 @@ if __name__ == '__main__': for selected_model_name in selected_models: q = models[selected_model_name] q.run() -
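
For anyone picking up this patch, the snippet below is a minimal sketch of how the new `FacialExpressionRecog` class can be paired with the existing YuNet detector outside of `demo.py`; it mirrors the `process()` helper in the patch. The `sys.path` entries and the input image path are assumptions (running from the repository root) and only illustrative.

```python
# Minimal sketch (paths assumed): detect faces with YuNet, then classify the
# expression of each detected face with FacialExpressionRecog, as demo.py does.
import sys

import cv2 as cv

sys.path.append('models/face_detection_yunet')
sys.path.append('models/facial_expression_recognition')
from yunet import YuNet
from facial_fer_model import FacialExpressionRecog

detector = YuNet(modelPath='models/face_detection_yunet/face_detection_yunet_2022mar.onnx')
fer = FacialExpressionRecog(modelPath='models/facial_expression_recognition/facial_expression_recognition_mobilefacenet_2022july.onnx')

image = cv.imread('path/to/image.jpg')  # assumed input image
h, w, _ = image.shape
detector.setInputSize([w, h])
dets = detector.infer(image)  # each row: x, y, w, h, five landmark (x, y) pairs, score

if dets is not None:
    for det in dets:
        # FacialExpressionRecog aligns the face from the 5 landmarks,
        # so the trailing detection score is dropped before inference.
        label = fer.infer(image, det[:-1])[0]
        print(FacialExpressionRecog.getDesc(label))
```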
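The benchmark and quantization entries added here follow the repository's existing tooling: from `benchmark/`, `python download_data.py facial_expression_recognition` fetches the evaluation images and `python benchmark.py --cfg ./config/facial_expression_recognition.yaml` times the model, while `python quantize-inc.py fer` from `tools/quantize/` re-runs post-training static quantization with the `FerDataset` calibration/evaluation splits and the `Accuracy` metric defined above. These invocations assume the repository's standard layout and scripts.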