diff --git a/contrib/RealTimeHumanSeg/python/infer.py b/contrib/RealTimeHumanSeg/python/infer.py index 277cab3a82721a01b7f5a93dd8fb360c8306bc5a..669b2f59ae633c8cfaa1dfc101516ad0f72d26dd 100644 --- a/contrib/RealTimeHumanSeg/python/infer.py +++ b/contrib/RealTimeHumanSeg/python/infer.py @@ -15,10 +15,6 @@ import os import sys -import ast -import time -import json -import argparse import numpy as np import cv2 @@ -26,7 +22,10 @@ import cv2 import paddle.fluid as fluid -def LoadModel(model_dir, use_gpu=False): +def load_model(model_dir, use_gpu=False): + """ + Load model files and init paddle predictor + """ prog_file = os.path.join(model_dir, '__model__') params_file = os.path.join(model_dir, '__params__') config = fluid.core.AnalysisConfig(prog_file, params_file) @@ -42,98 +41,117 @@ def LoadModel(model_dir, use_gpu=False): class HumanSeg: + """ + Human Segmentation Class + """ def __init__(self, model_dir, mean, scale, eval_size, use_gpu=False): self.mean = np.array(mean).reshape((3, 1, 1)) self.scale = np.array(scale).reshape((3, 1, 1)) self.eval_size = eval_size - self.predictor = LoadModel(model_dir, use_gpu) + self.predictor = load_model(model_dir, use_gpu) - def Preprocess(self, image): - im = cv2.resize( + def preprocess(self, image): + """ + preprocess image: hwc_rgb to chw_bgr + """ + img_mat = cv2.resize( image, self.eval_size, fx=0, fy=0, interpolation=cv2.INTER_CUBIC) # HWC -> CHW - im = im.swapaxes(1, 2) - im = im.swapaxes(0, 1) + img_mat = img_mat.swapaxes(1, 2) + img_mat = img_mat.swapaxes(0, 1) # Convert to float - im = im[:, :, :].astype('float32') - # im = (im - mean) * scale - im = im - self.mean - im = im * self.scale - im = im[np.newaxis, :, :, :] - return im - - def Postprocess(self, image, output_data): + img_mat = img_mat[:, :, :].astype('float32') + # img_mat = (img_mat - mean) * scale + img_mat = img_mat - self.mean + img_mat = img_mat * self.scale + img_mat = img_mat[np.newaxis, :, :, :] + return img_mat + + def postprocess(self, image, output_data): + """ + postprocess result: merge background with segmentation result + """ mask = output_data[0, 1, :, :] mask = cv2.resize(mask, (image.shape[1], image.shape[0])) scoremap = np.repeat(mask[:, :, np.newaxis], 3, axis=2) - bg = np.ones_like(scoremap) * 255 - merge_im = (scoremap * image + (1 - scoremap) * bg).astype(np.uint8) + bg_im = np.ones_like(scoremap) * 255 + merge_im = (scoremap * image + (1 - scoremap) * bg_im).astype(np.uint8) return merge_im - def Predict(self, image): + def run_predict(self, image): + """ + run predict: return segmentation image mat + """ ori_im = image.copy() - im = self.Preprocess(image) - im_tensor = fluid.core.PaddleTensor(im.copy().astype('float32')) + im_mat = self.preprocess(ori_im) + im_tensor = fluid.core.PaddleTensor(im_mat.copy().astype('float32')) output_data = self.predictor.run([im_tensor])[0] output_data = output_data.as_ndarray() - return self.Postprocess(image, output_data) + return self.postprocess(image, output_data) -# Do Predicting on a image -def PredictImage(seg, image_path): - im = cv2.imread(input_path) - im = seg.Predict(im) - cv2.imwrite('result.jpeg', im) +def predict_image(seg, image_path): + """ + Do Predicting on a image + """ + img_mat = cv2.imread(image_path) + img_mat = seg.run_predict(img_mat) + cv2.imwrite('result.jpeg', img_mat) -# Do Predicting on a video -def PredictVideo(seg, video_path): +def predict_video(seg, video_path): + """ + Do Predicting on a video + """ cap = cv2.VideoCapture(video_path) - if cap.isOpened() == False: + if not cap.isOpened(): print("Error opening video stream or file") return - w = cap.get(cv2.CAP_PROP_FRAME_WIDTH) - h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) + width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = cap.get(cv2.CAP_PROP_FPS) # Result Video Writer out = cv2.VideoWriter('result.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, - (int(w), int(h))) + (width, height)) # Start capturing from video - while (cap.isOpened()): + while cap.isOpened(): ret, frame = cap.read() - if ret == True: - im = seg.Predict(frame) - out.write(im) + if ret: + img_mat = seg.run_predict(frame) + out.write(img_mat) else: break cap.release() out.release() - -# Do Predicting on a camera video stream -def PredictCamera(seg): +def predict_camera(seg): + """ + Do Predicting on a camera video stream + """ cap = cv2.VideoCapture(0) - if cap.isOpened() == False: + if not cap.isOpened(): print("Error opening video stream or file") return # Start capturing from video - while (cap.isOpened()): + while cap.isOpened(): ret, frame = cap.read() - if ret == True: - im = seg.Predict(frame) - cv2.imshow('Frame', im) + if ret: + img_mat = seg.run_predict(frame) + cv2.imshow('Frame', img_mat) if cv2.waitKey(1) & 0xFF == ord('q'): break else: break cap.release() - -if __name__ == "__main__": - if len(sys.argv) < 3: +def main(argv): + """ + Entrypoint of the script + """ + if len(argv) < 3: print('Usage: python infer.py /path/to/model/ /path/to/video') - exit(0) + return model_dir = sys.argv[1] input_path = sys.argv[2] @@ -144,5 +162,9 @@ if __name__ == "__main__": eval_size = (513, 513) seg = HumanSeg(model_dir, mean, scale, eval_size, use_gpu) # Run Predicting on a video and result will be saved as result.avi - PredictCamera(seg) - #PredictVideo(seg, input_path) + # predict_camera(seg) + predict_video(seg, input_path) + + +if __name__ == "__main__": + main(sys.argv)