diff --git a/contrib/RealTimeHumanSeg/python/infer.py b/contrib/RealTimeHumanSeg/python/infer.py
index 225347f6b4911b5291549f70616862e760b79dc7..0befe36a64341e355feacddecfa04b7abc5db254 100644
--- a/contrib/RealTimeHumanSeg/python/infer.py
+++ b/contrib/RealTimeHumanSeg/python/infer.py
@@ -22,7 +22,7 @@ import cv2
 import paddle.fluid as fluid
 
 
-def LoadModel(model_dir, use_gpu=False):
+def load_model(model_dir, use_gpu=False):
     """
     Load model files and init paddle predictor
     """
@@ -41,78 +41,94 @@ def LoadModel(model_dir, use_gpu=False):
 
 
 class HumanSeg:
+    """
+    Human Segmentation Class
+    """
     def __init__(self, model_dir, mean, scale, eval_size, use_gpu=False):
         self.mean = np.array(mean).reshape((3, 1, 1))
         self.scale = np.array(scale).reshape((3, 1, 1))
         self.eval_size = eval_size
-        self.predictor = LoadModel(model_dir, use_gpu)
-
-    def Preprocess(self, image):
-        im = cv2.resize(image,
-                        self.eval_size,
-                        fx=0,
-                        fy=0,
-                        interpolation=cv2.INTER_LINEAR)
+        self.predictor = load_model(model_dir, use_gpu)
+
+    def preprocess(self, image):
+        """
+        preprocess image: hwc_rgb to chw_bgr
+        """
+        img_mat = cv2.resize(
+            image, self.eval_size, fx=0, fy=0, interpolation=cv2.INTER_CUBIC)
         # HWC -> CHW
-        im = im.swapaxes(1, 2)
-        im = im.swapaxes(0, 1)
+        img_mat = img_mat.swapaxes(1, 2)
+        img_mat = img_mat.swapaxes(0, 1)
         # Convert to float
-        im = im[:, :, :].astype('float32')
-        # im = (im - mean) * scale
-        im = im - self.mean
-        im = im * self.scale
-        im = im[np.newaxis, :, :, :]
-        return im
-
-    def Postprocess(self, image, output_data):
+        img_mat = img_mat[:, :, :].astype('float32')
+        # img_mat = (img_mat - mean) * scale
+        img_mat = img_mat - self.mean
+        img_mat = img_mat * self.scale
+        img_mat = img_mat[np.newaxis, :, :, :]
+        return img_mat
+
+    def postprocess(self, image, output_data):
+        """
+        postprocess result: merge background with segmentation result
+        """
         mask = output_data[0, 1, :, :]
         mask = cv2.resize(mask, (image.shape[1], image.shape[0]))
         scoremap = np.repeat(mask[:, :, np.newaxis], 3, axis=2)
-        bg = np.ones_like(scoremap) * 255
-        merge_im = (scoremap * image + (1 - scoremap) * bg).astype(np.uint8)
+        bg_im = np.ones_like(scoremap) * 255
+        merge_im = (scoremap * image + (1 - scoremap) * bg_im).astype(np.uint8)
         return merge_im
 
-    def Predict(self, image):
+    def run_predict(self, image):
+        """
+        run predict: return segmentation image mat
+        """
         ori_im = image.copy()
-        im = self.Preprocess(image)
-        im_tensor = fluid.core.PaddleTensor(im.copy().astype('float32'))
+        im_mat = self.preprocess(ori_im)
+        im_tensor = fluid.core.PaddleTensor(im_mat.copy().astype('float32'))
         output_data = self.predictor.run([im_tensor])[0]
         output_data = output_data.as_ndarray()
-        return self.Postprocess(image, output_data)
+        return self.postprocess(image, output_data)
+
+
+def predict_image(seg, image_path):
+    """
+    Do Predicting on a image
+    """
+    img_mat = cv2.imread(image_path)
+    img_mat = seg.run_predict(img_mat)
+    cv2.imwrite('result.jpeg', img_mat)
 
-# Do Predicting on a image
-def PredictImage(seg, image_path):
-    im = cv2.imread(input_path)
-    im = seg.Predict(im)
-    cv2.imwrite('result.jpeg', im)
 
-# Do Predicting on a video
-def PredictVideo(seg, video_path):
+def predict_video(seg, video_path):
+    """
+    Do Predicting on a video
+    """
     cap = cv2.VideoCapture(video_path)
-    if cap.isOpened() == False:
+    if not cap.isOpened():
         print("Error opening video stream or file")
         return
-    w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
-    h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
+    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     fps = cap.get(cv2.CAP_PROP_FPS)
     # Result Video Writer
     out = cv2.VideoWriter('result.avi',
-                          cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
-                          fps,
-                          (int(w), int(h)))
+                          cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps,
+                          (width, height))
     # Start capturing from video
     while cap.isOpened():
        ret, frame = cap.read()
-        if ret == True:
-            im = seg.Predict(frame)
-            out.write(im)
+        if ret:
+            img_mat = seg.run_predict(frame)
+            out.write(img_mat)
         else:
             break
     cap.release()
     out.release()
 
-# Do Predicting on a camera video stream
-def PredictCamera(seg):
+def predict_camera(seg):
+    """
+    Do Predicting on a camera video stream
+    """
     cap = cv2.VideoCapture(0)
     if not cap.isOpened():
         print("Error opening video stream or file")
@@ -121,8 +137,8 @@ def PredictCamera(seg):
     while cap.isOpened():
         ret, frame = cap.read()
         if ret:
-            im = seg.Predict(frame)
-            cv2.imshow('Frame', im)
+            img_mat = seg.run_predict(frame)
+            cv2.imshow('Frame', img_mat)
             if cv2.waitKey(1) & 0xFF == ord('q'):
                 break
         else:
@@ -137,16 +153,17 @@ def main(argv):
         print('Usage: python infer.py /path/to/model/ /path/to/video')
         return
-    model_dir = argv[1]
-    input_path = argv[2]
-    use_gpu = int(argv[3]) if len(argv) >= 4 else 0
+    model_dir = sys.argv[1]
+    input_path = sys.argv[2]
+    use_gpu = int(sys.argv[3]) if len(sys.argv) >= 4 else 0
     # Init model
     mean = [104.008, 116.669, 122.675]
     scale = [1.0, 1.0, 1.0]
     eval_size = (192, 192)
     seg = HumanSeg(model_dir, mean, scale, eval_size, use_gpu)
     # Run Predicting on a video and result will be saved as result.avi
-    PredictCamera(seg)
+    # predict_camera(seg)
+    predict_video(seg, input_path)
 
 
 if __name__ == "__main__":
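
For reference, a minimal sketch of how the refactored entry points could be exercised from another script. This is not part of the patch: it assumes the module is importable as "infer", the model directory and media file names are hypothetical placeholders, and the mean/scale/eval_size values are simply the ones used in main() above.

# Usage sketch (illustrative only; paths below are hypothetical placeholders)
from infer import HumanSeg, predict_image, predict_video

seg = HumanSeg(
    '/path/to/model/',                  # hypothetical inference model dir
    mean=[104.008, 116.669, 122.675],   # same values as in main()
    scale=[1.0, 1.0, 1.0],
    eval_size=(192, 192),
    use_gpu=False)
predict_image(seg, 'person.jpg')        # writes result.jpeg to the working dir
predict_video(seg, 'input.mp4')         # writes result.avi to the working dir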